Diffstat (limited to 'src')
378 files changed, 12567 insertions, 7753 deletions
diff --git a/src/auth/Auth.h b/src/auth/Auth.h
index 5521c8d3fcf..83e23b34dbe 100644
--- a/src/auth/Auth.h
+++ b/src/auth/Auth.h
@@ -16,6 +16,7 @@
 #define CEPH_AUTHTYPES_H
 #include "Crypto.h"
+#include "common/ceph_json.h"
 #include "common/entity_name.h"
 // The _MAX values are a bit wonky here because we are overloading the first
@@ -59,6 +60,14 @@ struct EntityAuth {
       decode(pending_key, bl);
     }
   }
+  void dump(ceph::Formatter *f) const {
+    f->dump_object("key", key);
+    encode_json("caps", caps, f);
+    f->dump_object("pending_key", pending_key);
+  }
+  static void generate_test_instances(std::list<EntityAuth*>& ls) {
+    ls.push_back(new EntityAuth);
+  }
 };
 WRITE_CLASS_ENCODER(EntityAuth)
@@ -95,6 +104,19 @@ struct AuthCapsInfo {
     allow_all = (bool)a;
     decode(caps, bl);
   }
+  void dump(ceph::Formatter *f) const {
+    f->dump_bool("allow_all", allow_all);
+    encode_json("caps", caps, f);
+    f->dump_unsigned("caps_len", caps.length());
+  }
+  static void generate_test_instances(std::list<AuthCapsInfo*>& ls) {
+    ls.push_back(new AuthCapsInfo);
+    ls.push_back(new AuthCapsInfo);
+    ls.back()->allow_all = true;
+    ls.push_back(new AuthCapsInfo);
+    ls.back()->caps.append("foo");
+    ls.back()->caps.append("bar");
+  }
 };
 WRITE_CLASS_ENCODER(AuthCapsInfo)
@@ -147,6 +169,25 @@ struct AuthTicket {
     decode(caps, bl);
     decode(flags, bl);
   }
+  void dump(ceph::Formatter *f) const {
+    f->dump_object("name", name);
+    f->dump_unsigned("global_id", global_id);
+    f->dump_stream("created") << created;
+    f->dump_stream("renew_after") << renew_after;
+    f->dump_stream("expires") << expires;
+    f->dump_object("caps", caps);
+    f->dump_unsigned("flags", flags);
+  }
+  static void generate_test_instances(std::list<AuthTicket*>& ls) {
+    ls.push_back(new AuthTicket);
+    ls.push_back(new AuthTicket);
+    ls.back()->name.set_id("client.123");
+    ls.back()->global_id = 123;
+    ls.back()->init_timestamps(utime_t(123, 456), 7);
+    ls.back()->caps.caps.append("foo");
+    ls.back()->caps.caps.append("bar");
+    ls.back()->flags = 0x12345678;
+  }
 };
 WRITE_CLASS_ENCODER(AuthTicket)
@@ -231,6 +272,16 @@ struct ExpiringCryptoKey {
     decode(key, bl);
     decode(expiration, bl);
   }
+  void dump(ceph::Formatter *f) const {
+    f->dump_object("key", key);
+    f->dump_stream("expiration") << expiration;
+  }
+  static void generate_test_instances(std::list<ExpiringCryptoKey*>& ls) {
+    ls.push_back(new ExpiringCryptoKey);
+    ls.push_back(new ExpiringCryptoKey);
+    ls.back()->key.set_secret(
+      CEPH_CRYPTO_AES, bufferptr("1234567890123456", 16), utime_t(123, 456));
+  }
 };
 WRITE_CLASS_ENCODER(ExpiringCryptoKey)
@@ -295,6 +346,15 @@ struct RotatingSecrets {
   }
   void dump();
+  void dump(ceph::Formatter *f) const {
+    encode_json("secrets", secrets, f);
+  }
+  static void generate_test_instances(std::list<RotatingSecrets*>& ls) {
+    ls.push_back(new RotatingSecrets);
+    ls.push_back(new RotatingSecrets);
+    auto eck = new ExpiringCryptoKey;
+    ls.back()->add(*eck);
+  }
 };
 WRITE_CLASS_ENCODER(RotatingSecrets)
diff --git a/src/auth/Crypto.cc b/src/auth/Crypto.cc
index ce666e8bdc8..5d68d3470bc 100644
--- a/src/auth/Crypto.cc
+++ b/src/auth/Crypto.cc
@@ -511,6 +511,23 @@ void CryptoKey::decode(bufferlist::const_iterator& bl)
     throw ceph::buffer::malformed_input("malformed secret");
 }
+void CryptoKey::dump(Formatter *f) const
+{
+  f->dump_int("type", type);
+  f->dump_stream("created") << created;
+  f->dump_int("secret.length", secret.length());
+}
+
+void CryptoKey::generate_test_instances(std::list<CryptoKey*>& ls)
+{
+  ls.push_back(new CryptoKey);
+  ls.push_back(new CryptoKey);
+  ls.back()->type = CEPH_CRYPTO_AES;
+  ls.back()->set_secret(
+    CEPH_CRYPTO_AES, bufferptr("1234567890123456", 16), utime_t(123, 456));
+  ls.back()->created = utime_t(123, 456);
+}
+
 int CryptoKey::set_secret(int type, const bufferptr& s, utime_t c)
 {
   int r = _set_secret(type, s);
diff --git a/src/auth/Crypto.h b/src/auth/Crypto.h
index a29ac1abd81..3ce655a1256 100644
--- a/src/auth/Crypto.h
+++ b/src/auth/Crypto.h
@@ -111,6 +111,8 @@ public:
   void encode(ceph::buffer::list& bl) const;
   void decode(ceph::buffer::list::const_iterator& bl);
+  void dump(ceph::Formatter *f) const;
+  static void generate_test_instances(std::list<CryptoKey*>& ls);
   void clear() {
     *this = CryptoKey();
diff --git a/src/auth/cephx/CephxKeyServer.cc b/src/auth/cephx/CephxKeyServer.cc
index 236ac451add..a5ad1f2b797 100644
--- a/src/auth/cephx/CephxKeyServer.cc
+++ b/src/auth/cephx/CephxKeyServer.cc
@@ -257,6 +257,16 @@ std::map<EntityName,CryptoKey> KeyServer::get_used_pending_keys()
   return ret;
 }
+void KeyServer::dump(Formatter *f) const
+{
+  f->dump_object("data", data);
+}
+
+void KeyServer::generate_test_instances(std::list<KeyServer*>& ls)
+{
+  ls.push_back(new KeyServer(nullptr, nullptr));
+}
+
 bool KeyServer::generate_secret(CryptoKey& secret)
 {
   bufferptr bp;
diff --git a/src/auth/cephx/CephxKeyServer.h b/src/auth/cephx/CephxKeyServer.h
index 64915c8ce4a..d147dd441ad 100644
--- a/src/auth/cephx/CephxKeyServer.h
+++ b/src/auth/cephx/CephxKeyServer.h
@@ -21,15 +21,16 @@
 #include "include/common_fwd.h"
 struct KeyServerData {
-  version_t version;
+  version_t version{0};
   /* for each entity */
   std::map<EntityName, EntityAuth> secrets;
-  KeyRing *extra_secrets;
+  KeyRing *extra_secrets = nullptr;
   /* for each service type */
-  version_t rotating_ver;
+  version_t rotating_ver{0};
   std::map<uint32_t, RotatingSecrets> rotating_secrets;
+  KeyServerData() {}
   explicit KeyServerData(KeyRing *extra)
     : version(0),
@@ -70,7 +71,17 @@ struct KeyServerData {
     decode(rotating_ver, iter);
     decode(rotating_secrets, iter);
   }
-
+  void dump(ceph::Formatter *f) const {
+    f->dump_unsigned("version", version);
+    f->dump_unsigned("rotating_version", rotating_ver);
+    encode_json("secrets", secrets, f);
+    encode_json("rotating_secrets", rotating_secrets, f);
+  }
+  static void generate_test_instances(std::list<KeyServerData*>& ls) {
+    ls.push_back(new KeyServerData);
+    ls.push_back(new KeyServerData);
+    ls.back()->version = 1;
+  }
   bool contains(const EntityName& name) const {
     return (secrets.find(name) != secrets.end());
   }
@@ -159,8 +170,21 @@ struct KeyServerData {
       decode(auth, bl);
     }
   }
+    void dump(ceph::Formatter *f) const {
+      f->dump_unsigned("op", op);
+      f->dump_object("name", name);
+      f->dump_object("auth", auth);
+    }
+    static void generate_test_instances(std::list<Incremental*>& ls) {
+      ls.push_back(new Incremental);
+      ls.back()->op = AUTH_INC_DEL;
+      ls.push_back(new Incremental);
+      ls.back()->op = AUTH_INC_ADD;
+      ls.push_back(new Incremental);
+      ls.back()->op = AUTH_INC_SET_ROTATING;
+    }
   };
-
+
   void apply_incremental(Incremental& inc) {
     switch (inc.op) {
     case AUTH_INC_ADD:
@@ -188,8 +212,6 @@ WRITE_CLASS_ENCODER(KeyServerData)
 WRITE_CLASS_ENCODER(KeyServerData::Incremental)
-
-
 class KeyServer : public KeyStore {
   CephContext *cct;
   KeyServerData data;
@@ -205,7 +227,9 @@ class KeyServer : public KeyStore {
   bool _get_service_caps(const EntityName& name, uint32_t service_id, AuthCapsInfo& caps) const;
 public:
+  KeyServer() : lock{ceph::make_mutex("KeyServer::lock")} {}
   KeyServer(CephContext *cct_, KeyRing *extra_secrets);
+  KeyServer& operator=(const KeyServer&) = delete;
   bool generate_secret(CryptoKey& secret);
   bool get_secret(const EntityName& name, CryptoKey& secret) const override;
@@ -248,6 +272,8 @@ public:
     using ceph::decode;
     decode(data, bl);
   }
+  void dump(ceph::Formatter *f) const;
+  static void generate_test_instances(std::list<KeyServer*>& ls);
   bool contains(const EntityName& name) const;
   int encode_secrets(ceph::Formatter *f, std::stringstream *ds) const;
   void encode_formatted(std::string label, ceph::Formatter *f, ceph::buffer::list &bl);
diff --git a/src/auth/cephx/CephxProtocol.h b/src/auth/cephx/CephxProtocol.h
index aabfaaad10c..260cb13ff5a 100644
--- a/src/auth/cephx/CephxProtocol.h
+++ b/src/auth/cephx/CephxProtocol.h
@@ -55,6 +55,13 @@ struct CephXServerChallenge {
     decode(struct_v, bl);
     decode(server_challenge, bl);
   }
+  void dump(ceph::Formatter *f) const {
+    f->dump_unsigned("server_challenge", server_challenge);
+  }
+  static void generate_test_instances(std::list<CephXServerChallenge*>& ls) {
+    ls.push_back(new CephXServerChallenge);
+    ls.back()->server_challenge = 1;
+  }
 };
 WRITE_CLASS_ENCODER(CephXServerChallenge)
@@ -72,6 +79,13 @@ struct CephXRequestHeader {
     using ceph::decode;
     decode(request_type, bl);
   }
+  void dump(ceph::Formatter *f) const {
+    f->dump_unsigned("request_type", request_type);
+  }
+  static void generate_test_instances(std::list<CephXRequestHeader*>& ls) {
+    ls.push_back(new CephXRequestHeader);
+    ls.back()->request_type = 1;
+  }
 };
 WRITE_CLASS_ENCODER(CephXRequestHeader)
@@ -89,6 +103,15 @@ struct CephXResponseHeader {
     decode(request_type, bl);
     decode(status, bl);
   }
+  void dump(ceph::Formatter *f) const {
+    f->dump_unsigned("request_type", request_type);
+    f->dump_int("status", status);
+  }
+  static void generate_test_instances(std::list<CephXResponseHeader*>& ls) {
+    ls.push_back(new CephXResponseHeader);
+    ls.back()->request_type = 1;
+    ls.back()->status = 0;
+  }
 };
 WRITE_CLASS_ENCODER(CephXResponseHeader)
@@ -113,6 +136,17 @@ struct CephXTicketBlob {
     decode(secret_id, bl);
     decode(blob, bl);
   }
+
+  void dump(ceph::Formatter *f) const {
+    f->dump_unsigned("secret_id", secret_id);
+    f->dump_unsigned("blob_len", blob.length());
+  }
+
+  static void generate_test_instances(std::list<CephXTicketBlob*>& ls) {
+    ls.push_back(new CephXTicketBlob);
+    ls.back()->secret_id = 123;
+    ls.back()->blob.append(std::string_view("this is a blob"));
+  }
 };
 WRITE_CLASS_ENCODER(CephXTicketBlob)
@@ -152,6 +186,25 @@ struct CephXAuthenticate {
     // old_ticket both on reconnects and renewals
     old_ticket_may_be_omitted = struct_v < 3;
   }
+  void dump(ceph::Formatter *f) const {
+    f->dump_unsigned("client_challenge", client_challenge);
+    f->dump_unsigned("key", key);
+    f->open_object_section("old_ticket");
+    old_ticket.dump(f);
+    f->close_section();
+    f->dump_unsigned("other_keys", other_keys);
+  }
+  static void generate_test_instances(std::list<CephXAuthenticate*>& ls) {
+    ls.push_back(new CephXAuthenticate);
+    ls.back()->client_challenge = 0;
+    ls.back()->key = 0;
+    ls.push_back(new CephXAuthenticate);
+    ls.back()->client_challenge = 1;
+    ls.back()->key = 2;
+    ls.back()->old_ticket.secret_id = 3;
+    ls.back()->old_ticket.blob.append(std::string_view("this is a blob"));
+    ls.back()->other_keys = 4;
+  }
 };
 WRITE_CLASS_ENCODER(CephXAuthenticate)
@@ -168,6 +221,15 @@ struct CephXChallengeBlob {
     decode(server_challenge, bl);
     decode(client_challenge, bl);
   }
+  void dump(ceph::Formatter *f) const {
+    f->dump_unsigned("server_challenge", server_challenge);
+    f->dump_unsigned("client_challenge", client_challenge);
+  }
+  static void generate_test_instances(std::list<CephXChallengeBlob*>& ls) {
+    ls.push_back(new CephXChallengeBlob);
+    ls.back()->server_challenge = 123;
+    ls.back()->client_challenge = 456;
+  }
 };
 WRITE_CLASS_ENCODER(CephXChallengeBlob)
@@ -218,6 +280,15 @@ struct CephXServiceTicketRequest {
     decode(struct_v, bl);
     decode(keys, bl);
   }
+
+  void dump(ceph::Formatter *f) const {
+    f->dump_unsigned("keys", keys);
+  }
+
+  static void generate_test_instances(std::list<CephXServiceTicketRequest*>& ls) {
+    ls.push_back(new CephXServiceTicketRequest);
+    ls.back()->keys = 123;
+  }
 };
 WRITE_CLASS_ENCODER(CephXServiceTicketRequest)
@@ -251,6 +322,17 @@ struct CephXAuthorizeReply {
       decode(connection_secret, bl);
     }
   }
+  void dump(ceph::Formatter *f) const {
+    f->dump_unsigned("nonce_plus_one", nonce_plus_one);
+    f->dump_string("connection_secret", connection_secret);
+  }
+  static void generate_test_instances(std::list<CephXAuthorizeReply*>& ls) {
+    ls.push_back(new CephXAuthorizeReply);
+    ls.back()->nonce_plus_one = 0;
+    ls.push_back(new CephXAuthorizeReply);
+    ls.back()->nonce_plus_one = 123;
+    ls.back()->connection_secret = "secret";
+  }
 };
 WRITE_CLASS_ENCODER(CephXAuthorizeReply)
@@ -353,6 +435,17 @@ struct CephXServiceTicket {
     decode(session_key, bl);
     decode(validity, bl);
   }
+  void dump(ceph::Formatter *f) const {
+    session_key.dump(f);
+    validity.dump(f);
+  }
+  static void generate_test_instances(std::list<CephXServiceTicket*>& ls) {
+    ls.push_back(new CephXServiceTicket);
+    ls.push_back(new CephXServiceTicket);
+    ls.back()->session_key.set_secret(
+      CEPH_CRYPTO_AES, bufferptr("1234567890123456", 16), utime_t(123, 456));
+    ls.back()->validity = utime_t(123, 456);
+  }
 };
 WRITE_CLASS_ENCODER(CephXServiceTicket)
@@ -375,6 +468,18 @@ struct CephXServiceTicketInfo {
     decode(ticket, bl);
     decode(session_key, bl);
   }
+  void dump(ceph::Formatter *f) const {
+    ticket.dump(f);
+    session_key.dump(f);
+  }
+  static void generate_test_instances(std::list<CephXServiceTicketInfo*>& ls) {
+    ls.push_back(new CephXServiceTicketInfo);
+    ls.push_back(new CephXServiceTicketInfo);
+    ls.back()->ticket.global_id = 1234;
+    ls.back()->ticket.init_timestamps(utime_t(123, 456), utime_t(123, 456));
+    ls.back()->session_key.set_secret(
+      CEPH_CRYPTO_AES, bufferptr("1234567890123456", 16), utime_t(123, 456));
+  }
 };
 WRITE_CLASS_ENCODER(CephXServiceTicketInfo)
@@ -392,6 +497,13 @@ struct CephXAuthorizeChallenge : public AuthAuthorizerChallenge {
     decode(struct_v, bl);
     decode(server_challenge, bl);
   }
+  void dump(ceph::Formatter *f) const {
+    f->dump_unsigned("server_challenge", server_challenge);
+  }
+  static void generate_test_instances(std::list<CephXAuthorizeChallenge*>& ls) {
+    ls.push_back(new CephXAuthorizeChallenge);
+    ls.back()->server_challenge = 1234;
+  }
 };
 WRITE_CLASS_ENCODER(CephXAuthorizeChallenge)
@@ -417,6 +529,18 @@ struct CephXAuthorize {
       decode(server_challenge_plus_one, bl);
     }
   }
+  void dump(ceph::Formatter *f) const {
+    f->dump_unsigned("nonce", nonce);
+    f->dump_unsigned("have_challenge", have_challenge);
+    f->dump_unsigned("server_challenge_plus_one", server_challenge_plus_one);
+  }
+  static void generate_test_instances(std::list<CephXAuthorize*>& ls) {
+    ls.push_back(new CephXAuthorize);
+    ls.push_back(new CephXAuthorize);
+    ls.back()->nonce = 1234;
+    ls.back()->have_challenge = true;
+    ls.back()->server_challenge_plus_one = 1234;
+  }
 };
 WRITE_CLASS_ENCODER(CephXAuthorize)
diff --git a/src/ceph-volume/ceph_volume/devices/lvm/listing.py b/src/ceph-volume/ceph_volume/devices/lvm/listing.py
index c16afdaa767..8fb9d8ddcf8 100644
---
a/src/ceph-volume/ceph_volume/devices/lvm/listing.py +++ b/src/ceph-volume/ceph_volume/devices/lvm/listing.py @@ -153,7 +153,9 @@ class List(object): elif arg[0] == '/': lv = api.get_lvs_from_path(arg) else: - lv = [api.get_single_lv(filters={'lv_name': arg.split('/')[1]})] + vg_name, lv_name = arg.split('/') + lv = [api.get_single_lv(filters={'lv_name': lv_name, + 'vg_name': vg_name})] report = self.create_report(lv) diff --git a/src/ceph-volume/ceph_volume/util/device.py b/src/ceph-volume/ceph_volume/util/device.py index bb806292f2c..c3de2a97fd9 100644 --- a/src/ceph-volume/ceph_volume/util/device.py +++ b/src/ceph-volume/ceph_volume/util/device.py @@ -460,11 +460,11 @@ class Device(object): def device_type(self): self.load_blkid_api() if 'type' in self.sys_api: - return self.sys_api['type'] + return self.sys_api.get('type') elif self.disk_api: - return self.disk_api['TYPE'] + return self.disk_api.get('TYPE') elif self.blkid_api: - return self.blkid_api['TYPE'] + return self.blkid_api.get('TYPE') @property def is_mpath(self): @@ -478,9 +478,9 @@ class Device(object): def is_partition(self): self.load_blkid_api() if self.disk_api: - return self.disk_api['TYPE'] == 'part' + return self.disk_api.get('TYPE') == 'part' elif self.blkid_api: - return self.blkid_api['TYPE'] == 'part' + return self.blkid_api.get('TYPE') == 'part' return False @property diff --git a/src/ceph-volume/ceph_volume/util/disk.py b/src/ceph-volume/ceph_volume/util/disk.py index 04104936985..0bbd5336d5a 100644 --- a/src/ceph-volume/ceph_volume/util/disk.py +++ b/src/ceph-volume/ceph_volume/util/disk.py @@ -802,7 +802,7 @@ def get_partitions(_sys_dev_block_path ='/sys/dev/block'): result = dict() for device in devices: device_path = os.path.join(_sys_dev_block_path, device) - is_partition = get_file_contents(os.path.join(device_path, 'partition')) == "1" + is_partition = int(get_file_contents(os.path.join(device_path, 'partition'), '0')) > 0 if not is_partition: continue diff --git a/src/cephadm/cephadm.py b/src/cephadm/cephadm.py index 348f581f9e6..ec90db6305b 100755 --- a/src/cephadm/cephadm.py +++ b/src/cephadm/cephadm.py @@ -18,7 +18,7 @@ import tempfile import time import errno import ssl -from typing import Dict, List, Tuple, Optional, Union, Any, Callable, IO, Sequence, TypeVar, cast, Iterable, TextIO +from typing import Dict, List, Tuple, Optional, Union, Any, Callable, Sequence, TypeVar, cast, Iterable import re import uuid @@ -28,29 +28,14 @@ from functools import wraps from glob import glob from io import StringIO from threading import Thread, Event -from urllib.error import HTTPError, URLError from urllib.request import urlopen, Request from pathlib import Path from cephadmlib.constants import ( # default images - DEFAULT_ALERT_MANAGER_IMAGE, - DEFAULT_ELASTICSEARCH_IMAGE, - DEFAULT_GRAFANA_IMAGE, - DEFAULT_HAPROXY_IMAGE, DEFAULT_IMAGE, DEFAULT_IMAGE_IS_MAIN, DEFAULT_IMAGE_RELEASE, - DEFAULT_JAEGER_AGENT_IMAGE, - DEFAULT_JAEGER_COLLECTOR_IMAGE, - DEFAULT_JAEGER_QUERY_IMAGE, - DEFAULT_KEEPALIVED_IMAGE, - DEFAULT_LOKI_IMAGE, - DEFAULT_NODE_EXPORTER_IMAGE, - DEFAULT_NVMEOF_IMAGE, - DEFAULT_PROMETHEUS_IMAGE, - DEFAULT_PROMTAIL_IMAGE, - DEFAULT_SNMP_GATEWAY_IMAGE, # other constant values CEPH_CONF, CEPH_CONF_DIR, @@ -82,7 +67,6 @@ from cephadmlib.context_getters import ( get_config_and_keyring, get_parm, read_configuration_source, - should_log_to_journald, ) from cephadmlib.exceptions import ( ClusterAlreadyExists, @@ -99,7 +83,6 @@ from cephadmlib.call_wrappers import ( concurrent_tasks, ) from 
cephadmlib.container_engines import ( - Docker, Podman, check_container_engine, find_container_engine, @@ -107,7 +90,6 @@ from cephadmlib.container_engines import ( registry_login, ) from cephadmlib.data_utils import ( - dict_get, dict_get_join, get_legacy_config_fsid, is_fsid, @@ -120,7 +102,6 @@ from cephadmlib.file_utils import ( get_file_timestamp, makedirs, pathify, - populate_files, read_file, recursive_chown, touch, @@ -134,7 +115,6 @@ from cephadmlib.net_utils import ( check_subnet, get_fqdn, get_hostname, - get_ip_addresses, get_short_hostname, ip_in_subnets, is_ipv6, @@ -145,10 +125,16 @@ from cephadmlib.net_utils import ( wrap_ipv6, ) from cephadmlib.locking import FileLock -from cephadmlib.daemon_identity import DaemonIdentity, DaemonSubIdentity +from cephadmlib.daemon_identity import DaemonIdentity from cephadmlib.packagers import create_packager, Packager -from cephadmlib.logging import cephadm_init_logging, Highlight, LogDestination +from cephadmlib.logging import ( + cephadm_init_logging, + Highlight, + LogDestination, +) from cephadmlib.systemd import check_unit, check_units +from cephadmlib import systemd_unit +from cephadmlib import runscripts from cephadmlib.container_types import ( CephContainer, InitContainer, @@ -169,10 +155,26 @@ from cephadmlib.daemon_form import ( register as register_daemon_form, ) from cephadmlib.deploy import DeploymentType -from cephadmlib.container_daemon_form import ContainerDaemonForm +from cephadmlib.container_daemon_form import ( + ContainerDaemonForm, + daemon_to_container, +) from cephadmlib.sysctl import install_sysctl, migrate_sysctl_dir from cephadmlib.firewalld import Firewalld, update_firewalld from cephadmlib import templating +from cephadmlib.daemons.ceph import get_ceph_mounts_for_type, ceph_daemons +from cephadmlib.daemons import ( + Ceph, + CephIscsi, + CephNvmeof, + CustomContainer, + HAproxy, + Keepalived, + Monitoring, + NFSGanesha, + SNMPGateway, + Tracing, +) FuncT = TypeVar('FuncT', bound=Callable) @@ -208,1980 +210,6 @@ class ContainerInfo: ################################## -@register_daemon_form -class Ceph(ContainerDaemonForm): - _daemons = ('mon', 'mgr', 'osd', 'mds', 'rgw', 'rbd-mirror', - 'crash', 'cephfs-mirror') - - @classmethod - def for_daemon_type(cls, daemon_type: str) -> bool: - # TODO: figure out a way to un-special-case osd - return daemon_type in cls._daemons and daemon_type != 'osd' - - def __init__(self, ctx: CephadmContext, ident: DaemonIdentity) -> None: - self.ctx = ctx - self._identity = ident - self.user_supplied_config = False - - @classmethod - def create(cls, ctx: CephadmContext, ident: DaemonIdentity) -> 'Ceph': - return cls(ctx, ident) - - @property - def identity(self) -> DaemonIdentity: - return self._identity - - def firewall_service_name(self) -> str: - if self.identity.daemon_type == 'mon': - return 'ceph-mon' - elif self.identity.daemon_type in ['mgr', 'mds']: - return 'ceph' - return '' - - def container(self, ctx: CephadmContext) -> CephContainer: - # previous to being a ContainerDaemonForm, this make_var_run - # call was hard coded in the deploy path. Eventually, it would be - # good to move this somwhere cleaner and avoid needing to know the - # uid/gid here. 
- uid, gid = self.uid_gid(ctx) - make_var_run(ctx, ctx.fsid, uid, gid) - - ctr = get_container(ctx, self.identity) - ctr = to_deployment_container(ctx, ctr) - config_json = fetch_configs(ctx) - if self.identity.daemon_type == 'mon' and config_json is not None: - if 'crush_location' in config_json: - c_loc = config_json['crush_location'] - # was originally "c.args.extend(['--set-crush-location', c_loc])" - # but that doesn't seem to persist in the object after it's passed - # in further function calls - ctr.args = ctr.args + ['--set-crush-location', c_loc] - return ctr - - _uid_gid: Optional[Tuple[int, int]] = None - - def uid_gid(self, ctx: CephadmContext) -> Tuple[int, int]: - if self._uid_gid is None: - self._uid_gid = extract_uid_gid(ctx) - return self._uid_gid - - def config_and_keyring( - self, ctx: CephadmContext - ) -> Tuple[Optional[str], Optional[str]]: - return get_config_and_keyring(ctx) - - def get_daemon_args(self) -> List[str]: - if self.identity.daemon_type == 'crash': - return [] - r = [ - '--setuser', 'ceph', - '--setgroup', 'ceph', - '--default-log-to-file=false', - ] - log_to_journald = should_log_to_journald(self.ctx) - if log_to_journald: - r += [ - '--default-log-to-journald=true', - '--default-log-to-stderr=false', - ] - else: - r += [ - '--default-log-to-stderr=true', - '--default-log-stderr-prefix=debug ', - ] - if self.identity.daemon_type == 'mon': - r += [ - '--default-mon-cluster-log-to-file=false', - ] - if log_to_journald: - r += [ - '--default-mon-cluster-log-to-journald=true', - '--default-mon-cluster-log-to-stderr=false', - ] - else: - r += ['--default-mon-cluster-log-to-stderr=true'] - return r - - @staticmethod - def get_ceph_mounts( - ctx: CephadmContext, - ident: DaemonIdentity, - no_config: bool = False, - ) -> Dict[str, str]: - # Warning: This is a hack done for more expedient refactoring - mounts = _get_container_mounts_for_type( - ctx, ident.fsid, ident.daemon_type - ) - data_dir = ident.data_dir(ctx.data_dir) - if ident.daemon_type == 'rgw': - cdata_dir = '/var/lib/ceph/radosgw/ceph-rgw.%s' % ( - ident.daemon_id - ) - else: - cdata_dir = '/var/lib/ceph/%s/ceph-%s' % ( - ident.daemon_type, - ident.daemon_id, - ) - if ident.daemon_type != 'crash': - mounts[data_dir] = cdata_dir + ':z' - if not no_config: - mounts[data_dir + '/config'] = '/etc/ceph/ceph.conf:z' - if ident.daemon_type in [ - 'rbd-mirror', - 'cephfs-mirror', - 'crash', - 'ceph-exporter', - ]: - # these do not search for their keyrings in a data directory - mounts[ - data_dir + '/keyring' - ] = '/etc/ceph/ceph.client.%s.%s.keyring' % ( - ident.daemon_type, - ident.daemon_id, - ) - return mounts - - def customize_container_mounts( - self, ctx: CephadmContext, mounts: Dict[str, str] - ) -> None: - cm = self.get_ceph_mounts( - ctx, - self.identity, - no_config=self.ctx.config and self.user_supplied_config, - ) - mounts.update(cm) - - def customize_container_args( - self, ctx: CephadmContext, args: List[str] - ) -> None: - args.append(ctx.container_engine.unlimited_pids_option) - - def customize_process_args( - self, ctx: CephadmContext, args: List[str] - ) -> None: - ident = self.identity - if ident.daemon_type == 'rgw': - name = 'client.rgw.%s' % ident.daemon_id - elif ident.daemon_type == 'rbd-mirror': - name = 'client.rbd-mirror.%s' % ident.daemon_id - elif ident.daemon_type == 'cephfs-mirror': - name = 'client.cephfs-mirror.%s' % ident.daemon_id - elif ident.daemon_type == 'crash': - name = 'client.crash.%s' % ident.daemon_id - elif ident.daemon_type in ['mon', 'mgr', 'mds', 'osd']: - 
name = ident.daemon_name - else: - raise ValueError(ident) - args.extend(['-n', name]) - if ident.daemon_type != 'crash': - args.append('-f') - args.extend(self.get_daemon_args()) - - def customize_container_envs( - self, ctx: CephadmContext, envs: List[str] - ) -> None: - envs.append('TCMALLOC_MAX_TOTAL_THREAD_CACHE_BYTES=134217728') - - def default_entrypoint(self) -> str: - ep = { - 'rgw': '/usr/bin/radosgw', - 'rbd-mirror': '/usr/bin/rbd-mirror', - 'cephfs-mirror': '/usr/bin/cephfs-mirror', - } - daemon_type = self.identity.daemon_type - return ep.get(daemon_type) or f'/usr/bin/ceph-{daemon_type}' - -################################## - - -@register_daemon_form -class OSD(Ceph): - @classmethod - def for_daemon_type(cls, daemon_type: str) -> bool: - # TODO: figure out a way to un-special-case osd - return daemon_type == 'osd' - - def __init__( - self, - ctx: CephadmContext, - ident: DaemonIdentity, - osd_fsid: Optional[str] = None, - ) -> None: - super().__init__(ctx, ident) - self._osd_fsid = osd_fsid - - @classmethod - def create(cls, ctx: CephadmContext, ident: DaemonIdentity) -> 'OSD': - osd_fsid = getattr(ctx, 'osd_fsid', None) - if osd_fsid is None: - logger.info( - 'Creating an OSD daemon form without an OSD FSID value' - ) - return cls(ctx, ident, osd_fsid) - - @staticmethod - def get_sysctl_settings() -> List[str]: - return [ - '# allow a large number of OSDs', - 'fs.aio-max-nr = 1048576', - 'kernel.pid_max = 4194304', - ] - - def firewall_service_name(self) -> str: - return 'ceph' - - @property - def osd_fsid(self) -> Optional[str]: - return self._osd_fsid - - -################################## - - -@register_daemon_form -class SNMPGateway(ContainerDaemonForm): - """Defines an SNMP gateway between Prometheus and SNMP monitoring Frameworks""" - daemon_type = 'snmp-gateway' - SUPPORTED_VERSIONS = ['V2c', 'V3'] - default_image = DEFAULT_SNMP_GATEWAY_IMAGE - DEFAULT_PORT = 9464 - env_filename = 'snmp-gateway.conf' - - @classmethod - def for_daemon_type(cls, daemon_type: str) -> bool: - return cls.daemon_type == daemon_type - - def __init__(self, - ctx: CephadmContext, - fsid: str, - daemon_id: Union[int, str], - config_json: Dict[str, Any], - image: Optional[str] = None) -> None: - self.ctx = ctx - self.fsid = fsid - self.daemon_id = daemon_id - self.image = image or SNMPGateway.default_image - - self.uid = config_json.get('uid', 0) - self.gid = config_json.get('gid', 0) - - self.destination = config_json.get('destination', '') - self.snmp_version = config_json.get('snmp_version', 'V2c') - self.snmp_community = config_json.get('snmp_community', 'public') - self.log_level = config_json.get('log_level', 'info') - self.snmp_v3_auth_username = config_json.get('snmp_v3_auth_username', '') - self.snmp_v3_auth_password = config_json.get('snmp_v3_auth_password', '') - self.snmp_v3_auth_protocol = config_json.get('snmp_v3_auth_protocol', '') - self.snmp_v3_priv_protocol = config_json.get('snmp_v3_priv_protocol', '') - self.snmp_v3_priv_password = config_json.get('snmp_v3_priv_password', '') - self.snmp_v3_engine_id = config_json.get('snmp_v3_engine_id', '') - - self.validate() - - @classmethod - def init(cls, ctx: CephadmContext, fsid: str, - daemon_id: Union[int, str]) -> 'SNMPGateway': - cfgs = fetch_configs(ctx) - assert cfgs # assert some config data was found - return cls(ctx, fsid, daemon_id, cfgs, ctx.image) - - @classmethod - def create(cls, ctx: CephadmContext, ident: DaemonIdentity) -> 'SNMPGateway': - return cls.init(ctx, ident.fsid, ident.daemon_id) - - @property - def 
identity(self) -> DaemonIdentity: - return DaemonIdentity(self.fsid, self.daemon_type, self.daemon_id) - - @staticmethod - def get_version(ctx: CephadmContext, fsid: str, daemon_id: str) -> Optional[str]: - """Return the version of the notifier from it's http endpoint""" - path = os.path.join(ctx.data_dir, fsid, f'snmp-gateway.{daemon_id}', 'unit.meta') - try: - with open(path, 'r') as env: - metadata = json.loads(env.read()) - except (OSError, json.JSONDecodeError): - return None - - ports = metadata.get('ports', []) - if not ports: - return None - - try: - with urlopen(f'http://127.0.0.1:{ports[0]}/') as r: - html = r.read().decode('utf-8').split('\n') - except (HTTPError, URLError): - return None - - for h in html: - stripped = h.strip() - if stripped.startswith(('<pre>', '<PRE>')) and \ - stripped.endswith(('</pre>', '</PRE>')): - # <pre>(version=1.2.1, branch=HEAD, revision=7... - return stripped.split(',')[0].split('version=')[1] - - return None - - @property - def port(self) -> int: - endpoints = fetch_endpoints(self.ctx) - if not endpoints: - return self.DEFAULT_PORT - return endpoints[0].port - - def get_daemon_args(self) -> List[str]: - v3_args = [] - base_args = [ - f'--web.listen-address=:{self.port}', - f'--snmp.destination={self.destination}', - f'--snmp.version={self.snmp_version}', - f'--log.level={self.log_level}', - '--snmp.trap-description-template=/etc/snmp_notifier/description-template.tpl' - ] - - if self.snmp_version == 'V3': - # common auth settings - v3_args.extend([ - '--snmp.authentication-enabled', - f'--snmp.authentication-protocol={self.snmp_v3_auth_protocol}', - f'--snmp.security-engine-id={self.snmp_v3_engine_id}' - ]) - # authPriv setting is applied if we have a privacy protocol setting - if self.snmp_v3_priv_protocol: - v3_args.extend([ - '--snmp.private-enabled', - f'--snmp.private-protocol={self.snmp_v3_priv_protocol}' - ]) - - return base_args + v3_args - - @property - def data_dir(self) -> str: - return os.path.join(self.ctx.data_dir, self.ctx.fsid, f'{self.daemon_type}.{self.daemon_id}') - - @property - def conf_file_path(self) -> str: - return os.path.join(self.data_dir, self.env_filename) - - def create_daemon_conf(self) -> None: - """Creates the environment file holding 'secrets' passed to the snmp-notifier daemon""" - with write_new(self.conf_file_path) as f: - if self.snmp_version == 'V2c': - f.write(f'SNMP_NOTIFIER_COMMUNITY={self.snmp_community}\n') - else: - f.write(f'SNMP_NOTIFIER_AUTH_USERNAME={self.snmp_v3_auth_username}\n') - f.write(f'SNMP_NOTIFIER_AUTH_PASSWORD={self.snmp_v3_auth_password}\n') - if self.snmp_v3_priv_password: - f.write(f'SNMP_NOTIFIER_PRIV_PASSWORD={self.snmp_v3_priv_password}\n') - - def validate(self) -> None: - """Validate the settings - - Raises: - Error: if the fsid doesn't look like an fsid - Error: if the snmp version is not supported - Error: destination IP and port address missing - """ - if not is_fsid(self.fsid): - raise Error(f'not a valid fsid: {self.fsid}') - - if self.snmp_version not in SNMPGateway.SUPPORTED_VERSIONS: - raise Error(f'not a valid snmp version: {self.snmp_version}') - - if not self.destination: - raise Error('config is missing destination attribute(<ip>:<port>) of the target SNMP listener') - - def container(self, ctx: CephadmContext) -> CephContainer: - ctr = get_container(ctx, self.identity) - return to_deployment_container(ctx, ctr) - - def uid_gid(self, ctx: CephadmContext) -> Tuple[int, int]: - return self.uid, self.gid - - def customize_container_args( - self, ctx: CephadmContext, 
args: List[str] - ) -> None: - args.append(f'--env-file={self.conf_file_path}') - - def customize_process_args( - self, ctx: CephadmContext, args: List[str] - ) -> None: - args.extend(self.get_daemon_args()) - - -################################## -@register_daemon_form -class Monitoring(ContainerDaemonForm): - """Define the configs for the monitoring containers""" - - port_map = { - 'prometheus': [9095], # Avoid default 9090, due to conflict with cockpit UI - 'node-exporter': [9100], - 'grafana': [3000], - 'alertmanager': [9093, 9094], - 'loki': [3100], - 'promtail': [9080] - } - - components = { - 'prometheus': { - 'image': DEFAULT_PROMETHEUS_IMAGE, - 'cpus': '2', - 'memory': '4GB', - 'args': [ - '--config.file=/etc/prometheus/prometheus.yml', - '--storage.tsdb.path=/prometheus', - ], - 'config-json-files': [ - 'prometheus.yml', - ], - }, - 'loki': { - 'image': DEFAULT_LOKI_IMAGE, - 'cpus': '1', - 'memory': '1GB', - 'args': [ - '--config.file=/etc/loki/loki.yml', - ], - 'config-json-files': [ - 'loki.yml' - ], - }, - 'promtail': { - 'image': DEFAULT_PROMTAIL_IMAGE, - 'cpus': '1', - 'memory': '1GB', - 'args': [ - '--config.file=/etc/promtail/promtail.yml', - ], - 'config-json-files': [ - 'promtail.yml', - ], - }, - 'node-exporter': { - 'image': DEFAULT_NODE_EXPORTER_IMAGE, - 'cpus': '1', - 'memory': '1GB', - 'args': [ - '--no-collector.timex' - ], - }, - 'grafana': { - 'image': DEFAULT_GRAFANA_IMAGE, - 'cpus': '2', - 'memory': '4GB', - 'args': [], - 'config-json-files': [ - 'grafana.ini', - 'provisioning/datasources/ceph-dashboard.yml', - 'certs/cert_file', - 'certs/cert_key', - ], - }, - 'alertmanager': { - 'image': DEFAULT_ALERT_MANAGER_IMAGE, - 'cpus': '2', - 'memory': '2GB', - 'args': [ - '--cluster.listen-address=:{}'.format(port_map['alertmanager'][1]), - ], - 'config-json-files': [ - 'alertmanager.yml', - ], - 'config-json-args': [ - 'peers', - ], - }, - } # type: ignore - - @classmethod - def for_daemon_type(cls, daemon_type: str) -> bool: - return daemon_type in cls.components - - @staticmethod - def get_version(ctx, container_id, daemon_type): - # type: (CephadmContext, str, str) -> str - """ - :param: daemon_type Either "prometheus", "alertmanager", "loki", "promtail" or "node-exporter" - """ - assert daemon_type in ('prometheus', 'alertmanager', 'node-exporter', 'loki', 'promtail') - cmd = daemon_type.replace('-', '_') - code = -1 - err = '' - out = '' - version = '' - if daemon_type == 'alertmanager': - for cmd in ['alertmanager', 'prometheus-alertmanager']: - out, err, code = call(ctx, [ - ctx.container_engine.path, 'exec', container_id, cmd, - '--version' - ], verbosity=CallVerbosity.QUIET) - if code == 0: - break - cmd = 'alertmanager' # reset cmd for version extraction - else: - out, err, code = call(ctx, [ - ctx.container_engine.path, 'exec', container_id, cmd, '--version' - ], verbosity=CallVerbosity.QUIET) - if code == 0: - if err.startswith('%s, version ' % cmd): - version = err.split(' ')[2] - elif out.startswith('%s, version ' % cmd): - version = out.split(' ')[2] - return version - - @staticmethod - def extract_uid_gid( - ctx: CephadmContext, daemon_type: str - ) -> Tuple[int, int]: - if daemon_type == 'prometheus': - uid, gid = extract_uid_gid(ctx, file_path='/etc/prometheus') - elif daemon_type == 'node-exporter': - uid, gid = 65534, 65534 - elif daemon_type == 'grafana': - uid, gid = extract_uid_gid(ctx, file_path='/var/lib/grafana') - elif daemon_type == 'loki': - uid, gid = extract_uid_gid(ctx, file_path='/etc/loki') - elif daemon_type == 'promtail': - uid, 
gid = extract_uid_gid(ctx, file_path='/etc/promtail') - elif daemon_type == 'alertmanager': - uid, gid = extract_uid_gid( - ctx, file_path=['/etc/alertmanager', '/etc/prometheus'] - ) - else: - raise Error('{} not implemented yet'.format(daemon_type)) - return uid, gid - - def __init__(self, ctx: CephadmContext, ident: DaemonIdentity) -> None: - self.ctx = ctx - self._identity = ident - - @classmethod - def create(cls, ctx: CephadmContext, ident: DaemonIdentity) -> 'Monitoring': - return cls(ctx, ident) - - @property - def identity(self) -> DaemonIdentity: - return self._identity - - def container(self, ctx: CephadmContext) -> CephContainer: - self._prevalidate(ctx) - ctr = get_container(ctx, self.identity) - return to_deployment_container(ctx, ctr) - - def uid_gid(self, ctx: CephadmContext) -> Tuple[int, int]: - return self.extract_uid_gid(ctx, self.identity.daemon_type) - - def _prevalidate(self, ctx: CephadmContext) -> None: - # before being refactored into a ContainerDaemonForm these checks were - # done inside the deploy function. This was the only "family" of daemons - # that performed these checks in that location - daemon_type = self.identity.daemon_type - config = fetch_configs(ctx) # type: ignore - required_files = self.components[daemon_type].get( - 'config-json-files', list() - ) - required_args = self.components[daemon_type].get( - 'config-json-args', list() - ) - if required_files: - if not config or not all(c in config.get('files', {}).keys() for c in required_files): # type: ignore - raise Error( - '{} deployment requires config-json which must ' - 'contain file content for {}'.format( - daemon_type.capitalize(), ', '.join(required_files) - ) - ) - if required_args: - if not config or not all(c in config.keys() for c in required_args): # type: ignore - raise Error( - '{} deployment requires config-json which must ' - 'contain arg for {}'.format( - daemon_type.capitalize(), ', '.join(required_args) - ) - ) - - def get_daemon_args(self) -> List[str]: - ctx = self.ctx - daemon_type = self.identity.daemon_type - metadata = self.components[daemon_type] - r = list(metadata.get('args', [])) - # set ip and port to bind to for nodeexporter,alertmanager,prometheus - if daemon_type not in ['grafana', 'loki', 'promtail']: - ip = '' - port = self.port_map[daemon_type][0] - meta = fetch_meta(ctx) - if meta: - if 'ip' in meta and meta['ip']: - ip = meta['ip'] - if 'ports' in meta and meta['ports']: - port = meta['ports'][0] - r += [f'--web.listen-address={ip}:{port}'] - if daemon_type == 'prometheus': - config = fetch_configs(ctx) - retention_time = config.get('retention_time', '15d') - retention_size = config.get('retention_size', '0') # default to disabled - r += [f'--storage.tsdb.retention.time={retention_time}'] - r += [f'--storage.tsdb.retention.size={retention_size}'] - scheme = 'http' - host = get_fqdn() - # in case host is not an fqdn then we use the IP to - # avoid producing a broken web.external-url link - if '.' 
not in host: - ipv4_addrs, ipv6_addrs = get_ip_addresses(get_hostname()) - # use the first ipv4 (if any) otherwise use the first ipv6 - addr = next(iter(ipv4_addrs or ipv6_addrs), None) - host = wrap_ipv6(addr) if addr else host - r += [f'--web.external-url={scheme}://{host}:{port}'] - if daemon_type == 'alertmanager': - config = fetch_configs(ctx) - peers = config.get('peers', list()) # type: ignore - for peer in peers: - r += ['--cluster.peer={}'.format(peer)] - try: - r += [f'--web.config.file={config["web_config"]}'] - except KeyError: - pass - # some alertmanager, by default, look elsewhere for a config - r += ['--config.file=/etc/alertmanager/alertmanager.yml'] - if daemon_type == 'promtail': - r += ['--config.expand-env'] - if daemon_type == 'prometheus': - config = fetch_configs(ctx) - try: - r += [f'--web.config.file={config["web_config"]}'] - except KeyError: - pass - if daemon_type == 'node-exporter': - config = fetch_configs(ctx) - try: - r += [f'--web.config.file={config["web_config"]}'] - except KeyError: - pass - r += ['--path.procfs=/host/proc', - '--path.sysfs=/host/sys', - '--path.rootfs=/rootfs'] - return r - - def _get_container_mounts(self, data_dir: str) -> Dict[str, str]: - ctx = self.ctx - daemon_type = self.identity.daemon_type - mounts: Dict[str, str] = {} - log_dir = get_log_dir(self.identity.fsid, ctx.log_dir) - if daemon_type == 'prometheus': - mounts[ - os.path.join(data_dir, 'etc/prometheus') - ] = '/etc/prometheus:Z' - mounts[os.path.join(data_dir, 'data')] = '/prometheus:Z' - elif daemon_type == 'loki': - mounts[os.path.join(data_dir, 'etc/loki')] = '/etc/loki:Z' - mounts[os.path.join(data_dir, 'data')] = '/loki:Z' - elif daemon_type == 'promtail': - mounts[os.path.join(data_dir, 'etc/promtail')] = '/etc/promtail:Z' - mounts[log_dir] = '/var/log/ceph:z' - mounts[os.path.join(data_dir, 'data')] = '/promtail:Z' - elif daemon_type == 'node-exporter': - mounts[ - os.path.join(data_dir, 'etc/node-exporter') - ] = '/etc/node-exporter:Z' - mounts['/proc'] = '/host/proc:ro' - mounts['/sys'] = '/host/sys:ro' - mounts['/'] = '/rootfs:ro' - elif daemon_type == 'grafana': - mounts[ - os.path.join(data_dir, 'etc/grafana/grafana.ini') - ] = '/etc/grafana/grafana.ini:Z' - mounts[ - os.path.join(data_dir, 'etc/grafana/provisioning/datasources') - ] = '/etc/grafana/provisioning/datasources:Z' - mounts[ - os.path.join(data_dir, 'etc/grafana/certs') - ] = '/etc/grafana/certs:Z' - mounts[ - os.path.join(data_dir, 'data/grafana.db') - ] = '/var/lib/grafana/grafana.db:Z' - elif daemon_type == 'alertmanager': - mounts[ - os.path.join(data_dir, 'etc/alertmanager') - ] = '/etc/alertmanager:Z' - return mounts - - def customize_container_mounts( - self, ctx: CephadmContext, mounts: Dict[str, str] - ) -> None: - data_dir = self.identity.data_dir(ctx.data_dir) - mounts.update(self._get_container_mounts(data_dir)) - - def customize_container_args( - self, ctx: CephadmContext, args: List[str] - ) -> None: - uid, _ = self.uid_gid(ctx) - monitoring_args = [ - '--user', - str(uid), - # FIXME: disable cpu/memory limits for the time being (not supported - # by ubuntu 18.04 kernel!) 
- ] - args.extend(monitoring_args) - if self.identity.daemon_type == 'node-exporter': - # in order to support setting '--path.procfs=/host/proc','--path.sysfs=/host/sys', - # '--path.rootfs=/rootfs' for node-exporter we need to disable selinux separation - # between the node-exporter container and the host to avoid selinux denials - args.extend(['--security-opt', 'label=disable']) - - def customize_process_args( - self, ctx: CephadmContext, args: List[str] - ) -> None: - args.extend(self.get_daemon_args()) - - def default_entrypoint(self) -> str: - return '' - -################################## - - -@register_daemon_form -class NFSGanesha(ContainerDaemonForm): - """Defines a NFS-Ganesha container""" - - daemon_type = 'nfs' - entrypoint = '/usr/bin/ganesha.nfsd' - daemon_args = ['-F', '-L', 'STDERR'] - - required_files = ['ganesha.conf'] - - port_map = { - 'nfs': 2049, - } - - @classmethod - def for_daemon_type(cls, daemon_type: str) -> bool: - return cls.daemon_type == daemon_type - - def __init__(self, - ctx, - fsid, - daemon_id, - config_json, - image=DEFAULT_IMAGE): - # type: (CephadmContext, str, Union[int, str], Dict, str) -> None - self.ctx = ctx - self.fsid = fsid - self.daemon_id = daemon_id - self.image = image - - # config-json options - self.pool = dict_get(config_json, 'pool', require=True) - self.namespace = dict_get(config_json, 'namespace') - self.userid = dict_get(config_json, 'userid') - self.extra_args = dict_get(config_json, 'extra_args', []) - self.files = dict_get(config_json, 'files', {}) - self.rgw = dict_get(config_json, 'rgw', {}) - - # validate the supplied args - self.validate() - - @classmethod - def init(cls, ctx, fsid, daemon_id): - # type: (CephadmContext, str, Union[int, str]) -> NFSGanesha - return cls(ctx, fsid, daemon_id, fetch_configs(ctx), ctx.image) - - @classmethod - def create(cls, ctx: CephadmContext, ident: DaemonIdentity) -> 'NFSGanesha': - return cls.init(ctx, ident.fsid, ident.daemon_id) - - @property - def identity(self) -> DaemonIdentity: - return DaemonIdentity(self.fsid, self.daemon_type, self.daemon_id) - - def _get_container_mounts(self, data_dir): - # type: (str) -> Dict[str, str] - mounts = dict() - mounts[os.path.join(data_dir, 'config')] = '/etc/ceph/ceph.conf:z' - mounts[os.path.join(data_dir, 'keyring')] = '/etc/ceph/keyring:z' - mounts[os.path.join(data_dir, 'etc/ganesha')] = '/etc/ganesha:z' - if self.rgw: - cluster = self.rgw.get('cluster', 'ceph') - rgw_user = self.rgw.get('user', 'admin') - mounts[os.path.join(data_dir, 'keyring.rgw')] = \ - '/var/lib/ceph/radosgw/%s-%s/keyring:z' % (cluster, rgw_user) - return mounts - - def customize_container_mounts( - self, ctx: CephadmContext, mounts: Dict[str, str] - ) -> None: - data_dir = self.identity.data_dir(ctx.data_dir) - mounts.update(self._get_container_mounts(data_dir)) - - @staticmethod - def get_container_envs(): - # type: () -> List[str] - envs = [ - 'CEPH_CONF=%s' % (CEPH_DEFAULT_CONF) - ] - return envs - - @staticmethod - def get_version(ctx, container_id): - # type: (CephadmContext, str) -> Optional[str] - version = None - out, err, code = call(ctx, - [ctx.container_engine.path, 'exec', container_id, - NFSGanesha.entrypoint, '-v'], - verbosity=CallVerbosity.QUIET) - if code == 0: - match = re.search(r'NFS-Ganesha Release\s*=\s*[V]*([\d.]+)', out) - if match: - version = match.group(1) - return version - - def validate(self): - # type: () -> None - if not is_fsid(self.fsid): - raise Error('not an fsid: %s' % self.fsid) - if not self.daemon_id: - raise Error('invalid 
daemon_id: %s' % self.daemon_id) - if not self.image: - raise Error('invalid image: %s' % self.image) - - # check for the required files - if self.required_files: - for fname in self.required_files: - if fname not in self.files: - raise Error('required file missing from config-json: %s' % fname) - - # check for an RGW config - if self.rgw: - if not self.rgw.get('keyring'): - raise Error('RGW keyring is missing') - if not self.rgw.get('user'): - raise Error('RGW user is missing') - - def get_daemon_name(self): - # type: () -> str - return '%s.%s' % (self.daemon_type, self.daemon_id) - - def get_container_name(self, desc=None): - # type: (Optional[str]) -> str - cname = 'ceph-%s-%s' % (self.fsid, self.get_daemon_name()) - if desc: - cname = '%s-%s' % (cname, desc) - return cname - - def get_daemon_args(self): - # type: () -> List[str] - return self.daemon_args + self.extra_args - - def create_daemon_dirs(self, data_dir, uid, gid): - # type: (str, int, int) -> None - """Create files under the container data dir""" - if not os.path.isdir(data_dir): - raise OSError('data_dir is not a directory: %s' % (data_dir)) - - logger.info('Creating ganesha config...') - - # create the ganesha conf dir - config_dir = os.path.join(data_dir, 'etc/ganesha') - makedirs(config_dir, uid, gid, 0o755) - - # populate files from the config-json - populate_files(config_dir, self.files, uid, gid) - - # write the RGW keyring - if self.rgw: - keyring_path = os.path.join(data_dir, 'keyring.rgw') - with write_new(keyring_path, owner=(uid, gid)) as f: - f.write(self.rgw.get('keyring', '')) - - def firewall_service_name(self) -> str: - return 'nfs' - - def container(self, ctx: CephadmContext) -> CephContainer: - ctr = get_container(ctx, self.identity) - return to_deployment_container(ctx, ctr) - - def customize_container_endpoints( - self, endpoints: List[EndPoint], deployment_type: DeploymentType - ) -> None: - if deployment_type == DeploymentType.DEFAULT and not endpoints: - nfs_ports = list(NFSGanesha.port_map.values()) - endpoints.extend([EndPoint('0.0.0.0', p) for p in nfs_ports]) - - def uid_gid(self, ctx: CephadmContext) -> Tuple[int, int]: - # TODO: extract ganesha uid/gid (997, 994) ? 
- return extract_uid_gid(ctx) - - def config_and_keyring( - self, ctx: CephadmContext - ) -> Tuple[Optional[str], Optional[str]]: - return get_config_and_keyring(ctx) - - def customize_container_envs( - self, ctx: CephadmContext, envs: List[str] - ) -> None: - envs.extend(self.get_container_envs()) - - def customize_process_args( - self, ctx: CephadmContext, args: List[str] - ) -> None: - args.extend(self.get_daemon_args()) - - def customize_container_args( - self, ctx: CephadmContext, args: List[str] - ) -> None: - args.append(ctx.container_engine.unlimited_pids_option) - - def default_entrypoint(self) -> str: - return self.entrypoint - -################################## - - -@register_daemon_form -class CephIscsi(ContainerDaemonForm): - """Defines a Ceph-Iscsi container""" - - daemon_type = 'iscsi' - entrypoint = '/usr/bin/rbd-target-api' - - required_files = ['iscsi-gateway.cfg'] - - @classmethod - def for_daemon_type(cls, daemon_type: str) -> bool: - return cls.daemon_type == daemon_type - - def __init__(self, - ctx, - fsid, - daemon_id, - config_json, - image=DEFAULT_IMAGE): - # type: (CephadmContext, str, Union[int, str], Dict, str) -> None - self.ctx = ctx - self.fsid = fsid - self.daemon_id = daemon_id - self.image = image - - # config-json options - self.files = dict_get(config_json, 'files', {}) - - # validate the supplied args - self.validate() - - @classmethod - def init(cls, ctx, fsid, daemon_id): - # type: (CephadmContext, str, Union[int, str]) -> CephIscsi - return cls(ctx, fsid, daemon_id, - fetch_configs(ctx), ctx.image) - - @classmethod - def create(cls, ctx: CephadmContext, ident: DaemonIdentity) -> 'CephIscsi': - return cls.init(ctx, ident.fsid, ident.daemon_id) - - @property - def identity(self) -> DaemonIdentity: - return DaemonIdentity(self.fsid, self.daemon_type, self.daemon_id) - - @staticmethod - def _get_container_mounts(data_dir, log_dir): - # type: (str, str) -> Dict[str, str] - mounts = dict() - mounts[os.path.join(data_dir, 'config')] = '/etc/ceph/ceph.conf:z' - mounts[os.path.join(data_dir, 'keyring')] = '/etc/ceph/keyring:z' - mounts[os.path.join(data_dir, 'iscsi-gateway.cfg')] = '/etc/ceph/iscsi-gateway.cfg:z' - mounts[os.path.join(data_dir, 'configfs')] = '/sys/kernel/config' - mounts[os.path.join(data_dir, 'tcmu-runner-entrypoint.sh')] = '/usr/local/scripts/tcmu-runner-entrypoint.sh' - mounts[log_dir] = '/var/log:z' - mounts['/dev'] = '/dev' - return mounts - - def customize_container_mounts( - self, ctx: CephadmContext, mounts: Dict[str, str] - ) -> None: - data_dir = self.identity.data_dir(ctx.data_dir) - # Removes ending ".tcmu" from data_dir a tcmu-runner uses the same - # data_dir as rbd-runner-api - if data_dir.endswith('.tcmu'): - data_dir = re.sub(r'\.tcmu$', '', data_dir) - log_dir = get_log_dir(self.identity.fsid, ctx.log_dir) - mounts.update(CephIscsi._get_container_mounts(data_dir, log_dir)) - - def customize_container_binds( - self, ctx: CephadmContext, binds: List[List[str]] - ) -> None: - lib_modules = [ - 'type=bind', - 'source=/lib/modules', - 'destination=/lib/modules', - 'ro=true', - ] - binds.append(lib_modules) - - @staticmethod - def get_version(ctx, container_id): - # type: (CephadmContext, str) -> Optional[str] - version = None - out, err, code = call(ctx, - [ctx.container_engine.path, 'exec', container_id, - '/usr/bin/python3', '-c', - "import pkg_resources; print(pkg_resources.require('ceph_iscsi')[0].version)"], - verbosity=CallVerbosity.QUIET) - if code == 0: - version = out.strip() - return version - - def validate(self): - 
# type: () -> None - if not is_fsid(self.fsid): - raise Error('not an fsid: %s' % self.fsid) - if not self.daemon_id: - raise Error('invalid daemon_id: %s' % self.daemon_id) - if not self.image: - raise Error('invalid image: %s' % self.image) - - # check for the required files - if self.required_files: - for fname in self.required_files: - if fname not in self.files: - raise Error('required file missing from config-json: %s' % fname) - - def get_daemon_name(self): - # type: () -> str - return '%s.%s' % (self.daemon_type, self.daemon_id) - - def get_container_name(self, desc=None): - # type: (Optional[str]) -> str - cname = 'ceph-%s-%s' % (self.fsid, self.get_daemon_name()) - if desc: - cname = '%s-%s' % (cname, desc) - return cname - - def create_daemon_dirs(self, data_dir, uid, gid): - # type: (str, int, int) -> None - """Create files under the container data dir""" - if not os.path.isdir(data_dir): - raise OSError('data_dir is not a directory: %s' % (data_dir)) - - logger.info('Creating ceph-iscsi config...') - configfs_dir = os.path.join(data_dir, 'configfs') - makedirs(configfs_dir, uid, gid, 0o755) - - # set up the tcmu-runner entrypoint script - # to be mounted into the container. For more info - # on why we need this script, see the - # tcmu_runner_entrypoint_script function - self.files['tcmu-runner-entrypoint.sh'] = self.tcmu_runner_entrypoint_script() - - # populate files from the config-json - populate_files(data_dir, self.files, uid, gid) - - # we want the tcmu runner entrypoint script to be executable - # populate_files will give it 0o600 by default - os.chmod(os.path.join(data_dir, 'tcmu-runner-entrypoint.sh'), 0o700) - - @staticmethod - def configfs_mount_umount(data_dir, mount=True): - # type: (str, bool) -> List[str] - mount_path = os.path.join(data_dir, 'configfs') - if mount: - cmd = 'if ! grep -qs {0} /proc/mounts; then ' \ - 'mount -t configfs none {0}; fi'.format(mount_path) - else: - cmd = 'if grep -qs {0} /proc/mounts; then ' \ - 'umount {0}; fi'.format(mount_path) - return cmd.split() - - @staticmethod - def tcmu_runner_entrypoint_script() -> str: - # since we are having tcmu-runner be a background - # process in its systemd unit (rbd-target-api being - # the main process) systemd will not restart it when - # it fails. in order to try and get around that for now - # we can have a script mounted in the container that - # that attempts to do the restarting for us. This script - # can then become the entrypoint for the tcmu-runner - # container - - # This is intended to be dropped for a better solution - # for at least the squid release onward - return """#!/bin/bash -RUN_DIR=/var/run/tcmu-runner - -if [ ! -d "${RUN_DIR}" ] ; then - mkdir -p "${RUN_DIR}" -fi - -rm -rf "${RUN_DIR}"/* - -while true -do - touch "${RUN_DIR}"/start-up-$(date -Ins) - /usr/bin/tcmu-runner - - # If we got around 3 kills/segfaults in the last minute, - # don't start anymore - if [ $(find "${RUN_DIR}" -type f -cmin -1 | wc -l) -ge 3 ] ; then - exit 0 - fi - - sleep 1 -done -""" - - def get_tcmu_runner_container(self): - # type: () -> CephContainer - # daemon_id, is used to generated the cid and pid files used by podman but as both tcmu-runner - # and rbd-target-api have the same daemon_id, it conflits and prevent the second container from - # starting. .tcmu runner is appended to the daemon_id to fix that. 
- subident = DaemonSubIdentity( - self.fsid, self.daemon_type, self.daemon_id, 'tcmu' - ) - tcmu_container = to_deployment_container( - self.ctx, get_container(self.ctx, subident) - ) - # TODO: Eventually we don't want to run tcmu-runner through this script. - # This is intended to be a workaround backported to older releases - # and should eventually be removed in at least squid onward - tcmu_container.entrypoint = '/usr/local/scripts/tcmu-runner-entrypoint.sh' - tcmu_container.cname = self.get_container_name(desc='tcmu') - return tcmu_container - - def container(self, ctx: CephadmContext) -> CephContainer: - ctr = get_container(ctx, self.identity) - return to_deployment_container(ctx, ctr) - - def config_and_keyring( - self, ctx: CephadmContext - ) -> Tuple[Optional[str], Optional[str]]: - return get_config_and_keyring(ctx) - - def uid_gid(self, ctx: CephadmContext) -> Tuple[int, int]: - return extract_uid_gid(ctx) - - def default_entrypoint(self) -> str: - return self.entrypoint - - def customize_container_args( - self, ctx: CephadmContext, args: List[str] - ) -> None: - args.append(ctx.container_engine.unlimited_pids_option) - -################################## - - -@register_daemon_form -class CephNvmeof(ContainerDaemonForm): - """Defines a Ceph-Nvmeof container""" - - daemon_type = 'nvmeof' - required_files = ['ceph-nvmeof.conf'] - default_image = DEFAULT_NVMEOF_IMAGE - - @classmethod - def for_daemon_type(cls, daemon_type: str) -> bool: - return cls.daemon_type == daemon_type - - def __init__(self, - ctx, - fsid, - daemon_id, - config_json, - image=DEFAULT_NVMEOF_IMAGE): - # type: (CephadmContext, str, Union[int, str], Dict, str) -> None - self.ctx = ctx - self.fsid = fsid - self.daemon_id = daemon_id - self.image = image - - # config-json options - self.files = dict_get(config_json, 'files', {}) - - # validate the supplied args - self.validate() - - @classmethod - def init(cls, ctx, fsid, daemon_id): - # type: (CephadmContext, str, Union[int, str]) -> CephNvmeof - return cls(ctx, fsid, daemon_id, - fetch_configs(ctx), ctx.image) - - @classmethod - def create(cls, ctx: CephadmContext, ident: DaemonIdentity) -> 'CephNvmeof': - return cls.init(ctx, ident.fsid, ident.daemon_id) - - @property - def identity(self) -> DaemonIdentity: - return DaemonIdentity(self.fsid, self.daemon_type, self.daemon_id) - - @staticmethod - def _get_container_mounts(data_dir: str) -> Dict[str, str]: - mounts = dict() - mounts[os.path.join(data_dir, 'config')] = '/etc/ceph/ceph.conf:z' - mounts[os.path.join(data_dir, 'keyring')] = '/etc/ceph/keyring:z' - mounts[os.path.join(data_dir, 'ceph-nvmeof.conf')] = '/src/ceph-nvmeof.conf:z' - mounts[os.path.join(data_dir, 'configfs')] = '/sys/kernel/config' - mounts['/dev/hugepages'] = '/dev/hugepages' - mounts['/dev/vfio/vfio'] = '/dev/vfio/vfio' - return mounts - - def customize_container_mounts( - self, ctx: CephadmContext, mounts: Dict[str, str] - ) -> None: - data_dir = self.identity.data_dir(ctx.data_dir) - mounts.update(self._get_container_mounts(data_dir)) - - def customize_container_binds( - self, ctx: CephadmContext, binds: List[List[str]] - ) -> None: - lib_modules = [ - 'type=bind', - 'source=/lib/modules', - 'destination=/lib/modules', - 'ro=true', - ] - binds.append(lib_modules) - - @staticmethod - def get_version(ctx: CephadmContext, container_id: str) -> Optional[str]: - out, err, ret = call(ctx, - [ctx.container_engine.path, 'inspect', - '--format', '{{index .Config.Labels "io.ceph.version"}}', - ctx.image]) - version = None - if ret == 0: - version 
= out.strip() - return version - - def validate(self): - # type: () -> None - if not is_fsid(self.fsid): - raise Error('not an fsid: %s' % self.fsid) - if not self.daemon_id: - raise Error('invalid daemon_id: %s' % self.daemon_id) - if not self.image: - raise Error('invalid image: %s' % self.image) - - # check for the required files - if self.required_files: - for fname in self.required_files: - if fname not in self.files: - raise Error('required file missing from config-json: %s' % fname) - - def get_daemon_name(self): - # type: () -> str - return '%s.%s' % (self.daemon_type, self.daemon_id) - - def get_container_name(self, desc=None): - # type: (Optional[str]) -> str - cname = '%s-%s' % (self.fsid, self.get_daemon_name()) - if desc: - cname = '%s-%s' % (cname, desc) - return cname - - def create_daemon_dirs(self, data_dir, uid, gid): - # type: (str, int, int) -> None - """Create files under the container data dir""" - if not os.path.isdir(data_dir): - raise OSError('data_dir is not a directory: %s' % (data_dir)) - - logger.info('Creating ceph-nvmeof config...') - configfs_dir = os.path.join(data_dir, 'configfs') - makedirs(configfs_dir, uid, gid, 0o755) - - # populate files from the config-json - populate_files(data_dir, self.files, uid, gid) - - @staticmethod - def configfs_mount_umount(data_dir, mount=True): - # type: (str, bool) -> List[str] - mount_path = os.path.join(data_dir, 'configfs') - if mount: - cmd = 'if ! grep -qs {0} /proc/mounts; then ' \ - 'mount -t configfs none {0}; fi'.format(mount_path) - else: - cmd = 'if grep -qs {0} /proc/mounts; then ' \ - 'umount {0}; fi'.format(mount_path) - return cmd.split() - - @staticmethod - def get_sysctl_settings() -> List[str]: - return [ - 'vm.nr_hugepages = 4096', - ] - - def container(self, ctx: CephadmContext) -> CephContainer: - ctr = get_container(ctx, self.identity) - return to_deployment_container(ctx, ctr) - - def uid_gid(self, ctx: CephadmContext) -> Tuple[int, int]: - return 167, 167 # TODO: need to get properly the uid/gid - - def config_and_keyring( - self, ctx: CephadmContext - ) -> Tuple[Optional[str], Optional[str]]: - return get_config_and_keyring(ctx) - - def customize_container_args( - self, ctx: CephadmContext, args: List[str] - ) -> None: - args.append(ctx.container_engine.unlimited_pids_option) - args.extend(['--ulimit', 'memlock=-1:-1']) - args.extend(['--ulimit', 'nofile=10240']) - args.extend(['--cap-add=SYS_ADMIN', '--cap-add=CAP_SYS_NICE']) - - -################################## - - -@register_daemon_form -class CephExporter(ContainerDaemonForm): - """Defines a Ceph exporter container""" - - daemon_type = 'ceph-exporter' - entrypoint = '/usr/bin/ceph-exporter' - DEFAULT_PORT = 9926 - port_map = { - 'ceph-exporter': DEFAULT_PORT, - } - - @classmethod - def for_daemon_type(cls, daemon_type: str) -> bool: - return cls.daemon_type == daemon_type - - def __init__(self, - ctx: CephadmContext, - fsid: str, daemon_id: Union[int, str], - config_json: Dict[str, Any], - image: str = DEFAULT_IMAGE) -> None: - self.ctx = ctx - self.fsid = fsid - self.daemon_id = daemon_id - self.image = image - - self.sock_dir = config_json.get('sock-dir', '/var/run/ceph/') - ipv4_addrs, _ = get_ip_addresses(get_hostname()) - addrs = '0.0.0.0' if ipv4_addrs else '::' - self.addrs = config_json.get('addrs', addrs) - self.port = config_json.get('port', self.DEFAULT_PORT) - self.prio_limit = config_json.get('prio-limit', 5) - self.stats_period = config_json.get('stats-period', 5) - - self.validate() - - @classmethod - def init(cls, ctx: 
CephadmContext, fsid: str, - daemon_id: Union[int, str]) -> 'CephExporter': - return cls(ctx, fsid, daemon_id, - fetch_configs(ctx), ctx.image) - - @classmethod - def create(cls, ctx: CephadmContext, ident: DaemonIdentity) -> 'CephExporter': - return cls.init(ctx, ident.fsid, ident.daemon_id) - - @property - def identity(self) -> DaemonIdentity: - return DaemonIdentity(self.fsid, self.daemon_type, self.daemon_id) - - def get_daemon_args(self) -> List[str]: - args = [ - f'--sock-dir={self.sock_dir}', - f'--addrs={self.addrs}', - f'--port={self.port}', - f'--prio-limit={self.prio_limit}', - f'--stats-period={self.stats_period}', - ] - return args - - def validate(self) -> None: - if not os.path.isdir(self.sock_dir): - raise Error(f'Directory does not exist. Got: {self.sock_dir}') - - def container(self, ctx: CephadmContext) -> CephContainer: - ctr = get_container(ctx, self.identity) - return to_deployment_container(ctx, ctr) - - def uid_gid(self, ctx: CephadmContext) -> Tuple[int, int]: - return extract_uid_gid(ctx) - - def config_and_keyring( - self, ctx: CephadmContext - ) -> Tuple[Optional[str], Optional[str]]: - return get_config_and_keyring(ctx) - - def customize_container_mounts( - self, ctx: CephadmContext, mounts: Dict[str, str] - ) -> None: - cm = Ceph.get_ceph_mounts(ctx, self.identity) - mounts.update(cm) - - def customize_process_args( - self, ctx: CephadmContext, args: List[str] - ) -> None: - name = 'client.ceph-exporter.%s' % self.identity.daemon_id - args.extend(['-n', name, '-f']) - args.extend(self.get_daemon_args()) - - def customize_container_args( - self, ctx: CephadmContext, args: List[str] - ) -> None: - args.append(ctx.container_engine.unlimited_pids_option) - - def customize_container_envs( - self, ctx: CephadmContext, envs: List[str] - ) -> None: - envs.append('TCMALLOC_MAX_TOTAL_THREAD_CACHE_BYTES=134217728') - - def default_entrypoint(self) -> str: - return self.entrypoint - - -################################## - - -@register_daemon_form -class HAproxy(ContainerDaemonForm): - """Defines an HAproxy container""" - daemon_type = 'haproxy' - required_files = ['haproxy.cfg'] - default_image = DEFAULT_HAPROXY_IMAGE - - @classmethod - def for_daemon_type(cls, daemon_type: str) -> bool: - return cls.daemon_type == daemon_type - - def __init__(self, - ctx: CephadmContext, - fsid: str, daemon_id: Union[int, str], - config_json: Dict, image: str) -> None: - self.ctx = ctx - self.fsid = fsid - self.daemon_id = daemon_id - self.image = image - - # config-json options - self.files = dict_get(config_json, 'files', {}) - - self.validate() - - @classmethod - def init(cls, ctx: CephadmContext, - fsid: str, daemon_id: Union[int, str]) -> 'HAproxy': - return cls(ctx, fsid, daemon_id, fetch_configs(ctx), - ctx.image) - - @classmethod - def create(cls, ctx: CephadmContext, ident: DaemonIdentity) -> 'HAproxy': - return cls.init(ctx, ident.fsid, ident.daemon_id) - - @property - def identity(self) -> DaemonIdentity: - return DaemonIdentity(self.fsid, self.daemon_type, self.daemon_id) - - def create_daemon_dirs(self, data_dir: str, uid: int, gid: int) -> None: - """Create files under the container data dir""" - if not os.path.isdir(data_dir): - raise OSError('data_dir is not a directory: %s' % (data_dir)) - - # create additional directories in data dir for HAproxy to use - if not os.path.isdir(os.path.join(data_dir, 'haproxy')): - makedirs(os.path.join(data_dir, 'haproxy'), uid, gid, DATA_DIR_MODE) - - data_dir = os.path.join(data_dir, 'haproxy') - populate_files(data_dir, self.files, 
uid, gid) - - def get_daemon_args(self) -> List[str]: - return ['haproxy', '-f', '/var/lib/haproxy/haproxy.cfg'] - - def validate(self): - # type: () -> None - if not is_fsid(self.fsid): - raise Error('not an fsid: %s' % self.fsid) - if not self.daemon_id: - raise Error('invalid daemon_id: %s' % self.daemon_id) - if not self.image: - raise Error('invalid image: %s' % self.image) - - # check for the required files - if self.required_files: - for fname in self.required_files: - if fname not in self.files: - raise Error('required file missing from config-json: %s' % fname) - - def get_daemon_name(self): - # type: () -> str - return '%s.%s' % (self.daemon_type, self.daemon_id) - - def get_container_name(self, desc=None): - # type: (Optional[str]) -> str - cname = 'ceph-%s-%s' % (self.fsid, self.get_daemon_name()) - if desc: - cname = '%s-%s' % (cname, desc) - return cname - - def uid_gid(self, ctx: CephadmContext) -> Tuple[int, int]: - # better directory for this? - return extract_uid_gid(self.ctx, file_path='/var/lib') - - @staticmethod - def _get_container_mounts(data_dir: str) -> Dict[str, str]: - mounts = dict() - mounts[os.path.join(data_dir, 'haproxy')] = '/var/lib/haproxy' - return mounts - - def customize_container_mounts( - self, ctx: CephadmContext, mounts: Dict[str, str] - ) -> None: - data_dir = self.identity.data_dir(ctx.data_dir) - mounts.update(self._get_container_mounts(data_dir)) - - @staticmethod - def get_sysctl_settings() -> List[str]: - return [ - '# IP forwarding and non-local bind', - 'net.ipv4.ip_forward = 1', - 'net.ipv4.ip_nonlocal_bind = 1', - ] - - def container(self, ctx: CephadmContext) -> CephContainer: - ctr = get_container(ctx, self.identity) - return to_deployment_container(ctx, ctr) - - def customize_container_args( - self, ctx: CephadmContext, args: List[str] - ) -> None: - args.extend( - ['--user=root'] - ) # haproxy 2.4 defaults to a different user - - def customize_process_args( - self, ctx: CephadmContext, args: List[str] - ) -> None: - args.extend(self.get_daemon_args()) - - -################################## - - -@register_daemon_form -class Keepalived(ContainerDaemonForm): - """Defines an Keepalived container""" - daemon_type = 'keepalived' - required_files = ['keepalived.conf'] - default_image = DEFAULT_KEEPALIVED_IMAGE - - @classmethod - def for_daemon_type(cls, daemon_type: str) -> bool: - return cls.daemon_type == daemon_type - - def __init__(self, - ctx: CephadmContext, - fsid: str, daemon_id: Union[int, str], - config_json: Dict, image: str) -> None: - self.ctx = ctx - self.fsid = fsid - self.daemon_id = daemon_id - self.image = image - - # config-json options - self.files = dict_get(config_json, 'files', {}) - - self.validate() - - @classmethod - def init(cls, ctx: CephadmContext, fsid: str, - daemon_id: Union[int, str]) -> 'Keepalived': - return cls(ctx, fsid, daemon_id, - fetch_configs(ctx), ctx.image) - - @classmethod - def create(cls, ctx: CephadmContext, ident: DaemonIdentity) -> 'Keepalived': - return cls.init(ctx, ident.fsid, ident.daemon_id) - - @property - def identity(self) -> DaemonIdentity: - return DaemonIdentity(self.fsid, self.daemon_type, self.daemon_id) - - def create_daemon_dirs(self, data_dir: str, uid: int, gid: int) -> None: - """Create files under the container data dir""" - if not os.path.isdir(data_dir): - raise OSError('data_dir is not a directory: %s' % (data_dir)) - - # create additional directories in data dir for keepalived to use - if not os.path.isdir(os.path.join(data_dir, 'keepalived')): - 
makedirs(os.path.join(data_dir, 'keepalived'), uid, gid, DATA_DIR_MODE) - - # populate files from the config-json - populate_files(data_dir, self.files, uid, gid) - - def validate(self): - # type: () -> None - if not is_fsid(self.fsid): - raise Error('not an fsid: %s' % self.fsid) - if not self.daemon_id: - raise Error('invalid daemon_id: %s' % self.daemon_id) - if not self.image: - raise Error('invalid image: %s' % self.image) - - # check for the required files - if self.required_files: - for fname in self.required_files: - if fname not in self.files: - raise Error('required file missing from config-json: %s' % fname) - - def get_daemon_name(self): - # type: () -> str - return '%s.%s' % (self.daemon_type, self.daemon_id) - - def get_container_name(self, desc=None): - # type: (Optional[str]) -> str - cname = 'ceph-%s-%s' % (self.fsid, self.get_daemon_name()) - if desc: - cname = '%s-%s' % (cname, desc) - return cname - - @staticmethod - def get_container_envs(): - # type: () -> List[str] - envs = [ - 'KEEPALIVED_AUTOCONF=false', - 'KEEPALIVED_CONF=/etc/keepalived/keepalived.conf', - 'KEEPALIVED_CMD=/usr/sbin/keepalived -n -l -f /etc/keepalived/keepalived.conf', - 'KEEPALIVED_DEBUG=false' - ] - return envs - - @staticmethod - def get_sysctl_settings() -> List[str]: - return [ - '# IP forwarding and non-local bind', - 'net.ipv4.ip_forward = 1', - 'net.ipv4.ip_nonlocal_bind = 1', - ] - - def uid_gid(self, ctx: CephadmContext) -> Tuple[int, int]: - # better directory for this? - return extract_uid_gid(self.ctx, file_path='/var/lib') - - @staticmethod - def _get_container_mounts(data_dir: str) -> Dict[str, str]: - mounts = dict() - mounts[os.path.join(data_dir, 'keepalived.conf')] = '/etc/keepalived/keepalived.conf' - return mounts - - def customize_container_mounts( - self, ctx: CephadmContext, mounts: Dict[str, str] - ) -> None: - data_dir = self.identity.data_dir(ctx.data_dir) - mounts.update(self._get_container_mounts(data_dir)) - - def container(self, ctx: CephadmContext) -> CephContainer: - ctr = get_container(ctx, self.identity) - return to_deployment_container(ctx, ctr) - - def customize_container_envs( - self, ctx: CephadmContext, envs: List[str] - ) -> None: - envs.extend(self.get_container_envs()) - - def customize_container_args( - self, ctx: CephadmContext, args: List[str] - ) -> None: - args.extend(['--cap-add=NET_ADMIN', '--cap-add=NET_RAW']) - - -################################## - - -@register_daemon_form -class Tracing(ContainerDaemonForm): - """Define the configs for the jaeger tracing containers""" - - components: Dict[str, Dict[str, Any]] = { - 'elasticsearch': { - 'image': DEFAULT_ELASTICSEARCH_IMAGE, - 'envs': ['discovery.type=single-node'] - }, - 'jaeger-agent': { - 'image': DEFAULT_JAEGER_AGENT_IMAGE, - }, - 'jaeger-collector': { - 'image': DEFAULT_JAEGER_COLLECTOR_IMAGE, - }, - 'jaeger-query': { - 'image': DEFAULT_JAEGER_QUERY_IMAGE, - }, - } # type: ignore - - @classmethod - def for_daemon_type(cls, daemon_type: str) -> bool: - return daemon_type in cls.components - - @staticmethod - def set_configuration(config: Dict[str, str], daemon_type: str) -> None: - if daemon_type in ['jaeger-collector', 'jaeger-query']: - assert 'elasticsearch_nodes' in config - Tracing.components[daemon_type]['envs'] = [ - 'SPAN_STORAGE_TYPE=elasticsearch', - f'ES_SERVER_URLS={config["elasticsearch_nodes"]}'] - if daemon_type == 'jaeger-agent': - assert 'collector_nodes' in config - Tracing.components[daemon_type]['daemon_args'] = [ - 
f'--reporter.grpc.host-port={config["collector_nodes"]}', - '--processor.jaeger-compact.server-host-port=6799' - ] - - def __init__(self, ident: DaemonIdentity) -> None: - self._identity = ident - self._configured = False - - def _configure(self, ctx: CephadmContext) -> None: - if self._configured: - return - config = fetch_configs(ctx) - # Currently, this method side-effects the class attribute, and that - # is unpleasant. In the future it would be nice to move all of - # set_configuration into _confiure and only modify each classes data - # independently - self.set_configuration(config, self.identity.daemon_type) - self._configured = True - - @classmethod - def create(cls, ctx: CephadmContext, ident: DaemonIdentity) -> 'Tracing': - return cls(ident) - - @property - def identity(self) -> DaemonIdentity: - return self._identity - - def container(self, ctx: CephadmContext) -> CephContainer: - ctr = get_container(ctx, self.identity) - return to_deployment_container(ctx, ctr) - - def uid_gid(self, ctx: CephadmContext) -> Tuple[int, int]: - return 65534, 65534 - - def get_daemon_args(self) -> List[str]: - return self.components[self.identity.daemon_type].get( - 'daemon_args', [] - ) - - def customize_process_args( - self, ctx: CephadmContext, args: List[str] - ) -> None: - self._configure(ctx) - # earlier code did an explicit check if the daemon type was jaeger-agent - # and would only call get_daemon_args if that was true. However, since - # the function only returns a non-empty list in the case of jaeger-agent - # that check is unnecessary and is not brought over. - args.extend(self.get_daemon_args()) - - def customize_container_envs( - self, ctx: CephadmContext, envs: List[str] - ) -> None: - self._configure(ctx) - envs.extend( - self.components[self.identity.daemon_type].get('envs', []) - ) - - def default_entrypoint(self) -> str: - return '' - - -################################## - - -@register_daemon_form -class CustomContainer(ContainerDaemonForm): - """Defines a custom container""" - daemon_type = 'container' - - @classmethod - def for_daemon_type(cls, daemon_type: str) -> bool: - return cls.daemon_type == daemon_type - - def __init__(self, - fsid: str, daemon_id: Union[int, str], - config_json: Dict, image: str) -> None: - self.fsid = fsid - self.daemon_id = daemon_id - self.image = image - - # config-json options - self.entrypoint = dict_get(config_json, 'entrypoint') - self.uid = dict_get(config_json, 'uid', 65534) # nobody - self.gid = dict_get(config_json, 'gid', 65534) # nobody - self.volume_mounts = dict_get(config_json, 'volume_mounts', {}) - self.args = dict_get(config_json, 'args', []) - self.envs = dict_get(config_json, 'envs', []) - self.privileged = dict_get(config_json, 'privileged', False) - self.bind_mounts = dict_get(config_json, 'bind_mounts', []) - self.ports = dict_get(config_json, 'ports', []) - self.dirs = dict_get(config_json, 'dirs', []) - self.files = dict_get(config_json, 'files', {}) - - @classmethod - def init(cls, ctx: CephadmContext, - fsid: str, daemon_id: Union[int, str]) -> 'CustomContainer': - return cls(fsid, daemon_id, - fetch_configs(ctx), ctx.image) - - @classmethod - def create(cls, ctx: CephadmContext, ident: DaemonIdentity) -> 'CustomContainer': - return cls.init(ctx, ident.fsid, ident.daemon_id) - - @property - def identity(self) -> DaemonIdentity: - return DaemonIdentity(self.fsid, self.daemon_type, self.daemon_id) - - def create_daemon_dirs(self, data_dir: str, uid: int, gid: int) -> None: - """ - Create dirs/files below the container data 
directory. - """ - logger.info('Creating custom container configuration ' - 'dirs/files in {} ...'.format(data_dir)) - - if not os.path.isdir(data_dir): - raise OSError('data_dir is not a directory: %s' % data_dir) - - for dir_path in self.dirs: - logger.info('Creating directory: {}'.format(dir_path)) - dir_path = os.path.join(data_dir, dir_path.strip('/')) - makedirs(dir_path, uid, gid, 0o755) - - for file_path in self.files: - logger.info('Creating file: {}'.format(file_path)) - content = dict_get_join(self.files, file_path) - file_path = os.path.join(data_dir, file_path.strip('/')) - with write_new(file_path, owner=(uid, gid), encoding='utf-8') as f: - f.write(content) - - def get_daemon_args(self) -> List[str]: - return [] - - def get_container_args(self) -> List[str]: - return self.args - - def get_container_envs(self) -> List[str]: - return self.envs - - def _get_container_mounts(self, data_dir: str) -> Dict[str, str]: - """ - Get the volume mounts. Relative source paths will be located below - `/var/lib/ceph/<cluster-fsid>/<daemon-name>`. - - Example: - { - /foo/conf: /conf - foo/conf: /conf - } - becomes - { - /foo/conf: /conf - /var/lib/ceph/<cluster-fsid>/<daemon-name>/foo/conf: /conf - } - """ - mounts = {} - for source, destination in self.volume_mounts.items(): - source = os.path.join(data_dir, source) - mounts[source] = destination - return mounts - - def customize_container_mounts( - self, ctx: CephadmContext, mounts: Dict[str, str] - ) -> None: - data_dir = self.identity.data_dir(ctx.data_dir) - mounts.update(self._get_container_mounts(data_dir)) - - def _get_container_binds(self, data_dir: str) -> List[List[str]]: - """ - Get the bind mounts. Relative `source=...` paths will be located below - `/var/lib/ceph/<cluster-fsid>/<daemon-name>`. - - Example: - [ - 'type=bind', - 'source=lib/modules', - 'destination=/lib/modules', - 'ro=true' - ] - becomes - [ - ... - 'source=/var/lib/ceph/<cluster-fsid>/<daemon-name>/lib/modules', - ... 
- ] - """ - binds = self.bind_mounts.copy() - for bind in binds: - for index, value in enumerate(bind): - match = re.match(r'^source=(.+)$', value) - if match: - bind[index] = 'source={}'.format(os.path.join( - data_dir, match.group(1))) - return binds - - def customize_container_binds( - self, ctx: CephadmContext, binds: List[List[str]] - ) -> None: - data_dir = self.identity.data_dir(ctx.data_dir) - binds.extend(self._get_container_binds(data_dir)) - - # Cache the container so we don't need to rebuild it again when calling - # into init_containers - _container: Optional[CephContainer] = None - - def container(self, ctx: CephadmContext) -> CephContainer: - if self._container is None: - ctr = get_container( - ctx, - self.identity, - privileged=self.privileged, - ptrace=ctx.allow_ptrace, - ) - self._container = to_deployment_container(ctx, ctr) - return self._container - - def init_containers(self, ctx: CephadmContext) -> List[InitContainer]: - primary = self.container(ctx) - init_containers: List[Dict[str, Any]] = getattr( - ctx, 'init_containers', [] - ) - return [ - InitContainer.from_primary_and_opts(ctx, primary, ic_opts) - for ic_opts in init_containers - ] - - def customize_container_endpoints( - self, endpoints: List[EndPoint], deployment_type: DeploymentType - ) -> None: - if deployment_type == DeploymentType.DEFAULT: - endpoints.extend([EndPoint('0.0.0.0', p) for p in self.ports]) - - def customize_container_envs( - self, ctx: CephadmContext, envs: List[str] - ) -> None: - envs.extend(self.get_container_envs()) - - def customize_container_args( - self, ctx: CephadmContext, args: List[str] - ) -> None: - args.extend(self.get_container_args()) - - def customize_process_args( - self, ctx: CephadmContext, args: List[str] - ) -> None: - args.extend(self.get_daemon_args()) - - def default_entrypoint(self) -> str: - return self.entrypoint or '' - - def uid_gid(self, ctx: CephadmContext) -> Tuple[int, int]: - return self.uid, self.gid - - -################################## - - def get_supported_daemons(): # type: () -> List[str] supported_daemons = ceph_daemons() @@ -2198,15 +226,6 @@ def get_supported_daemons(): assert len(supported_daemons) == len(set(supported_daemons)) return supported_daemons - -def ceph_daemons() -> List[str]: - cds = list(Ceph._daemons) - cds.append(CephExporter.daemon_type) - return cds - -################################## - - ################################## @@ -2829,76 +848,11 @@ def get_container_mounts_for_type( """Return a dictionary mapping container-external paths to container-internal paths given an fsid and daemon_type. """ - mounts = _get_container_mounts_for_type(ctx, fsid, daemon_type) + mounts = get_ceph_mounts_for_type(ctx, fsid, daemon_type) _update_podman_mounts(ctx, mounts) return mounts -def _get_container_mounts_for_type( - ctx: CephadmContext, fsid: str, daemon_type: str -) -> Dict[str, str]: - """The main implementation of get_container_mounts_for_type minus the call - to _update_podman_mounts so that this can be called from - get_container_mounts. 
- """ - mounts = dict() - - if daemon_type in ceph_daemons(): - if fsid: - run_path = os.path.join('/var/run/ceph', fsid) - if os.path.exists(run_path): - mounts[run_path] = '/var/run/ceph:z' - log_dir = get_log_dir(fsid, ctx.log_dir) - mounts[log_dir] = '/var/log/ceph:z' - crash_dir = '/var/lib/ceph/%s/crash' % fsid - if os.path.exists(crash_dir): - mounts[crash_dir] = '/var/lib/ceph/crash:z' - if daemon_type != 'crash' and should_log_to_journald(ctx): - journald_sock_dir = '/run/systemd/journal' - mounts[journald_sock_dir] = journald_sock_dir - - if daemon_type in ['mon', 'osd', 'clusterless-ceph-volume']: - mounts['/dev'] = '/dev' # FIXME: narrow this down? - mounts['/run/udev'] = '/run/udev' - if daemon_type in ['osd', 'clusterless-ceph-volume']: - mounts['/sys'] = '/sys' # for numa.cc, pick_address, cgroups, ... - mounts['/run/lvm'] = '/run/lvm' - mounts['/run/lock/lvm'] = '/run/lock/lvm' - if daemon_type == 'osd': - # selinux-policy in the container may not match the host. - if HostFacts(ctx).selinux_enabled: - cluster_dir = f'{ctx.data_dir}/{fsid}' - selinux_folder = f'{cluster_dir}/selinux' - if os.path.exists(cluster_dir): - if not os.path.exists(selinux_folder): - os.makedirs(selinux_folder, mode=0o755) - mounts[selinux_folder] = '/sys/fs/selinux:ro' - else: - logger.error(f'Cluster direcotry {cluster_dir} does not exist.') - mounts['/'] = '/rootfs' - - try: - if ctx.shared_ceph_folder: # make easy manager modules/ceph-volume development - ceph_folder = pathify(ctx.shared_ceph_folder) - if os.path.exists(ceph_folder): - cephadm_binary = ceph_folder + '/src/cephadm/cephadm' - if not os.path.exists(pathify(cephadm_binary)): - raise Error("cephadm binary does not exist. Please run './build.sh cephadm' from ceph/src/cephadm/ directory.") - mounts[cephadm_binary] = '/usr/sbin/cephadm' - mounts[ceph_folder + '/src/ceph-volume/ceph_volume'] = '/usr/lib/python3.6/site-packages/ceph_volume' - mounts[ceph_folder + '/src/pybind/mgr'] = '/usr/share/ceph/mgr' - mounts[ceph_folder + '/src/python-common/ceph'] = '/usr/lib/python3.6/site-packages/ceph' - mounts[ceph_folder + '/monitoring/ceph-mixin/dashboards_out'] = '/etc/grafana/dashboards/ceph-dashboard' - mounts[ceph_folder + '/monitoring/ceph-mixin/prometheus_alerts.yml'] = '/etc/prometheus/ceph/ceph_default_alerts.yml' - else: - logger.error( - 'Ceph shared source folder does not exist.', - extra=Highlight.FAILURE.extra()) - except AttributeError: - pass - return mounts - - def get_container_mounts( ctx: CephadmContext, ident: 'DaemonIdentity', no_config: bool = False ) -> Dict[str, str]: @@ -2961,107 +915,13 @@ def get_ceph_volume_container(ctx: CephadmContext, def get_container( ctx: CephadmContext, ident: 'DaemonIdentity', - privileged: bool = False, - ptrace: bool = False, - container_args: Optional[List[str]] = None, ) -> 'CephContainer': - entrypoint: str = '' - d_args: List[str] = [] - envs: List[str] = [] - host_network: bool = True - binds: List[List[str]] = [] - mounts: Dict[str, str] = {} - - daemon_type = ident.daemon_type - if container_args is None: - container_args = [] - if Ceph.for_daemon_type(daemon_type) or OSD.for_daemon_type(daemon_type): - ceph_daemon = daemon_form_create(ctx, ident) - assert isinstance(ceph_daemon, ContainerDaemonForm) - entrypoint = ceph_daemon.default_entrypoint() - ceph_daemon.customize_container_envs(ctx, envs) - ceph_daemon.customize_container_args(ctx, container_args) - ceph_daemon.customize_process_args(ctx, d_args) - mounts = get_container_mounts(ctx, ident) - if daemon_type in ['mon', 
'osd']: - # mon and osd need privileged in order for libudev to query devices - privileged = True - if daemon_type in Monitoring.components: - monitoring = Monitoring.create(ctx, ident) - entrypoint = monitoring.default_entrypoint() - monitoring.customize_container_args(ctx, container_args) - monitoring.customize_process_args(ctx, d_args) - mounts = get_container_mounts(ctx, ident) - elif daemon_type in Tracing.components: - tracing = Tracing.create(ctx, ident) - entrypoint = tracing.default_entrypoint() - tracing.customize_container_envs(ctx, envs) - tracing.customize_process_args(ctx, d_args) - elif daemon_type == NFSGanesha.daemon_type: - nfs_ganesha = NFSGanesha.create(ctx, ident) - entrypoint = nfs_ganesha.default_entrypoint() - nfs_ganesha.customize_container_envs(ctx, envs) - nfs_ganesha.customize_container_args(ctx, container_args) - nfs_ganesha.customize_process_args(ctx, d_args) - mounts = get_container_mounts(ctx, ident) - elif daemon_type == CephExporter.daemon_type: - ceph_exporter = CephExporter.create(ctx, ident) - entrypoint = ceph_exporter.default_entrypoint() - ceph_exporter.customize_container_envs(ctx, envs) - ceph_exporter.customize_container_args(ctx, container_args) - ceph_exporter.customize_process_args(ctx, d_args) - mounts = get_container_mounts(ctx, ident) - elif daemon_type == HAproxy.daemon_type: - haproxy = HAproxy.create(ctx, ident) - haproxy.customize_container_args(ctx, container_args) - haproxy.customize_process_args(ctx, d_args) - mounts = get_container_mounts(ctx, ident) - elif daemon_type == Keepalived.daemon_type: - keepalived = Keepalived.create(ctx, ident) - keepalived.customize_container_envs(ctx, envs) - keepalived.customize_container_args(ctx, container_args) - mounts = get_container_mounts(ctx, ident) - elif daemon_type == CephNvmeof.daemon_type: - nvmeof = CephNvmeof.create(ctx, ident) - nvmeof.customize_container_args(ctx, container_args) - binds = get_container_binds(ctx, ident) - mounts = get_container_mounts(ctx, ident) - elif daemon_type == CephIscsi.daemon_type: - iscsi = CephIscsi.create(ctx, ident) - entrypoint = iscsi.default_entrypoint() - iscsi.customize_container_args(ctx, container_args) - # So the container can modprobe iscsi_target_mod and have write perms - # to configfs we need to make this a privileged container. 
- privileged = True - binds = get_container_binds(ctx, ident) - mounts = get_container_mounts(ctx, ident) - elif daemon_type == CustomContainer.daemon_type: - cc = CustomContainer.init(ctx, ident.fsid, ident.daemon_id) - entrypoint = cc.default_entrypoint() - host_network = False - cc.customize_container_envs(ctx, envs) - cc.customize_container_args(ctx, container_args) - cc.customize_process_args(ctx, d_args) - binds = get_container_binds(ctx, ident) - mounts = get_container_mounts(ctx, ident) - elif daemon_type == SNMPGateway.daemon_type: - sg = SNMPGateway.create(ctx, ident) - sg.customize_container_args(ctx, container_args) - sg.customize_process_args(ctx, d_args) - - _update_container_args_for_podman(ctx, ident, container_args) - return CephContainer.for_daemon( - ctx, - ident=ident, - entrypoint=entrypoint, - args=d_args, - container_args=container_args, - volume_mounts=mounts, - bind_mounts=binds, - envs=envs, - privileged=privileged, - ptrace=ptrace, - host_network=host_network, + daemon = daemon_form_create(ctx, ident) + assert isinstance(daemon, ContainerDaemonForm) + privileged = ident.daemon_type in {'mon', 'osd', CephIscsi.daemon_type} + host_network = ident.daemon_type != CustomContainer.daemon_type + return daemon_to_container( + ctx, daemon, privileged=privileged, host_network=host_network ) @@ -3196,97 +1056,6 @@ def deploy_daemon( call_throws(ctx, ['systemctl', 'restart', ident.unit_name]) -def _bash_cmd( - fh: IO[str], - cmd: List[str], - check: bool = True, - background: bool = False, - stderr: bool = True, -) -> None: - line = ' '.join(shlex.quote(arg) for arg in cmd) - if not check: - line = f'! {line}' - if not stderr: - line = f'{line} 2> /dev/null' - if background: - line = f'{line} &' - fh.write(line) - fh.write('\n') - - -def _write_container_cmd_to_bash( - ctx: CephadmContext, - file_obj: IO[str], - container: 'CephContainer', - comment: Optional[str] = None, - background: Optional[bool] = False, -) -> None: - if comment: - # Sometimes adding a comment, especially if there are multiple containers in one - # unit file, makes it easier to read and grok. - assert '\n' not in comment - file_obj.write(f'# {comment}\n') - # Sometimes, adding `--rm` to a run_cmd doesn't work. Let's remove the container manually - _bash_cmd( - file_obj, container.rm_cmd(old_cname=True), check=False, stderr=False - ) - _bash_cmd(file_obj, container.rm_cmd(), check=False, stderr=False) - - # Sometimes, `podman rm` doesn't find the container. 
Then you'll have to add `--storage` - if isinstance(ctx.container_engine, Podman): - _bash_cmd( - file_obj, - container.rm_cmd(storage=True), - check=False, - stderr=False, - ) - _bash_cmd( - file_obj, - container.rm_cmd(old_cname=True, storage=True), - check=False, - stderr=False, - ) - - # container run command - _bash_cmd(file_obj, container.run_cmd(), background=bool(background)) - - -def _write_init_container_cmds( - ctx: CephadmContext, - file_obj: IO[str], - index: int, - init_container: 'InitContainer', -) -> None: - file_obj.write(f'# init container {index}: {init_container.cname}\n') - _bash_cmd(file_obj, init_container.run_cmd()) - _write_init_container_cmds_clean(ctx, file_obj, init_container, comment='') - - -def _write_init_container_cmds_clean( - ctx: CephadmContext, - file_obj: IO[str], - init_container: 'InitContainer', - comment: str = 'init container cleanup', -) -> None: - if comment: - assert '\n' not in comment - file_obj.write(f'# {comment}\n') - _bash_cmd( - file_obj, - init_container.rm_cmd(), - check=False, - stderr=False, - ) - # Sometimes, `podman rm` doesn't find the container. Then you'll have to add `--storage` - if isinstance(ctx.container_engine, Podman): - _bash_cmd( - file_obj, - init_container.rm_cmd(storage=True), - check=False, - stderr=False, - ) - - def clean_cgroup(ctx: CephadmContext, fsid: str, unit_name: str) -> None: # systemd may fail to cleanup cgroups from previous stopped unit, which will cause next "systemctl start" to fail. # see https://tracker.ceph.com/issues/50998 @@ -3324,85 +1093,44 @@ def deploy_daemon_units( endpoints: Optional[List[EndPoint]] = None, init_containers: Optional[List['InitContainer']] = None, ) -> None: - # cmd - - # unpack values from ident because they're used very frequently - fsid = ident.fsid - daemon_type = ident.daemon_type - daemon_id = ident.daemon_id - data_dir = ident.data_dir(ctx.data_dir) - run_file_path = data_dir + '/unit.run' - meta_file_path = data_dir + '/unit.meta' - with write_new(run_file_path) as f, write_new(meta_file_path) as metaf: - - f.write('set -e\n') - - if daemon_type in ceph_daemons(): - install_path = find_program('install') - f.write('{install_path} -d -m0770 -o {uid} -g {gid} /var/run/ceph/{fsid}\n'.format(install_path=install_path, fsid=fsid, uid=uid, gid=gid)) + pre_start_commands: List[runscripts.Command] = [] + post_stop_commands: List[runscripts.Command] = [] + + if ident.daemon_type in ceph_daemons(): + install_path = find_program('install') + pre_start_commands.append('{install_path} -d -m0770 -o {uid} -g {gid} /var/run/ceph/{fsid}\n'.format(install_path=install_path, fsid=ident.fsid, uid=uid, gid=gid)) + if ident.daemon_type == 'osd': + assert osd_fsid + pre_start_commands.extend(_osd_unit_run_commands( + ctx, ident, osd_fsid, data_dir, uid, gid + )) + post_stop_commands.extend( + _osd_unit_poststop_commands(ctx, ident, osd_fsid) + ) + if ident.daemon_type == CephIscsi.daemon_type: + pre_start_commands.extend(_iscsi_unit_run_commands(ctx, ident, data_dir)) + post_stop_commands.extend(_iscsi_unit_poststop_commands(ctx, ident, data_dir)) - # pre-start cmd(s) - if daemon_type == 'osd': - assert osd_fsid - _write_osd_unit_run_commands( - ctx, f, ident, osd_fsid, data_dir, uid, gid - ) - elif daemon_type == CephIscsi.daemon_type: - _write_iscsi_unit_run_commands(ctx, f, ident, data_dir) - init_containers = init_containers or [] - if init_containers: - _write_init_container_cmds_clean(ctx, f, init_containers[0]) - for idx, ic in enumerate(init_containers): - 
_write_init_container_cmds(ctx, f, idx, ic) - - _write_container_cmd_to_bash(ctx, f, container, '%s.%s' % (daemon_type, str(daemon_id))) - - # some metadata about the deploy - meta: Dict[str, Any] = fetch_meta(ctx) - meta.update({ - 'memory_request': int(ctx.memory_request) if ctx.memory_request else None, - 'memory_limit': int(ctx.memory_limit) if ctx.memory_limit else None, - }) - if not meta.get('ports'): - if endpoints: - meta['ports'] = [e.port for e in endpoints] - else: - meta['ports'] = [] - metaf.write(json.dumps(meta, indent=4) + '\n') - - timeout = 30 if daemon_type == 'osd' else None - # post-stop command(s) - with write_new(data_dir + '/unit.poststop') as f: - # this is a fallback to eventually stop any underlying container that was not stopped properly by unit.stop, - # this could happen in very slow setups as described in the issue https://tracker.ceph.com/issues/58242. - _write_stop_actions(ctx, cast(TextIO, f), container, timeout) - if daemon_type == 'osd': - assert osd_fsid - _write_osd_unit_poststop_commands(ctx, f, ident, osd_fsid) - elif daemon_type == CephIscsi.daemon_type: - _write_iscsi_unit_poststop_commands(ctx, f, ident, data_dir) - - # post-stop command(s) - with write_new(data_dir + '/unit.stop') as f: - _write_stop_actions(ctx, cast(TextIO, f), container, timeout) - - if container: - with write_new(data_dir + '/unit.image') as f: - f.write(container.image + '\n') + runscripts.write_service_scripts( + ctx, + ident, + container=container, + init_containers=init_containers, + endpoints=endpoints, + pre_start_commands=pre_start_commands, + post_stop_commands=post_stop_commands, + timeout=30 if ident.daemon_type == 'osd' else None, + ) # sysctl - install_sysctl(ctx, fsid, daemon_form_create(ctx, ident)) + install_sysctl(ctx, ident.fsid, daemon_form_create(ctx, ident)) # systemd - install_base_units(ctx, fsid) - unit = get_unit_file(ctx, fsid) - unit_file = 'ceph-%s@.service' % (fsid) - with write_new(ctx.unit_dir + '/' + unit_file, perms=None) as f: - f.write(unit) + systemd_unit.update_files(ctx, ident) call_throws(ctx, ['systemctl', 'daemon-reload']) - unit_name = get_unit_name(fsid, daemon_type, daemon_id) + unit_name = get_unit_name(ident.fsid, ident.daemon_type, ident.daemon_id) call(ctx, ['systemctl', 'stop', unit_name], verbosity=CallVerbosity.DEBUG) call(ctx, ['systemctl', 'reset-failed', unit_name], @@ -3410,38 +1138,27 @@ def deploy_daemon_units( if enable: call_throws(ctx, ['systemctl', 'enable', unit_name]) if start: - clean_cgroup(ctx, fsid, unit_name) + clean_cgroup(ctx, ident.fsid, unit_name) call_throws(ctx, ['systemctl', 'start', unit_name]) -def _write_stop_actions( - ctx: CephadmContext, f: TextIO, container: 'CephContainer', timeout: Optional[int] -) -> None: - # following generated script basically checks if the container exists - # before stopping it. Exit code will be success either if it doesn't - # exist or if it exists and is stopped successfully. - container_exists = f'{ctx.container_engine.path} inspect %s &>/dev/null' - f.write(f'! {container_exists % container.old_cname} || {" ".join(container.stop_cmd(old_cname=True, timeout=timeout))} \n') - f.write(f'! 
{container_exists % container.cname} || {" ".join(container.stop_cmd(timeout=timeout))} \n') - - -def _write_osd_unit_run_commands( +def _osd_unit_run_commands( ctx: CephadmContext, - f: IO, ident: 'DaemonIdentity', osd_fsid: str, data_dir: str, uid: int, gid: int, -) -> None: +) -> List[runscripts.Command]: + cmds: List[runscripts.Command] = [] # osds have a pre-start step simple_fn = os.path.join('/etc/ceph/osd', '%s-%s.json.adopted-by-cephadm' % (ident.daemon_id, osd_fsid)) if os.path.exists(simple_fn): - f.write('# Simple OSDs need chown on startup:\n') + cmds.append('# Simple OSDs need chown on startup:\n') for n in ['block', 'block.db', 'block.wal']: p = os.path.join(data_dir, n) - f.write('[ ! -L {p} ] || chown {uid}:{gid} {p}\n'.format(p=p, uid=uid, gid=gid)) + cmds.append('[ ! -L {p} ] || chown {uid}:{gid} {p}\n'.format(p=p, uid=uid, gid=gid)) else: # if ceph-volume does not support 'ceph-volume activate', we must # do 'ceph-volume lvm activate'. @@ -3481,21 +1198,24 @@ def _write_osd_unit_run_commands( bind_mounts=get_container_binds(ctx, ident), cname='ceph-%s-%s.%s-activate' % (fsid, daemon_type, daemon_id), ) - _write_container_cmd_to_bash(ctx, f, prestart, 'LVM OSDs use ceph-volume lvm activate') + cmds.append(runscripts.ContainerCommand(prestart, comment='LVM OSDs use ceph-volume lvm activate')) + return cmds -def _write_iscsi_unit_run_commands( - ctx: CephadmContext, f: IO, ident: 'DaemonIdentity', data_dir: str -) -> None: - f.write(' '.join(CephIscsi.configfs_mount_umount(data_dir, mount=True)) + '\n') +def _iscsi_unit_run_commands( + ctx: CephadmContext, ident: 'DaemonIdentity', data_dir: str +) -> List[runscripts.Command]: + cmds: List[runscripts.Command] = [] + cmds.append(' '.join(CephIscsi.configfs_mount_umount(data_dir, mount=True)) + '\n') ceph_iscsi = CephIscsi.init(ctx, ident.fsid, ident.daemon_id) tcmu_container = ceph_iscsi.get_tcmu_runner_container() - _write_container_cmd_to_bash(ctx, f, tcmu_container, 'iscsi tcmu-runner container', background=True) + cmds.append(runscripts.ContainerCommand(tcmu_container, comment='iscsi tcmu-runner container', background=True)) + return cmds -def _write_osd_unit_poststop_commands( - ctx: CephadmContext, f: IO, ident: 'DaemonIdentity', osd_fsid: str -) -> None: +def _osd_unit_poststop_commands( + ctx: CephadmContext, ident: 'DaemonIdentity', osd_fsid: str +) -> List[runscripts.Command]: poststop = get_ceph_volume_container( ctx, args=[ @@ -3506,118 +1226,22 @@ def _write_osd_unit_poststop_commands( bind_mounts=get_container_binds(ctx, ident), cname='ceph-%s-%s.%s-deactivate' % (ident.fsid, ident.daemon_type, ident.daemon_id), ) - _write_container_cmd_to_bash(ctx, f, poststop, 'deactivate osd') + return [runscripts.ContainerCommand(poststop, comment='deactivate osd')] -def _write_iscsi_unit_poststop_commands( - ctx: CephadmContext, f: IO, ident: 'DaemonIdentity', data_dir: str -) -> None: +def _iscsi_unit_poststop_commands( + ctx: CephadmContext, ident: 'DaemonIdentity', data_dir: str +) -> List[runscripts.Command]: # make sure we also stop the tcmu container + cmds: List[runscripts.Command] = [] runtime_dir = '/run' ceph_iscsi = CephIscsi.init(ctx, ident.fsid, ident.daemon_id) tcmu_container = ceph_iscsi.get_tcmu_runner_container() - f.write('! ' + ' '.join(tcmu_container.stop_cmd()) + '\n') - f.write('! ' + 'rm ' + runtime_dir + '/ceph-%s@%s.%s.service-pid' % (ident.fsid, ident.daemon_type, ident.daemon_id + '.tcmu') + '\n') - f.write('! 
' + 'rm ' + runtime_dir + '/ceph-%s@%s.%s.service-cid' % (ident.fsid, ident.daemon_type, ident.daemon_id + '.tcmu') + '\n') - f.write(' '.join(CephIscsi.configfs_mount_umount(data_dir, mount=False)) + '\n') - - -def install_base_units(ctx, fsid): - # type: (CephadmContext, str) -> None - """ - Set up ceph.target and ceph-$fsid.target units. - """ - # global unit - existed = os.path.exists(ctx.unit_dir + '/ceph.target') - with write_new(ctx.unit_dir + '/ceph.target', perms=None) as f: - f.write('[Unit]\n' - 'Description=All Ceph clusters and services\n' - '\n' - '[Install]\n' - 'WantedBy=multi-user.target\n') - if not existed: - # we disable before enable in case a different ceph.target - # (from the traditional package) is present; while newer - # systemd is smart enough to disable the old - # (/lib/systemd/...) and enable the new (/etc/systemd/...), - # some older versions of systemd error out with EEXIST. - call_throws(ctx, ['systemctl', 'disable', 'ceph.target']) - call_throws(ctx, ['systemctl', 'enable', 'ceph.target']) - call_throws(ctx, ['systemctl', 'start', 'ceph.target']) - - # cluster unit - existed = os.path.exists(ctx.unit_dir + '/ceph-%s.target' % fsid) - with write_new(ctx.unit_dir + f'/ceph-{fsid}.target', perms=None) as f: - f.write( - '[Unit]\n' - 'Description=Ceph cluster {fsid}\n' - 'PartOf=ceph.target\n' - 'Before=ceph.target\n' - '\n' - '[Install]\n' - 'WantedBy=multi-user.target ceph.target\n'.format( - fsid=fsid) - ) - if not existed: - call_throws(ctx, ['systemctl', 'enable', 'ceph-%s.target' % fsid]) - call_throws(ctx, ['systemctl', 'start', 'ceph-%s.target' % fsid]) - - # don't overwrite file in order to allow users to manipulate it - if os.path.exists(ctx.logrotate_dir + f'/ceph-{fsid}'): - return - - # logrotate for the cluster - with write_new(ctx.logrotate_dir + f'/ceph-{fsid}', perms=None) as f: - """ - This is a bit sloppy in that the killall/pkill will touch all ceph daemons - in all containers, but I don't see an elegant way to send SIGHUP *just* to - the daemons for this cluster. (1) systemd kill -s will get the signal to - podman, but podman will exit. (2) podman kill will get the signal to the - first child (bash), but that isn't the ceph daemon. This is simpler and - should be harmless. - """ - targets: List[str] = [ - 'ceph-mon', - 'ceph-mgr', - 'ceph-mds', - 'ceph-osd', - 'ceph-fuse', - 'radosgw', - 'rbd-mirror', - 'cephfs-mirror', - 'tcmu-runner' - ] - - f.write("""# created by cephadm -/var/log/ceph/%s/*.log { - rotate 7 - daily - compress - sharedscripts - postrotate - killall -q -1 %s || pkill -1 -x '%s' || true - endscript - missingok - notifempty - su root root -} -""" % (fsid, ' '.join(targets), '|'.join(targets))) - - -def get_unit_file(ctx: CephadmContext, fsid: str) -> str: - has_docker_engine = isinstance(ctx.container_engine, Docker) - has_podman_engine = isinstance(ctx.container_engine, Podman) - has_podman_split_version = ( - has_podman_engine and ctx.container_engine.supports_split_cgroups - ) - return templating.render( - ctx, - templating.Templates.ceph_service, - fsid=fsid, - has_docker_engine=has_docker_engine, - has_podman_engine=has_podman_engine, - has_podman_split_version=has_podman_split_version, - ) + cmds.append('! ' + ' '.join(tcmu_container.stop_cmd()) + '\n') + cmds.append('! ' + 'rm ' + runtime_dir + '/ceph-%s@%s.%s.service-pid' % (ident.fsid, ident.daemon_type, ident.daemon_id + '.tcmu') + '\n') + cmds.append('! 
' + 'rm ' + runtime_dir + '/ceph-%s@%s.%s.service-cid' % (ident.fsid, ident.daemon_type, ident.daemon_id + '.tcmu') + '\n') + cmds.append(' '.join(CephIscsi.configfs_mount_umount(data_dir, mount=False)) + '\n') + return cmds ################################## @@ -3949,7 +1573,7 @@ class CephadmAgent(DaemonForm): ) name_id_mapping: Dict[str, str] = self._parse_container_id_name(code, out) for i in os.listdir(data_dir): - if i in ['mon', 'osd', 'mds', 'mgr']: + if i in ['mon', 'osd', 'mds', 'mgr', 'rgw']: daemon_type = i for j in os.listdir(os.path.join(data_dir, i)): if '-' not in j: @@ -5336,36 +2960,6 @@ def command_registry_login(ctx: CephadmContext) -> int: ################################## -def to_deployment_container( - ctx: CephadmContext, ctr: CephContainer -) -> CephContainer: - """Given a standard ceph container instance return a CephContainer - prepared for a deployment as a daemon, having the extra args and - custom configurations added. - NOTE: The `ctr` object is mutated before being returned. - """ - if 'extra_container_args' in ctx and ctx.extra_container_args: - ctr.container_args.extend(ctx.extra_container_args) - if 'extra_entrypoint_args' in ctx and ctx.extra_entrypoint_args: - ctr.args.extend(ctx.extra_entrypoint_args) - ccfiles = fetch_custom_config_files(ctx) - if ccfiles: - mandatory_keys = ['mount_path', 'content'] - for conf in ccfiles: - if all(k in conf for k in mandatory_keys): - mount_path = conf['mount_path'] - assert ctr.identity - file_path = os.path.join( - ctx.data_dir, - ctr.identity.fsid, - 'custom_config_files', - ctr.identity.daemon_name, - os.path.basename(mount_path) - ) - ctr.volume_mounts[file_path] = mount_path - return ctr - - def get_deployment_type( ctx: CephadmContext, ident: 'DaemonIdentity', ) -> DeploymentType: @@ -5700,21 +3294,16 @@ def command_ceph_volume(ctx): ################################## +@infer_fsid def command_unit_install(ctx): # type: (CephadmContext) -> int - if not ctx.fsid: + if not getattr(ctx, 'fsid', None): raise Error('must pass --fsid to specify cluster') - - fsid = ctx.fsid - install_base_units(ctx, fsid) - unit = get_unit_file(ctx, fsid) - unit_file = 'ceph-%s@.service' % (fsid) - with open(ctx.unit_dir + '/' + unit_file + '.new', 'w') as f: - f.write(unit) - os.rename(ctx.unit_dir + '/' + unit_file + '.new', - ctx.unit_dir + '/' + unit_file) + if not getattr(ctx, 'name', None): + raise Error('daemon name required') + ident = DaemonIdentity.from_context(ctx) + systemd_unit.update_files(ctx, ident) call_throws(ctx, ['systemctl', 'daemon-reload']) - return 0 @@ -5813,7 +3402,7 @@ def list_daemons(ctx, detail=True, legacy_dir=None): # /var/lib/ceph if os.path.exists(data_dir): for i in os.listdir(data_dir): - if i in ['mon', 'osd', 'mds', 'mgr']: + if i in ['mon', 'osd', 'mds', 'mgr', 'rgw']: daemon_type = i for j in os.listdir(os.path.join(data_dir, i)): if '-' not in j: @@ -7460,6 +5049,13 @@ def _get_parser(): parser_unit_install = subparsers.add_parser( 'unit-install', help="Install the daemon's systemd unit") parser_unit_install.set_defaults(func=command_unit_install) + parser_unit_install.add_argument( + '--fsid', + help='cluster FSID') + parser_unit_install.add_argument( + '--name', '-n', + required=True, + help='daemon name (type.id)') parser_logs = subparsers.add_parser( 'logs', help='print journald logs for a daemon container') diff --git a/src/cephadm/cephadmlib/container_daemon_form.py b/src/cephadm/cephadmlib/container_daemon_form.py index ae8b6afe3c6..693b0cf8df0 100644 --- 
a/src/cephadm/cephadmlib/container_daemon_form.py +++ b/src/cephadm/cephadmlib/container_daemon_form.py @@ -4,6 +4,7 @@ import abc from typing import List, Tuple, Optional, Dict +from .container_engines import Podman from .container_types import CephContainer, InitContainer from .context import CephadmContext from .daemon_form import DaemonForm @@ -110,3 +111,67 @@ class ContainerDaemonForm(DaemonForm): in a container. """ return '' + + +def daemon_to_container( + ctx: CephadmContext, + daemon: ContainerDaemonForm, + *, + privileged: bool = False, + ptrace: bool = False, + host_network: bool = True, + entrypoint: Optional[str] = None, + container_args: Optional[List[str]] = None, + container_mounts: Optional[Dict[str, str]] = None, + container_binds: Optional[List[List[str]]] = None, + envs: Optional[List[str]] = None, + args: Optional[List[str]] = None, + auto_podman_args: bool = True, + auto_podman_mounts: bool = True, +) -> CephContainer: + """daemon_to_container is a utility function that serves to create + CephContainer instances from a container daemon form's customize and + entrypoint methods. + Most of the parameters (like mounts, container_args, etc) can be passed in + to "pre customize" the values. + The auto_podman_args argument enables adding default arguments expected on + all podman daemons (true by default). + The auto_podman_mounts argument enables adding mounts expected on all + daemons running on podman (true by default). + """ + container_args = container_args if container_args else [] + container_mounts = container_mounts if container_mounts else {} + container_binds = container_binds if container_binds else [] + envs = envs if envs else [] + args = args if args else [] + + if entrypoint is None: + entrypoint = daemon.default_entrypoint() + daemon.customize_container_args(ctx, container_args) + daemon.customize_container_mounts(ctx, container_mounts) + daemon.customize_container_binds(ctx, container_binds) + daemon.customize_container_envs(ctx, envs) + daemon.customize_process_args(ctx, args) + + _is_podman = isinstance(ctx.container_engine, Podman) + if auto_podman_mounts and _is_podman: + ctx.container_engine.update_mounts(ctx, container_mounts) + if auto_podman_args and _is_podman: + service_name = f'{daemon.identity.unit_name}.service' + container_args.extend( + ctx.container_engine.service_args(ctx, service_name) + ) + + return CephContainer.for_daemon( + ctx, + ident=daemon.identity, + entrypoint=entrypoint, + args=args, + container_args=container_args, + volume_mounts=container_mounts, + bind_mounts=container_binds, + envs=envs, + privileged=privileged, + ptrace=ptrace, + host_network=host_network, + ) diff --git a/src/cephadm/cephadmlib/daemons/__init__.py b/src/cephadm/cephadmlib/daemons/__init__.py new file mode 100644 index 00000000000..cf572d487c9 --- /dev/null +++ b/src/cephadm/cephadmlib/daemons/__init__.py @@ -0,0 +1,24 @@ +from .ceph import Ceph, OSD, CephExporter +from .custom import CustomContainer +from .ingress import HAproxy, Keepalived +from .iscsi import CephIscsi +from .monitoring import Monitoring +from .nfs import NFSGanesha +from .nvmeof import CephNvmeof +from .snmp import SNMPGateway +from .tracing import Tracing + +__all__ = [ + 'Ceph', + 'CephExporter', + 'CephIscsi', + 'CephNvmeof', + 'CustomContainer', + 'HAproxy', + 'Keepalived', + 'Monitoring', + 'NFSGanesha', + 'OSD', + 'SNMPGateway', + 'Tracing', +] diff --git a/src/cephadm/cephadmlib/daemons/ceph.py b/src/cephadm/cephadmlib/daemons/ceph.py new file mode 100644 index 
00000000000..0afb8f734af --- /dev/null +++ b/src/cephadm/cephadmlib/daemons/ceph.py @@ -0,0 +1,462 @@ +import logging +import os + +from typing import Any, Dict, List, Optional, Tuple, Union + +from ..container_daemon_form import ContainerDaemonForm, daemon_to_container +from ..container_types import CephContainer, extract_uid_gid +from ..context_getters import ( + fetch_configs, + get_config_and_keyring, + should_log_to_journald, +) +from ..daemon_form import register as register_daemon_form +from ..daemon_identity import DaemonIdentity +from ..constants import DEFAULT_IMAGE +from ..context import CephadmContext +from ..deployment_utils import to_deployment_container +from ..exceptions import Error +from ..file_utils import make_run_dir, pathify +from ..host_facts import HostFacts +from ..logging import Highlight +from ..net_utils import get_hostname, get_ip_addresses + + +logger = logging.getLogger() + + +@register_daemon_form +class Ceph(ContainerDaemonForm): + _daemons = ( + 'mon', + 'mgr', + 'osd', + 'mds', + 'rgw', + 'rbd-mirror', + 'crash', + 'cephfs-mirror', + ) + + @classmethod + def for_daemon_type(cls, daemon_type: str) -> bool: + # TODO: figure out a way to un-special-case osd + return daemon_type in cls._daemons and daemon_type != 'osd' + + def __init__(self, ctx: CephadmContext, ident: DaemonIdentity) -> None: + self.ctx = ctx + self._identity = ident + self.user_supplied_config = False + + @classmethod + def create(cls, ctx: CephadmContext, ident: DaemonIdentity) -> 'Ceph': + return cls(ctx, ident) + + @property + def identity(self) -> DaemonIdentity: + return self._identity + + def firewall_service_name(self) -> str: + if self.identity.daemon_type == 'mon': + return 'ceph-mon' + elif self.identity.daemon_type in ['mgr', 'mds']: + return 'ceph' + return '' + + def container(self, ctx: CephadmContext) -> CephContainer: + # previous to being a ContainerDaemonForm, this call to create the + # var-run directory was hard coded in the deploy path. Eventually, it + # would be good to move this somwhere cleaner and avoid needing to know + # the uid/gid here. 
+ uid, gid = self.uid_gid(ctx) + make_run_dir(ctx.fsid, uid, gid) + + # mon and osd need privileged in order for libudev to query devices + privileged = self.identity.daemon_type in ['mon', 'osd'] + ctr = daemon_to_container(ctx, self, privileged=privileged) + ctr = to_deployment_container(ctx, ctr) + config_json = fetch_configs(ctx) + if self.identity.daemon_type == 'mon' and config_json is not None: + if 'crush_location' in config_json: + c_loc = config_json['crush_location'] + # was originally "c.args.extend(['--set-crush-location', c_loc])" + # but that doesn't seem to persist in the object after it's passed + # in further function calls + ctr.args = ctr.args + ['--set-crush-location', c_loc] + return ctr + + _uid_gid: Optional[Tuple[int, int]] = None + + def uid_gid(self, ctx: CephadmContext) -> Tuple[int, int]: + if self._uid_gid is None: + self._uid_gid = extract_uid_gid(ctx) + return self._uid_gid + + def config_and_keyring( + self, ctx: CephadmContext + ) -> Tuple[Optional[str], Optional[str]]: + return get_config_and_keyring(ctx) + + def get_daemon_args(self) -> List[str]: + if self.identity.daemon_type == 'crash': + return [] + r = [ + '--setuser', + 'ceph', + '--setgroup', + 'ceph', + '--default-log-to-file=false', + ] + log_to_journald = should_log_to_journald(self.ctx) + if log_to_journald: + r += [ + '--default-log-to-journald=true', + '--default-log-to-stderr=false', + ] + else: + r += [ + '--default-log-to-stderr=true', + '--default-log-stderr-prefix=debug ', + ] + if self.identity.daemon_type == 'mon': + r += [ + '--default-mon-cluster-log-to-file=false', + ] + if log_to_journald: + r += [ + '--default-mon-cluster-log-to-journald=true', + '--default-mon-cluster-log-to-stderr=false', + ] + else: + r += ['--default-mon-cluster-log-to-stderr=true'] + return r + + @staticmethod + def get_ceph_mounts( + ctx: CephadmContext, + ident: DaemonIdentity, + no_config: bool = False, + ) -> Dict[str, str]: + # Warning: This is a hack done for more expedient refactoring + mounts = get_ceph_mounts_for_type(ctx, ident.fsid, ident.daemon_type) + data_dir = ident.data_dir(ctx.data_dir) + if ident.daemon_type == 'rgw': + cdata_dir = '/var/lib/ceph/radosgw/ceph-rgw.%s' % ( + ident.daemon_id + ) + else: + cdata_dir = '/var/lib/ceph/%s/ceph-%s' % ( + ident.daemon_type, + ident.daemon_id, + ) + if ident.daemon_type != 'crash': + mounts[data_dir] = cdata_dir + ':z' + if not no_config: + mounts[data_dir + '/config'] = '/etc/ceph/ceph.conf:z' + if ident.daemon_type in [ + 'rbd-mirror', + 'cephfs-mirror', + 'crash', + 'ceph-exporter', + ]: + # these do not search for their keyrings in a data directory + mounts[ + data_dir + '/keyring' + ] = '/etc/ceph/ceph.client.%s.%s.keyring' % ( + ident.daemon_type, + ident.daemon_id, + ) + return mounts + + def customize_container_mounts( + self, ctx: CephadmContext, mounts: Dict[str, str] + ) -> None: + no_config = bool( + getattr(ctx, 'config', None) and self.user_supplied_config + ) + cm = self.get_ceph_mounts( + ctx, + self.identity, + no_config=no_config, + ) + mounts.update(cm) + + def customize_container_args( + self, ctx: CephadmContext, args: List[str] + ) -> None: + args.append(ctx.container_engine.unlimited_pids_option) + + def customize_process_args( + self, ctx: CephadmContext, args: List[str] + ) -> None: + ident = self.identity + if ident.daemon_type == 'rgw': + name = 'client.rgw.%s' % ident.daemon_id + elif ident.daemon_type == 'rbd-mirror': + name = 'client.rbd-mirror.%s' % ident.daemon_id + elif ident.daemon_type == 'cephfs-mirror': + name = 
'client.cephfs-mirror.%s' % ident.daemon_id + elif ident.daemon_type == 'crash': + name = 'client.crash.%s' % ident.daemon_id + elif ident.daemon_type in ['mon', 'mgr', 'mds', 'osd']: + name = ident.daemon_name + else: + raise ValueError(ident) + args.extend(['-n', name]) + if ident.daemon_type != 'crash': + args.append('-f') + args.extend(self.get_daemon_args()) + + def customize_container_envs( + self, ctx: CephadmContext, envs: List[str] + ) -> None: + envs.append('TCMALLOC_MAX_TOTAL_THREAD_CACHE_BYTES=134217728') + + def default_entrypoint(self) -> str: + ep = { + 'rgw': '/usr/bin/radosgw', + 'rbd-mirror': '/usr/bin/rbd-mirror', + 'cephfs-mirror': '/usr/bin/cephfs-mirror', + } + daemon_type = self.identity.daemon_type + return ep.get(daemon_type) or f'/usr/bin/ceph-{daemon_type}' + + +@register_daemon_form +class OSD(Ceph): + @classmethod + def for_daemon_type(cls, daemon_type: str) -> bool: + # TODO: figure out a way to un-special-case osd + return daemon_type == 'osd' + + def __init__( + self, + ctx: CephadmContext, + ident: DaemonIdentity, + osd_fsid: Optional[str] = None, + ) -> None: + super().__init__(ctx, ident) + self._osd_fsid = osd_fsid + + @classmethod + def create(cls, ctx: CephadmContext, ident: DaemonIdentity) -> 'OSD': + osd_fsid = getattr(ctx, 'osd_fsid', None) + if osd_fsid is None: + logger.info( + 'Creating an OSD daemon form without an OSD FSID value' + ) + return cls(ctx, ident, osd_fsid) + + @staticmethod + def get_sysctl_settings() -> List[str]: + return [ + '# allow a large number of OSDs', + 'fs.aio-max-nr = 1048576', + 'kernel.pid_max = 4194304', + ] + + def firewall_service_name(self) -> str: + return 'ceph' + + @property + def osd_fsid(self) -> Optional[str]: + return self._osd_fsid + + +@register_daemon_form +class CephExporter(ContainerDaemonForm): + """Defines a Ceph exporter container""" + + daemon_type = 'ceph-exporter' + entrypoint = '/usr/bin/ceph-exporter' + DEFAULT_PORT = 9926 + port_map = { + 'ceph-exporter': DEFAULT_PORT, + } + + @classmethod + def for_daemon_type(cls, daemon_type: str) -> bool: + return cls.daemon_type == daemon_type + + def __init__( + self, + ctx: CephadmContext, + fsid: str, + daemon_id: Union[int, str], + config_json: Dict[str, Any], + image: str = DEFAULT_IMAGE, + ) -> None: + self.ctx = ctx + self.fsid = fsid + self.daemon_id = daemon_id + self.image = image + + self.sock_dir = config_json.get('sock-dir', '/var/run/ceph/') + ipv4_addrs, _ = get_ip_addresses(get_hostname()) + addrs = '0.0.0.0' if ipv4_addrs else '::' + self.addrs = config_json.get('addrs', addrs) + self.port = config_json.get('port', self.DEFAULT_PORT) + self.prio_limit = config_json.get('prio-limit', 5) + self.stats_period = config_json.get('stats-period', 5) + + self.validate() + + @classmethod + def init( + cls, ctx: CephadmContext, fsid: str, daemon_id: Union[int, str] + ) -> 'CephExporter': + return cls(ctx, fsid, daemon_id, fetch_configs(ctx), ctx.image) + + @classmethod + def create( + cls, ctx: CephadmContext, ident: DaemonIdentity + ) -> 'CephExporter': + return cls.init(ctx, ident.fsid, ident.daemon_id) + + @property + def identity(self) -> DaemonIdentity: + return DaemonIdentity(self.fsid, self.daemon_type, self.daemon_id) + + def get_daemon_args(self) -> List[str]: + args = [ + f'--sock-dir={self.sock_dir}', + f'--addrs={self.addrs}', + f'--port={self.port}', + f'--prio-limit={self.prio_limit}', + f'--stats-period={self.stats_period}', + ] + return args + + def validate(self) -> None: + if not os.path.isdir(self.sock_dir): + raise 
Error(f'Directory does not exist. Got: {self.sock_dir}') + + def container(self, ctx: CephadmContext) -> CephContainer: + ctr = daemon_to_container(ctx, self) + return to_deployment_container(ctx, ctr) + + def uid_gid(self, ctx: CephadmContext) -> Tuple[int, int]: + return extract_uid_gid(ctx) + + def config_and_keyring( + self, ctx: CephadmContext + ) -> Tuple[Optional[str], Optional[str]]: + return get_config_and_keyring(ctx) + + def customize_container_mounts( + self, ctx: CephadmContext, mounts: Dict[str, str] + ) -> None: + cm = Ceph.get_ceph_mounts(ctx, self.identity) + mounts.update(cm) + + def customize_process_args( + self, ctx: CephadmContext, args: List[str] + ) -> None: + name = 'client.ceph-exporter.%s' % self.identity.daemon_id + args.extend(['-n', name, '-f']) + args.extend(self.get_daemon_args()) + + def customize_container_args( + self, ctx: CephadmContext, args: List[str] + ) -> None: + args.append(ctx.container_engine.unlimited_pids_option) + + def customize_container_envs( + self, ctx: CephadmContext, envs: List[str] + ) -> None: + envs.append('TCMALLOC_MAX_TOTAL_THREAD_CACHE_BYTES=134217728') + + def default_entrypoint(self) -> str: + return self.entrypoint + + +def get_ceph_mounts_for_type( + ctx: CephadmContext, fsid: str, daemon_type: str +) -> Dict[str, str]: + """The main implementation of get_container_mounts_for_type minus the call + to _update_podman_mounts so that this can be called from + get_container_mounts. + """ + mounts = dict() + + if daemon_type in ceph_daemons(): + if fsid: + run_path = os.path.join('/var/run/ceph', fsid) + if os.path.exists(run_path): + mounts[run_path] = '/var/run/ceph:z' + log_dir = os.path.join(ctx.log_dir, fsid) + mounts[log_dir] = '/var/log/ceph:z' + crash_dir = '/var/lib/ceph/%s/crash' % fsid + if os.path.exists(crash_dir): + mounts[crash_dir] = '/var/lib/ceph/crash:z' + if daemon_type != 'crash' and should_log_to_journald(ctx): + journald_sock_dir = '/run/systemd/journal' + mounts[journald_sock_dir] = journald_sock_dir + + if daemon_type in ['mon', 'osd', 'clusterless-ceph-volume']: + mounts['/dev'] = '/dev' # FIXME: narrow this down? + mounts['/run/udev'] = '/run/udev' + if daemon_type in ['osd', 'clusterless-ceph-volume']: + mounts['/sys'] = '/sys' # for numa.cc, pick_address, cgroups, ... + mounts['/run/lvm'] = '/run/lvm' + mounts['/run/lock/lvm'] = '/run/lock/lvm' + if daemon_type == 'osd': + # selinux-policy in the container may not match the host. + if HostFacts(ctx).selinux_enabled: + cluster_dir = f'{ctx.data_dir}/{fsid}' + selinux_folder = f'{cluster_dir}/selinux' + if os.path.exists(cluster_dir): + if not os.path.exists(selinux_folder): + os.makedirs(selinux_folder, mode=0o755) + mounts[selinux_folder] = '/sys/fs/selinux:ro' + else: + logger.error( + f'Cluster directory {cluster_dir} does not exist.' + ) + mounts['/'] = '/rootfs' + + try: + if ( + ctx.shared_ceph_folder + ): # make easy manager modules/ceph-volume development + ceph_folder = pathify(ctx.shared_ceph_folder) + if os.path.exists(ceph_folder): + cephadm_binary = ceph_folder + '/src/cephadm/cephadm' + if not os.path.exists(pathify(cephadm_binary)): + raise Error( + "cephadm binary does not exist. Please run './build.sh cephadm' from ceph/src/cephadm/ directory."
+ ) + mounts[cephadm_binary] = '/usr/sbin/cephadm' + mounts[ + ceph_folder + '/src/ceph-volume/ceph_volume' + ] = '/usr/lib/python3.6/site-packages/ceph_volume' + mounts[ + ceph_folder + '/src/pybind/mgr' + ] = '/usr/share/ceph/mgr' + mounts[ + ceph_folder + '/src/python-common/ceph' + ] = '/usr/lib/python3.6/site-packages/ceph' + mounts[ + ceph_folder + '/monitoring/ceph-mixin/dashboards_out' + ] = '/etc/grafana/dashboards/ceph-dashboard' + mounts[ + ceph_folder + + '/monitoring/ceph-mixin/prometheus_alerts.yml' + ] = '/etc/prometheus/ceph/ceph_default_alerts.yml' + else: + logger.error( + 'Ceph shared source folder does not exist.', + extra=Highlight.FAILURE.extra(), + ) + except AttributeError: + pass + return mounts + + +def ceph_daemons() -> List[str]: + """A legacy method that returns a list of all daemon types considered ceph + daemons. + """ + cds = list(Ceph._daemons) + cds.append(CephExporter.daemon_type) + return cds diff --git a/src/cephadm/cephadmlib/daemons/custom.py b/src/cephadm/cephadmlib/daemons/custom.py new file mode 100644 index 00000000000..e833c80c9a5 --- /dev/null +++ b/src/cephadm/cephadmlib/daemons/custom.py @@ -0,0 +1,222 @@ +import logging +import os +import re + +from typing import Any, Dict, List, Optional, Tuple, Union + +from ..container_daemon_form import ContainerDaemonForm, daemon_to_container +from ..container_types import CephContainer, InitContainer +from ..context import CephadmContext +from ..context_getters import fetch_configs +from ..daemon_form import register as register_daemon_form +from ..daemon_identity import DaemonIdentity +from ..data_utils import dict_get, dict_get_join +from ..deploy import DeploymentType +from ..deployment_utils import to_deployment_container +from ..file_utils import write_new, makedirs +from ..net_utils import EndPoint + + +logger = logging.getLogger() + + +@register_daemon_form +class CustomContainer(ContainerDaemonForm): + """Defines a custom container""" + + daemon_type = 'container' + + @classmethod + def for_daemon_type(cls, daemon_type: str) -> bool: + return cls.daemon_type == daemon_type + + def __init__( + self, + fsid: str, + daemon_id: Union[int, str], + config_json: Dict, + image: str, + ) -> None: + self.fsid = fsid + self.daemon_id = daemon_id + self.image = image + + # config-json options + self.entrypoint = dict_get(config_json, 'entrypoint') + self.uid = dict_get(config_json, 'uid', 65534) # nobody + self.gid = dict_get(config_json, 'gid', 65534) # nobody + self.volume_mounts = dict_get(config_json, 'volume_mounts', {}) + self.args = dict_get(config_json, 'args', []) + self.envs = dict_get(config_json, 'envs', []) + self.privileged = dict_get(config_json, 'privileged', False) + self.bind_mounts = dict_get(config_json, 'bind_mounts', []) + self.ports = dict_get(config_json, 'ports', []) + self.dirs = dict_get(config_json, 'dirs', []) + self.files = dict_get(config_json, 'files', {}) + + @classmethod + def init( + cls, ctx: CephadmContext, fsid: str, daemon_id: Union[int, str] + ) -> 'CustomContainer': + return cls(fsid, daemon_id, fetch_configs(ctx), ctx.image) + + @classmethod + def create( + cls, ctx: CephadmContext, ident: DaemonIdentity + ) -> 'CustomContainer': + return cls.init(ctx, ident.fsid, ident.daemon_id) + + @property + def identity(self) -> DaemonIdentity: + return DaemonIdentity(self.fsid, self.daemon_type, self.daemon_id) + + def create_daemon_dirs(self, data_dir: str, uid: int, gid: int) -> None: + """ + Create dirs/files below the container data directory. 
+ """ + logger.info( + 'Creating custom container configuration ' + 'dirs/files in {} ...'.format(data_dir) + ) + + if not os.path.isdir(data_dir): + raise OSError('data_dir is not a directory: %s' % data_dir) + + for dir_path in self.dirs: + logger.info('Creating directory: {}'.format(dir_path)) + dir_path = os.path.join(data_dir, dir_path.strip('/')) + makedirs(dir_path, uid, gid, 0o755) + + for file_path in self.files: + logger.info('Creating file: {}'.format(file_path)) + content = dict_get_join(self.files, file_path) + file_path = os.path.join(data_dir, file_path.strip('/')) + with write_new( + file_path, owner=(uid, gid), encoding='utf-8' + ) as f: + f.write(content) + + def get_daemon_args(self) -> List[str]: + return [] + + def get_container_args(self) -> List[str]: + return self.args + + def get_container_envs(self) -> List[str]: + return self.envs + + def _get_container_mounts(self, data_dir: str) -> Dict[str, str]: + """ + Get the volume mounts. Relative source paths will be located below + `/var/lib/ceph/<cluster-fsid>/<daemon-name>`. + + Example: + { + /foo/conf: /conf + foo/conf: /conf + } + becomes + { + /foo/conf: /conf + /var/lib/ceph/<cluster-fsid>/<daemon-name>/foo/conf: /conf + } + """ + mounts = {} + for source, destination in self.volume_mounts.items(): + source = os.path.join(data_dir, source) + mounts[source] = destination + return mounts + + def customize_container_mounts( + self, ctx: CephadmContext, mounts: Dict[str, str] + ) -> None: + data_dir = self.identity.data_dir(ctx.data_dir) + mounts.update(self._get_container_mounts(data_dir)) + + def _get_container_binds(self, data_dir: str) -> List[List[str]]: + """ + Get the bind mounts. Relative `source=...` paths will be located below + `/var/lib/ceph/<cluster-fsid>/<daemon-name>`. + + Example: + [ + 'type=bind', + 'source=lib/modules', + 'destination=/lib/modules', + 'ro=true' + ] + becomes + [ + ... + 'source=/var/lib/ceph/<cluster-fsid>/<daemon-name>/lib/modules', + ... 
+ ] + """ + binds = self.bind_mounts.copy() + for bind in binds: + for index, value in enumerate(bind): + match = re.match(r'^source=(.+)$', value) + if match: + bind[index] = 'source={}'.format( + os.path.join(data_dir, match.group(1)) + ) + return binds + + def customize_container_binds( + self, ctx: CephadmContext, binds: List[List[str]] + ) -> None: + data_dir = self.identity.data_dir(ctx.data_dir) + binds.extend(self._get_container_binds(data_dir)) + + # Cache the container so we don't need to rebuild it again when calling + # into init_containers + _container: Optional[CephContainer] = None + + def container(self, ctx: CephadmContext) -> CephContainer: + if self._container is None: + ctr = daemon_to_container( + ctx, + self, + host_network=False, + privileged=self.privileged, + ptrace=ctx.allow_ptrace, + ) + self._container = to_deployment_container(ctx, ctr) + return self._container + + def init_containers(self, ctx: CephadmContext) -> List[InitContainer]: + primary = self.container(ctx) + init_containers: List[Dict[str, Any]] = getattr( + ctx, 'init_containers', [] + ) + return [ + InitContainer.from_primary_and_opts(ctx, primary, ic_opts) + for ic_opts in init_containers + ] + + def customize_container_endpoints( + self, endpoints: List[EndPoint], deployment_type: DeploymentType + ) -> None: + if deployment_type == DeploymentType.DEFAULT: + endpoints.extend([EndPoint('0.0.0.0', p) for p in self.ports]) + + def customize_container_envs( + self, ctx: CephadmContext, envs: List[str] + ) -> None: + envs.extend(self.get_container_envs()) + + def customize_container_args( + self, ctx: CephadmContext, args: List[str] + ) -> None: + args.extend(self.get_container_args()) + + def customize_process_args( + self, ctx: CephadmContext, args: List[str] + ) -> None: + args.extend(self.get_daemon_args()) + + def default_entrypoint(self) -> str: + return self.entrypoint or '' + + def uid_gid(self, ctx: CephadmContext) -> Tuple[int, int]: + return self.uid, self.gid diff --git a/src/cephadm/cephadmlib/daemons/ingress.py b/src/cephadm/cephadmlib/daemons/ingress.py new file mode 100644 index 00000000000..6064cf538fb --- /dev/null +++ b/src/cephadm/cephadmlib/daemons/ingress.py @@ -0,0 +1,290 @@ +import os + +from typing import Dict, List, Optional, Tuple, Union + +from ..constants import ( + DEFAULT_HAPROXY_IMAGE, + DEFAULT_KEEPALIVED_IMAGE, + DATA_DIR_MODE, +) +from ..container_daemon_form import ContainerDaemonForm, daemon_to_container +from ..container_types import CephContainer, extract_uid_gid +from ..context import CephadmContext +from ..context_getters import fetch_configs +from ..daemon_form import register as register_daemon_form +from ..daemon_identity import DaemonIdentity +from ..data_utils import dict_get, is_fsid +from ..deployment_utils import to_deployment_container +from ..exceptions import Error +from ..file_utils import makedirs, populate_files + + +@register_daemon_form +class HAproxy(ContainerDaemonForm): + """Defines an HAproxy container""" + + daemon_type = 'haproxy' + required_files = ['haproxy.cfg'] + default_image = DEFAULT_HAPROXY_IMAGE + + @classmethod + def for_daemon_type(cls, daemon_type: str) -> bool: + return cls.daemon_type == daemon_type + + def __init__( + self, + ctx: CephadmContext, + fsid: str, + daemon_id: Union[int, str], + config_json: Dict, + image: str, + ) -> None: + self.ctx = ctx + self.fsid = fsid + self.daemon_id = daemon_id + self.image = image + + # config-json options + self.files = dict_get(config_json, 'files', {}) + + self.validate() + + 
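For illustration, a minimal config-json payload accepted by this HAproxy form could look like the sketch below. Only the shape is grounded in the code above (the required_files list and the 'files' lookup in __init__); the daemon id, image and haproxy.cfg content are assumed values, and ctx, fsid and image are taken to be already in scope.

# hypothetical payload -- concrete values are illustrative only
config_json = {
    'files': {
        'haproxy.cfg': 'global\n    maxconn 8000\n',
    },
}
# __init__ stores config_json['files'] and calls validate(), which insists
# that every name in required_files ('haproxy.cfg') is present
haproxy = HAproxy(ctx, fsid, 'ingress.a', config_json, image)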
@classmethod + def init( + cls, ctx: CephadmContext, fsid: str, daemon_id: Union[int, str] + ) -> 'HAproxy': + return cls(ctx, fsid, daemon_id, fetch_configs(ctx), ctx.image) + + @classmethod + def create(cls, ctx: CephadmContext, ident: DaemonIdentity) -> 'HAproxy': + return cls.init(ctx, ident.fsid, ident.daemon_id) + + @property + def identity(self) -> DaemonIdentity: + return DaemonIdentity(self.fsid, self.daemon_type, self.daemon_id) + + def create_daemon_dirs(self, data_dir: str, uid: int, gid: int) -> None: + """Create files under the container data dir""" + if not os.path.isdir(data_dir): + raise OSError('data_dir is not a directory: %s' % (data_dir)) + + # create additional directories in data dir for HAproxy to use + if not os.path.isdir(os.path.join(data_dir, 'haproxy')): + makedirs( + os.path.join(data_dir, 'haproxy'), uid, gid, DATA_DIR_MODE + ) + + data_dir = os.path.join(data_dir, 'haproxy') + populate_files(data_dir, self.files, uid, gid) + + def get_daemon_args(self) -> List[str]: + return ['haproxy', '-f', '/var/lib/haproxy/haproxy.cfg'] + + def validate(self): + # type: () -> None + if not is_fsid(self.fsid): + raise Error('not an fsid: %s' % self.fsid) + if not self.daemon_id: + raise Error('invalid daemon_id: %s' % self.daemon_id) + if not self.image: + raise Error('invalid image: %s' % self.image) + + # check for the required files + if self.required_files: + for fname in self.required_files: + if fname not in self.files: + raise Error( + 'required file missing from config-json: %s' % fname + ) + + def get_daemon_name(self): + # type: () -> str + return '%s.%s' % (self.daemon_type, self.daemon_id) + + def get_container_name(self, desc=None): + # type: (Optional[str]) -> str + cname = 'ceph-%s-%s' % (self.fsid, self.get_daemon_name()) + if desc: + cname = '%s-%s' % (cname, desc) + return cname + + def uid_gid(self, ctx: CephadmContext) -> Tuple[int, int]: + # better directory for this? 
+ return extract_uid_gid(self.ctx, file_path='/var/lib') + + @staticmethod + def _get_container_mounts(data_dir: str) -> Dict[str, str]: + mounts = dict() + mounts[os.path.join(data_dir, 'haproxy')] = '/var/lib/haproxy' + return mounts + + def customize_container_mounts( + self, ctx: CephadmContext, mounts: Dict[str, str] + ) -> None: + data_dir = self.identity.data_dir(ctx.data_dir) + mounts.update(self._get_container_mounts(data_dir)) + + @staticmethod + def get_sysctl_settings() -> List[str]: + return [ + '# IP forwarding and non-local bind', + 'net.ipv4.ip_forward = 1', + 'net.ipv4.ip_nonlocal_bind = 1', + ] + + def container(self, ctx: CephadmContext) -> CephContainer: + ctr = daemon_to_container(ctx, self) + return to_deployment_container(ctx, ctr) + + def customize_container_args( + self, ctx: CephadmContext, args: List[str] + ) -> None: + args.extend( + ['--user=root'] + ) # haproxy 2.4 defaults to a different user + + def customize_process_args( + self, ctx: CephadmContext, args: List[str] + ) -> None: + args.extend(self.get_daemon_args()) + + +@register_daemon_form +class Keepalived(ContainerDaemonForm): + """Defines a Keepalived container""" + + daemon_type = 'keepalived' + required_files = ['keepalived.conf'] + default_image = DEFAULT_KEEPALIVED_IMAGE + + @classmethod + def for_daemon_type(cls, daemon_type: str) -> bool: + return cls.daemon_type == daemon_type + + def __init__( + self, + ctx: CephadmContext, + fsid: str, + daemon_id: Union[int, str], + config_json: Dict, + image: str, + ) -> None: + self.ctx = ctx + self.fsid = fsid + self.daemon_id = daemon_id + self.image = image + + # config-json options + self.files = dict_get(config_json, 'files', {}) + + self.validate() + + @classmethod + def init( + cls, ctx: CephadmContext, fsid: str, daemon_id: Union[int, str] + ) -> 'Keepalived': + return cls(ctx, fsid, daemon_id, fetch_configs(ctx), ctx.image) + + @classmethod + def create( + cls, ctx: CephadmContext, ident: DaemonIdentity + ) -> 'Keepalived': + return cls.init(ctx, ident.fsid, ident.daemon_id) + + @property + def identity(self) -> DaemonIdentity: + return DaemonIdentity(self.fsid, self.daemon_type, self.daemon_id) + + def create_daemon_dirs(self, data_dir: str, uid: int, gid: int) -> None: + """Create files under the container data dir""" + if not os.path.isdir(data_dir): + raise OSError('data_dir is not a directory: %s' % (data_dir)) + + # create additional directories in data dir for keepalived to use + if not os.path.isdir(os.path.join(data_dir, 'keepalived')): + makedirs( + os.path.join(data_dir, 'keepalived'), uid, gid, DATA_DIR_MODE + ) + + # populate files from the config-json + populate_files(data_dir, self.files, uid, gid) + + def validate(self): + # type: () -> None + if not is_fsid(self.fsid): + raise Error('not an fsid: %s' % self.fsid) + if not self.daemon_id: + raise Error('invalid daemon_id: %s' % self.daemon_id) + if not self.image: + raise Error('invalid image: %s' % self.image) + + # check for the required files + if self.required_files: + for fname in self.required_files: + if fname not in self.files: + raise Error( + 'required file missing from config-json: %s' % fname + ) + + def get_daemon_name(self): + # type: () -> str + return '%s.%s' % (self.daemon_type, self.daemon_id) + + def get_container_name(self, desc=None): + # type: (Optional[str]) -> str + cname = 'ceph-%s-%s' % (self.fsid, self.get_daemon_name()) + if desc: + cname = '%s-%s' % (cname, desc) + return cname + + @staticmethod + def 
get_container_envs(): + # type: () -> List[str] + envs = [ + 'KEEPALIVED_AUTOCONF=false', + 'KEEPALIVED_CONF=/etc/keepalived/keepalived.conf', + 'KEEPALIVED_CMD=/usr/sbin/keepalived -n -l -f /etc/keepalived/keepalived.conf', + 'KEEPALIVED_DEBUG=false', + ] + return envs + + @staticmethod + def get_sysctl_settings() -> List[str]: + return [ + '# IP forwarding and non-local bind', + 'net.ipv4.ip_forward = 1', + 'net.ipv4.ip_nonlocal_bind = 1', + ] + + def uid_gid(self, ctx: CephadmContext) -> Tuple[int, int]: + # better directory for this? + return extract_uid_gid(self.ctx, file_path='/var/lib') + + @staticmethod + def _get_container_mounts(data_dir: str) -> Dict[str, str]: + mounts = dict() + mounts[ + os.path.join(data_dir, 'keepalived.conf') + ] = '/etc/keepalived/keepalived.conf' + return mounts + + def customize_container_mounts( + self, ctx: CephadmContext, mounts: Dict[str, str] + ) -> None: + data_dir = self.identity.data_dir(ctx.data_dir) + mounts.update(self._get_container_mounts(data_dir)) + + def container(self, ctx: CephadmContext) -> CephContainer: + ctr = daemon_to_container(ctx, self) + return to_deployment_container(ctx, ctr) + + def customize_container_envs( + self, ctx: CephadmContext, envs: List[str] + ) -> None: + envs.extend(self.get_container_envs()) + + def customize_container_args( + self, ctx: CephadmContext, args: List[str] + ) -> None: + args.extend(['--cap-add=NET_ADMIN', '--cap-add=NET_RAW']) diff --git a/src/cephadm/cephadmlib/daemons/iscsi.py b/src/cephadm/cephadmlib/daemons/iscsi.py new file mode 100644 index 00000000000..1845a37bf4e --- /dev/null +++ b/src/cephadm/cephadmlib/daemons/iscsi.py @@ -0,0 +1,286 @@ +import logging +import os +import re + +from typing import Dict, List, Optional, Tuple + +from ..container_daemon_form import ContainerDaemonForm, daemon_to_container +from ..container_types import CephContainer, extract_uid_gid +from ..context_getters import fetch_configs, get_config_and_keyring +from ..daemon_form import register as register_daemon_form +from ..daemon_identity import DaemonIdentity, DaemonSubIdentity +from ..constants import DEFAULT_IMAGE +from ..context import CephadmContext +from ..data_utils import dict_get, is_fsid +from ..deployment_utils import to_deployment_container +from ..exceptions import Error +from ..file_utils import makedirs, populate_files +from ..call_wrappers import call, CallVerbosity + + +logger = logging.getLogger() + + +@register_daemon_form +class CephIscsi(ContainerDaemonForm): + """Defines a Ceph-Iscsi container""" + + daemon_type = 'iscsi' + entrypoint = '/usr/bin/rbd-target-api' + + required_files = ['iscsi-gateway.cfg'] + + @classmethod + def for_daemon_type(cls, daemon_type: str) -> bool: + return cls.daemon_type == daemon_type + + def __init__( + self, + ctx: CephadmContext, + ident: DaemonIdentity, + config_json: Dict, + image: str = DEFAULT_IMAGE, + ): + self.ctx = ctx + self._identity = ident + self.image = image + + # config-json options + self.files = dict_get(config_json, 'files', {}) + + # validate the supplied args + self.validate() + + @classmethod + def init( + cls, ctx: CephadmContext, fsid: str, daemon_id: str + ) -> 'CephIscsi': + return cls.create( + ctx, DaemonIdentity(fsid, cls.daemon_type, daemon_id) + ) + + @classmethod + def create( + cls, ctx: CephadmContext, ident: DaemonIdentity + ) -> 'CephIscsi': + return cls(ctx, ident, fetch_configs(ctx), ctx.image) + + @property + def identity(self) -> DaemonIdentity: + return self._identity + + @property + def fsid(self) -> str: + return 
self._identity.fsid + + @property + def daemon_id(self) -> str: + return self._identity.daemon_id + + @staticmethod + def _get_container_mounts(data_dir, log_dir): + # type: (str, str) -> Dict[str, str] + mounts = dict() + mounts[os.path.join(data_dir, 'config')] = '/etc/ceph/ceph.conf:z' + mounts[os.path.join(data_dir, 'keyring')] = '/etc/ceph/keyring:z' + mounts[ + os.path.join(data_dir, 'iscsi-gateway.cfg') + ] = '/etc/ceph/iscsi-gateway.cfg:z' + mounts[os.path.join(data_dir, 'configfs')] = '/sys/kernel/config' + mounts[ + os.path.join(data_dir, 'tcmu-runner-entrypoint.sh') + ] = '/usr/local/scripts/tcmu-runner-entrypoint.sh' + mounts[log_dir] = '/var/log:z' + mounts['/dev'] = '/dev' + return mounts + + def customize_container_mounts( + self, ctx: CephadmContext, mounts: Dict[str, str] + ) -> None: + data_dir = self.identity.data_dir(ctx.data_dir) + # Remove the trailing ".tcmu" from data_dir, as tcmu-runner uses the + # same data_dir as rbd-target-api + if data_dir.endswith('.tcmu'): + data_dir = re.sub(r'\.tcmu$', '', data_dir) + log_dir = os.path.join(ctx.log_dir, self.identity.fsid) + mounts.update(CephIscsi._get_container_mounts(data_dir, log_dir)) + + def customize_container_binds( + self, ctx: CephadmContext, binds: List[List[str]] + ) -> None: + lib_modules = [ + 'type=bind', + 'source=/lib/modules', + 'destination=/lib/modules', + 'ro=true', + ] + binds.append(lib_modules) + + @staticmethod + def get_version(ctx, container_id): + # type: (CephadmContext, str) -> Optional[str] + version = None + out, err, code = call( + ctx, + [ + ctx.container_engine.path, + 'exec', + container_id, + '/usr/bin/python3', + '-c', + "import pkg_resources; print(pkg_resources.require('ceph_iscsi')[0].version)", + ], + verbosity=CallVerbosity.QUIET, + ) + if code == 0: + version = out.strip() + return version + + def validate(self): + # type: () -> None + if not is_fsid(self.fsid): + raise Error('not an fsid: %s' % self.fsid) + if not self.daemon_id: + raise Error('invalid daemon_id: %s' % self.daemon_id) + if not self.image: + raise Error('invalid image: %s' % self.image) + + # check for the required files + if self.required_files: + for fname in self.required_files: + if fname not in self.files: + raise Error( + 'required file missing from config-json: %s' % fname + ) + + def get_daemon_name(self): + # type: () -> str + return '%s.%s' % (self.daemon_type, self.daemon_id) + + def get_container_name(self, desc=None): + # type: (Optional[str]) -> str + cname = 'ceph-%s-%s' % (self.fsid, self.get_daemon_name()) + if desc: + cname = '%s-%s' % (cname, desc) + return cname + + def create_daemon_dirs(self, data_dir, uid, gid): + # type: (str, int, int) -> None + """Create files under the container data dir""" + if not os.path.isdir(data_dir): + raise OSError('data_dir is not a directory: %s' % (data_dir)) + + logger.info('Creating ceph-iscsi config...') + configfs_dir = os.path.join(data_dir, 'configfs') + makedirs(configfs_dir, uid, gid, 0o755) + + # set up the tcmu-runner entrypoint script + # to be mounted into the container. 
For more info + # on why we need this script, see the + # tcmu_runner_entrypoint_script function + self.files[ + 'tcmu-runner-entrypoint.sh' + ] = self.tcmu_runner_entrypoint_script() + + # populate files from the config-json + populate_files(data_dir, self.files, uid, gid) + + # we want the tcmu runner entrypoint script to be executable + # populate_files will give it 0o600 by default + os.chmod(os.path.join(data_dir, 'tcmu-runner-entrypoint.sh'), 0o700) + + @staticmethod + def configfs_mount_umount(data_dir, mount=True): + # type: (str, bool) -> List[str] + mount_path = os.path.join(data_dir, 'configfs') + if mount: + cmd = ( + 'if ! grep -qs {0} /proc/mounts; then ' + 'mount -t configfs none {0}; fi'.format(mount_path) + ) + else: + cmd = ( + 'if grep -qs {0} /proc/mounts; then ' + 'umount {0}; fi'.format(mount_path) + ) + return cmd.split() + + @staticmethod + def tcmu_runner_entrypoint_script() -> str: + # since we are having tcmu-runner be a background + # process in its systemd unit (rbd-target-api being + # the main process) systemd will not restart it when + # it fails. in order to try and get around that for now + # we can have a script mounted in the container + # that attempts to do the restarting for us. This script + # can then become the entrypoint for the tcmu-runner + # container + + # This is intended to be dropped for a better solution + # for at least the squid release onward + return """#!/bin/bash +RUN_DIR=/var/run/tcmu-runner + +if [ ! -d "${RUN_DIR}" ] ; then + mkdir -p "${RUN_DIR}" +fi + +rm -rf "${RUN_DIR}"/* + +while true +do + touch "${RUN_DIR}"/start-up-$(date -Ins) + /usr/bin/tcmu-runner + + # If we got around 3 kills/segfaults in the last minute, + # don't start anymore + if [ $(find "${RUN_DIR}" -type f -cmin -1 | wc -l) -ge 3 ] ; then + exit 0 + fi + + sleep 1 +done +""" + + def get_tcmu_runner_container(self): + # type: () -> CephContainer + # daemon_id is used to generate the cid and pid files used by podman, but as both tcmu-runner + # and rbd-target-api have the same daemon_id, it conflicts and prevents the second container from + # starting. '.tcmu' is appended to the daemon_id to fix that. + subident = DaemonSubIdentity( + self.fsid, self.daemon_type, self.daemon_id, 'tcmu' + ) + tcmu_dmn = self.create(self.ctx, subident) + tcmu_container = to_deployment_container( + self.ctx, daemon_to_container(self.ctx, tcmu_dmn, privileged=True) + ) + # TODO: Eventually we don't want to run tcmu-runner through this script. + # This is intended to be a workaround backported to older releases + # and should eventually be removed in at least squid onward + tcmu_container.entrypoint = ( + '/usr/local/scripts/tcmu-runner-entrypoint.sh' + ) + tcmu_container.cname = self.get_container_name(desc='tcmu') + return tcmu_container + + def container(self, ctx: CephadmContext) -> CephContainer: + # So the container can modprobe iscsi_target_mod and have write perms + # to configfs we need to make this a privileged container. 
+ ctr = daemon_to_container(ctx, self, privileged=True) + return to_deployment_container(ctx, ctr) + + def config_and_keyring( + self, ctx: CephadmContext + ) -> Tuple[Optional[str], Optional[str]]: + return get_config_and_keyring(ctx) + + def uid_gid(self, ctx: CephadmContext) -> Tuple[int, int]: + return extract_uid_gid(ctx) + + def default_entrypoint(self) -> str: + return self.entrypoint + + def customize_container_args( + self, ctx: CephadmContext, args: List[str] + ) -> None: + args.append(ctx.container_engine.unlimited_pids_option) diff --git a/src/cephadm/cephadmlib/daemons/monitoring.py b/src/cephadm/cephadmlib/daemons/monitoring.py new file mode 100644 index 00000000000..e0667853dd7 --- /dev/null +++ b/src/cephadm/cephadmlib/daemons/monitoring.py @@ -0,0 +1,377 @@ +import os + +from typing import Dict, List, Tuple + +from ..call_wrappers import call, CallVerbosity +from ..constants import ( + DEFAULT_ALERT_MANAGER_IMAGE, + DEFAULT_GRAFANA_IMAGE, + DEFAULT_LOKI_IMAGE, + DEFAULT_NODE_EXPORTER_IMAGE, + DEFAULT_PROMETHEUS_IMAGE, + DEFAULT_PROMTAIL_IMAGE, +) +from ..container_daemon_form import ContainerDaemonForm, daemon_to_container +from ..container_types import CephContainer, extract_uid_gid +from ..context import CephadmContext +from ..context_getters import fetch_configs, fetch_meta +from ..daemon_form import register as register_daemon_form +from ..daemon_identity import DaemonIdentity +from ..deployment_utils import to_deployment_container +from ..exceptions import Error +from ..net_utils import get_fqdn, get_hostname, get_ip_addresses, wrap_ipv6 + + +@register_daemon_form +class Monitoring(ContainerDaemonForm): + """Define the configs for the monitoring containers""" + + port_map = { + 'prometheus': [ + 9095 + ], # Avoid default 9090, due to conflict with cockpit UI + 'node-exporter': [9100], + 'grafana': [3000], + 'alertmanager': [9093, 9094], + 'loki': [3100], + 'promtail': [9080], + } + + components = { + 'prometheus': { + 'image': DEFAULT_PROMETHEUS_IMAGE, + 'cpus': '2', + 'memory': '4GB', + 'args': [ + '--config.file=/etc/prometheus/prometheus.yml', + '--storage.tsdb.path=/prometheus', + ], + 'config-json-files': [ + 'prometheus.yml', + ], + }, + 'loki': { + 'image': DEFAULT_LOKI_IMAGE, + 'cpus': '1', + 'memory': '1GB', + 'args': [ + '--config.file=/etc/loki/loki.yml', + ], + 'config-json-files': ['loki.yml'], + }, + 'promtail': { + 'image': DEFAULT_PROMTAIL_IMAGE, + 'cpus': '1', + 'memory': '1GB', + 'args': [ + '--config.file=/etc/promtail/promtail.yml', + ], + 'config-json-files': [ + 'promtail.yml', + ], + }, + 'node-exporter': { + 'image': DEFAULT_NODE_EXPORTER_IMAGE, + 'cpus': '1', + 'memory': '1GB', + 'args': ['--no-collector.timex'], + }, + 'grafana': { + 'image': DEFAULT_GRAFANA_IMAGE, + 'cpus': '2', + 'memory': '4GB', + 'args': [], + 'config-json-files': [ + 'grafana.ini', + 'provisioning/datasources/ceph-dashboard.yml', + 'certs/cert_file', + 'certs/cert_key', + ], + }, + 'alertmanager': { + 'image': DEFAULT_ALERT_MANAGER_IMAGE, + 'cpus': '2', + 'memory': '2GB', + 'args': [ + '--cluster.listen-address=:{}'.format( + port_map['alertmanager'][1] + ), + ], + 'config-json-files': [ + 'alertmanager.yml', + ], + 'config-json-args': [ + 'peers', + ], + }, + } # type: ignore + + @classmethod + def for_daemon_type(cls, daemon_type: str) -> bool: + return daemon_type in cls.components + + @staticmethod + def get_version(ctx, container_id, daemon_type): + # type: (CephadmContext, str, str) -> str + """ + :param: daemon_type Either "prometheus", "alertmanager", "loki", 
"promtail" or "node-exporter" + """ + assert daemon_type in ( + 'prometheus', + 'alertmanager', + 'node-exporter', + 'loki', + 'promtail', + ) + cmd = daemon_type.replace('-', '_') + code = -1 + err = '' + out = '' + version = '' + if daemon_type == 'alertmanager': + for cmd in ['alertmanager', 'prometheus-alertmanager']: + out, err, code = call( + ctx, + [ + ctx.container_engine.path, + 'exec', + container_id, + cmd, + '--version', + ], + verbosity=CallVerbosity.QUIET, + ) + if code == 0: + break + cmd = 'alertmanager' # reset cmd for version extraction + else: + out, err, code = call( + ctx, + [ + ctx.container_engine.path, + 'exec', + container_id, + cmd, + '--version', + ], + verbosity=CallVerbosity.QUIET, + ) + if code == 0: + if err.startswith('%s, version ' % cmd): + version = err.split(' ')[2] + elif out.startswith('%s, version ' % cmd): + version = out.split(' ')[2] + return version + + @staticmethod + def extract_uid_gid( + ctx: CephadmContext, daemon_type: str + ) -> Tuple[int, int]: + if daemon_type == 'prometheus': + uid, gid = extract_uid_gid(ctx, file_path='/etc/prometheus') + elif daemon_type == 'node-exporter': + uid, gid = 65534, 65534 + elif daemon_type == 'grafana': + uid, gid = extract_uid_gid(ctx, file_path='/var/lib/grafana') + elif daemon_type == 'loki': + uid, gid = extract_uid_gid(ctx, file_path='/etc/loki') + elif daemon_type == 'promtail': + uid, gid = extract_uid_gid(ctx, file_path='/etc/promtail') + elif daemon_type == 'alertmanager': + uid, gid = extract_uid_gid( + ctx, file_path=['/etc/alertmanager', '/etc/prometheus'] + ) + else: + raise Error('{} not implemented yet'.format(daemon_type)) + return uid, gid + + def __init__(self, ctx: CephadmContext, ident: DaemonIdentity) -> None: + self.ctx = ctx + self._identity = ident + + @classmethod + def create( + cls, ctx: CephadmContext, ident: DaemonIdentity + ) -> 'Monitoring': + return cls(ctx, ident) + + @property + def identity(self) -> DaemonIdentity: + return self._identity + + def container(self, ctx: CephadmContext) -> CephContainer: + self._prevalidate(ctx) + ctr = daemon_to_container(ctx, self) + return to_deployment_container(ctx, ctr) + + def uid_gid(self, ctx: CephadmContext) -> Tuple[int, int]: + return self.extract_uid_gid(ctx, self.identity.daemon_type) + + def _prevalidate(self, ctx: CephadmContext) -> None: + # before being refactored into a ContainerDaemonForm these checks were + # done inside the deploy function. 
This was the only "family" of daemons + # that performed these checks in that location + daemon_type = self.identity.daemon_type + config = fetch_configs(ctx) # type: ignore + required_files = self.components[daemon_type].get( + 'config-json-files', list() + ) + required_args = self.components[daemon_type].get( + 'config-json-args', list() + ) + if required_files: + if not config or not all(c in config.get('files', {}).keys() for c in required_files): # type: ignore + raise Error( + '{} deployment requires config-json which must ' + 'contain file content for {}'.format( + daemon_type.capitalize(), ', '.join(required_files) + ) + ) + if required_args: + if not config or not all(c in config.keys() for c in required_args): # type: ignore + raise Error( + '{} deployment requires config-json which must ' + 'contain arg for {}'.format( + daemon_type.capitalize(), ', '.join(required_args) + ) + ) + + def get_daemon_args(self) -> List[str]: + ctx = self.ctx + daemon_type = self.identity.daemon_type + metadata = self.components[daemon_type] + r = list(metadata.get('args', [])) + # set ip and port to bind to for nodeexporter,alertmanager,prometheus + if daemon_type not in ['grafana', 'loki', 'promtail']: + ip = '' + port = self.port_map[daemon_type][0] + meta = fetch_meta(ctx) + if meta: + if 'ip' in meta and meta['ip']: + ip = meta['ip'] + if 'ports' in meta and meta['ports']: + port = meta['ports'][0] + r += [f'--web.listen-address={ip}:{port}'] + if daemon_type == 'prometheus': + config = fetch_configs(ctx) + retention_time = config.get('retention_time', '15d') + retention_size = config.get( + 'retention_size', '0' + ) # default to disabled + r += [f'--storage.tsdb.retention.time={retention_time}'] + r += [f'--storage.tsdb.retention.size={retention_size}'] + scheme = 'http' + host = get_fqdn() + # in case host is not an fqdn then we use the IP to + # avoid producing a broken web.external-url link + if '.' 
not in host: + ipv4_addrs, ipv6_addrs = get_ip_addresses(get_hostname()) + # use the first ipv4 (if any) otherwise use the first ipv6 + addr = next(iter(ipv4_addrs or ipv6_addrs), None) + host = wrap_ipv6(addr) if addr else host + r += [f'--web.external-url={scheme}://{host}:{port}'] + if daemon_type == 'alertmanager': + config = fetch_configs(ctx) + peers = config.get('peers', list()) # type: ignore + for peer in peers: + r += ['--cluster.peer={}'.format(peer)] + try: + r += [f'--web.config.file={config["web_config"]}'] + except KeyError: + pass + # some alertmanager, by default, look elsewhere for a config + r += ['--config.file=/etc/alertmanager/alertmanager.yml'] + if daemon_type == 'promtail': + r += ['--config.expand-env'] + if daemon_type == 'prometheus': + config = fetch_configs(ctx) + try: + r += [f'--web.config.file={config["web_config"]}'] + except KeyError: + pass + if daemon_type == 'node-exporter': + config = fetch_configs(ctx) + try: + r += [f'--web.config.file={config["web_config"]}'] + except KeyError: + pass + r += [ + '--path.procfs=/host/proc', + '--path.sysfs=/host/sys', + '--path.rootfs=/rootfs', + ] + return r + + def _get_container_mounts(self, data_dir: str) -> Dict[str, str]: + ctx = self.ctx + daemon_type = self.identity.daemon_type + mounts: Dict[str, str] = {} + log_dir = os.path.join(ctx.log_dir, self.identity.fsid) + if daemon_type == 'prometheus': + mounts[ + os.path.join(data_dir, 'etc/prometheus') + ] = '/etc/prometheus:Z' + mounts[os.path.join(data_dir, 'data')] = '/prometheus:Z' + elif daemon_type == 'loki': + mounts[os.path.join(data_dir, 'etc/loki')] = '/etc/loki:Z' + mounts[os.path.join(data_dir, 'data')] = '/loki:Z' + elif daemon_type == 'promtail': + mounts[os.path.join(data_dir, 'etc/promtail')] = '/etc/promtail:Z' + mounts[log_dir] = '/var/log/ceph:z' + mounts[os.path.join(data_dir, 'data')] = '/promtail:Z' + elif daemon_type == 'node-exporter': + mounts[ + os.path.join(data_dir, 'etc/node-exporter') + ] = '/etc/node-exporter:Z' + mounts['/proc'] = '/host/proc:ro' + mounts['/sys'] = '/host/sys:ro' + mounts['/'] = '/rootfs:ro' + elif daemon_type == 'grafana': + mounts[ + os.path.join(data_dir, 'etc/grafana/grafana.ini') + ] = '/etc/grafana/grafana.ini:Z' + mounts[ + os.path.join(data_dir, 'etc/grafana/provisioning/datasources') + ] = '/etc/grafana/provisioning/datasources:Z' + mounts[ + os.path.join(data_dir, 'etc/grafana/certs') + ] = '/etc/grafana/certs:Z' + mounts[ + os.path.join(data_dir, 'data/grafana.db') + ] = '/var/lib/grafana/grafana.db:Z' + elif daemon_type == 'alertmanager': + mounts[ + os.path.join(data_dir, 'etc/alertmanager') + ] = '/etc/alertmanager:Z' + return mounts + + def customize_container_mounts( + self, ctx: CephadmContext, mounts: Dict[str, str] + ) -> None: + data_dir = self.identity.data_dir(ctx.data_dir) + mounts.update(self._get_container_mounts(data_dir)) + + def customize_container_args( + self, ctx: CephadmContext, args: List[str] + ) -> None: + uid, _ = self.uid_gid(ctx) + monitoring_args = [ + '--user', + str(uid), + # FIXME: disable cpu/memory limits for the time being (not supported + # by ubuntu 18.04 kernel!) 
+ ] + args.extend(monitoring_args) + if self.identity.daemon_type == 'node-exporter': + # in order to support setting '--path.procfs=/host/proc','--path.sysfs=/host/sys', + # '--path.rootfs=/rootfs' for node-exporter we need to disable selinux separation + # between the node-exporter container and the host to avoid selinux denials + args.extend(['--security-opt', 'label=disable']) + + def customize_process_args( + self, ctx: CephadmContext, args: List[str] + ) -> None: + args.extend(self.get_daemon_args()) + + def default_entrypoint(self) -> str: + return '' diff --git a/src/cephadm/cephadmlib/daemons/nfs.py b/src/cephadm/cephadmlib/daemons/nfs.py new file mode 100644 index 00000000000..6e2f2a945ca --- /dev/null +++ b/src/cephadm/cephadmlib/daemons/nfs.py @@ -0,0 +1,225 @@ +import logging +import os +import re + +from typing import Dict, List, Optional, Tuple, Union + +from ..call_wrappers import call, CallVerbosity +from ..constants import DEFAULT_IMAGE, CEPH_DEFAULT_CONF +from ..container_daemon_form import ContainerDaemonForm, daemon_to_container +from ..container_types import CephContainer, extract_uid_gid +from ..context import CephadmContext +from ..context_getters import fetch_configs, get_config_and_keyring +from ..daemon_form import register as register_daemon_form +from ..daemon_identity import DaemonIdentity +from ..data_utils import dict_get, is_fsid +from ..deploy import DeploymentType +from ..deployment_utils import to_deployment_container +from ..exceptions import Error +from ..file_utils import makedirs, populate_files, write_new +from ..net_utils import EndPoint + + +logger = logging.getLogger() + + +@register_daemon_form +class NFSGanesha(ContainerDaemonForm): + """Defines a NFS-Ganesha container""" + + daemon_type = 'nfs' + entrypoint = '/usr/bin/ganesha.nfsd' + daemon_args = ['-F', '-L', 'STDERR'] + + required_files = ['ganesha.conf'] + + port_map = { + 'nfs': 2049, + } + + @classmethod + def for_daemon_type(cls, daemon_type: str) -> bool: + return cls.daemon_type == daemon_type + + def __init__( + self, ctx, fsid, daemon_id, config_json, image=DEFAULT_IMAGE + ): + # type: (CephadmContext, str, Union[int, str], Dict, str) -> None + self.ctx = ctx + self.fsid = fsid + self.daemon_id = daemon_id + self.image = image + + # config-json options + self.pool = dict_get(config_json, 'pool', require=True) + self.namespace = dict_get(config_json, 'namespace') + self.userid = dict_get(config_json, 'userid') + self.extra_args = dict_get(config_json, 'extra_args', []) + self.files = dict_get(config_json, 'files', {}) + self.rgw = dict_get(config_json, 'rgw', {}) + + # validate the supplied args + self.validate() + + @classmethod + def init(cls, ctx, fsid, daemon_id): + # type: (CephadmContext, str, Union[int, str]) -> NFSGanesha + return cls(ctx, fsid, daemon_id, fetch_configs(ctx), ctx.image) + + @classmethod + def create( + cls, ctx: CephadmContext, ident: DaemonIdentity + ) -> 'NFSGanesha': + return cls.init(ctx, ident.fsid, ident.daemon_id) + + @property + def identity(self) -> DaemonIdentity: + return DaemonIdentity(self.fsid, self.daemon_type, self.daemon_id) + + def _get_container_mounts(self, data_dir): + # type: (str) -> Dict[str, str] + mounts = dict() + mounts[os.path.join(data_dir, 'config')] = '/etc/ceph/ceph.conf:z' + mounts[os.path.join(data_dir, 'keyring')] = '/etc/ceph/keyring:z' + mounts[os.path.join(data_dir, 'etc/ganesha')] = '/etc/ganesha:z' + if self.rgw: + cluster = self.rgw.get('cluster', 'ceph') + rgw_user = self.rgw.get('user', 'admin') + mounts[ + 
os.path.join(data_dir, 'keyring.rgw') + ] = '/var/lib/ceph/radosgw/%s-%s/keyring:z' % (cluster, rgw_user) + return mounts + + def customize_container_mounts( + self, ctx: CephadmContext, mounts: Dict[str, str] + ) -> None: + data_dir = self.identity.data_dir(ctx.data_dir) + mounts.update(self._get_container_mounts(data_dir)) + + @staticmethod + def get_container_envs(): + # type: () -> List[str] + envs = ['CEPH_CONF=%s' % (CEPH_DEFAULT_CONF)] + return envs + + @staticmethod + def get_version(ctx, container_id): + # type: (CephadmContext, str) -> Optional[str] + version = None + out, err, code = call( + ctx, + [ + ctx.container_engine.path, + 'exec', + container_id, + NFSGanesha.entrypoint, + '-v', + ], + verbosity=CallVerbosity.QUIET, + ) + if code == 0: + match = re.search(r'NFS-Ganesha Release\s*=\s*[V]*([\d.]+)', out) + if match: + version = match.group(1) + return version + + def validate(self): + # type: () -> None + if not is_fsid(self.fsid): + raise Error('not an fsid: %s' % self.fsid) + if not self.daemon_id: + raise Error('invalid daemon_id: %s' % self.daemon_id) + if not self.image: + raise Error('invalid image: %s' % self.image) + + # check for the required files + if self.required_files: + for fname in self.required_files: + if fname not in self.files: + raise Error( + 'required file missing from config-json: %s' % fname + ) + + # check for an RGW config + if self.rgw: + if not self.rgw.get('keyring'): + raise Error('RGW keyring is missing') + if not self.rgw.get('user'): + raise Error('RGW user is missing') + + def get_daemon_name(self): + # type: () -> str + return '%s.%s' % (self.daemon_type, self.daemon_id) + + def get_container_name(self, desc=None): + # type: (Optional[str]) -> str + cname = 'ceph-%s-%s' % (self.fsid, self.get_daemon_name()) + if desc: + cname = '%s-%s' % (cname, desc) + return cname + + def get_daemon_args(self): + # type: () -> List[str] + return self.daemon_args + self.extra_args + + def create_daemon_dirs(self, data_dir, uid, gid): + # type: (str, int, int) -> None + """Create files under the container data dir""" + if not os.path.isdir(data_dir): + raise OSError('data_dir is not a directory: %s' % (data_dir)) + + logger.info('Creating ganesha config...') + + # create the ganesha conf dir + config_dir = os.path.join(data_dir, 'etc/ganesha') + makedirs(config_dir, uid, gid, 0o755) + + # populate files from the config-json + populate_files(config_dir, self.files, uid, gid) + + # write the RGW keyring + if self.rgw: + keyring_path = os.path.join(data_dir, 'keyring.rgw') + with write_new(keyring_path, owner=(uid, gid)) as f: + f.write(self.rgw.get('keyring', '')) + + def firewall_service_name(self) -> str: + return 'nfs' + + def container(self, ctx: CephadmContext) -> CephContainer: + ctr = daemon_to_container(ctx, self) + return to_deployment_container(ctx, ctr) + + def customize_container_endpoints( + self, endpoints: List[EndPoint], deployment_type: DeploymentType + ) -> None: + if deployment_type == DeploymentType.DEFAULT and not endpoints: + nfs_ports = list(NFSGanesha.port_map.values()) + endpoints.extend([EndPoint('0.0.0.0', p) for p in nfs_ports]) + + def uid_gid(self, ctx: CephadmContext) -> Tuple[int, int]: + # TODO: extract ganesha uid/gid (997, 994) ? 
+ return extract_uid_gid(ctx) + + def config_and_keyring( + self, ctx: CephadmContext + ) -> Tuple[Optional[str], Optional[str]]: + return get_config_and_keyring(ctx) + + def customize_container_envs( + self, ctx: CephadmContext, envs: List[str] + ) -> None: + envs.extend(self.get_container_envs()) + + def customize_process_args( + self, ctx: CephadmContext, args: List[str] + ) -> None: + args.extend(self.get_daemon_args()) + + def customize_container_args( + self, ctx: CephadmContext, args: List[str] + ) -> None: + args.append(ctx.container_engine.unlimited_pids_option) + + def default_entrypoint(self) -> str: + return self.entrypoint diff --git a/src/cephadm/cephadmlib/daemons/nvmeof.py b/src/cephadm/cephadmlib/daemons/nvmeof.py new file mode 100644 index 00000000000..39488406bc8 --- /dev/null +++ b/src/cephadm/cephadmlib/daemons/nvmeof.py @@ -0,0 +1,193 @@ +import logging +import os + +from typing import Dict, List, Optional, Tuple, Union + +from ..container_daemon_form import ContainerDaemonForm, daemon_to_container +from ..container_types import CephContainer +from ..context_getters import fetch_configs, get_config_and_keyring +from ..daemon_form import register as register_daemon_form +from ..daemon_identity import DaemonIdentity +from ..constants import DEFAULT_NVMEOF_IMAGE +from ..context import CephadmContext +from ..data_utils import dict_get, is_fsid +from ..deployment_utils import to_deployment_container +from ..exceptions import Error +from ..file_utils import makedirs, populate_files +from ..call_wrappers import call + + +logger = logging.getLogger() + + +@register_daemon_form +class CephNvmeof(ContainerDaemonForm): + """Defines a Ceph-Nvmeof container""" + + daemon_type = 'nvmeof' + required_files = ['ceph-nvmeof.conf'] + default_image = DEFAULT_NVMEOF_IMAGE + + @classmethod + def for_daemon_type(cls, daemon_type: str) -> bool: + return cls.daemon_type == daemon_type + + def __init__( + self, ctx, fsid, daemon_id, config_json, image=DEFAULT_NVMEOF_IMAGE + ): + # type: (CephadmContext, str, Union[int, str], Dict, str) -> None + self.ctx = ctx + self.fsid = fsid + self.daemon_id = daemon_id + self.image = image + + # config-json options + self.files = dict_get(config_json, 'files', {}) + + # validate the supplied args + self.validate() + + @classmethod + def init(cls, ctx, fsid, daemon_id): + # type: (CephadmContext, str, Union[int, str]) -> CephNvmeof + return cls(ctx, fsid, daemon_id, fetch_configs(ctx), ctx.image) + + @classmethod + def create( + cls, ctx: CephadmContext, ident: DaemonIdentity + ) -> 'CephNvmeof': + return cls.init(ctx, ident.fsid, ident.daemon_id) + + @property + def identity(self) -> DaemonIdentity: + return DaemonIdentity(self.fsid, self.daemon_type, self.daemon_id) + + @staticmethod + def _get_container_mounts(data_dir: str) -> Dict[str, str]: + mounts = dict() + mounts[os.path.join(data_dir, 'config')] = '/etc/ceph/ceph.conf:z' + mounts[os.path.join(data_dir, 'keyring')] = '/etc/ceph/keyring:z' + mounts[ + os.path.join(data_dir, 'ceph-nvmeof.conf') + ] = '/src/ceph-nvmeof.conf:z' + mounts[os.path.join(data_dir, 'configfs')] = '/sys/kernel/config' + mounts['/dev/hugepages'] = '/dev/hugepages' + mounts['/dev/vfio/vfio'] = '/dev/vfio/vfio' + return mounts + + def customize_container_mounts( + self, ctx: CephadmContext, mounts: Dict[str, str] + ) -> None: + data_dir = self.identity.data_dir(ctx.data_dir) + mounts.update(self._get_container_mounts(data_dir)) + + def customize_container_binds( + self, ctx: CephadmContext, binds: List[List[str]] + ) -> None: 
+ lib_modules = [ + 'type=bind', + 'source=/lib/modules', + 'destination=/lib/modules', + 'ro=true', + ] + binds.append(lib_modules) + + @staticmethod + def get_version(ctx: CephadmContext, container_id: str) -> Optional[str]: + out, err, ret = call( + ctx, + [ + ctx.container_engine.path, + 'inspect', + '--format', + '{{index .Config.Labels "io.ceph.version"}}', + ctx.image, + ], + ) + version = None + if ret == 0: + version = out.strip() + return version + + def validate(self): + # type: () -> None + if not is_fsid(self.fsid): + raise Error('not an fsid: %s' % self.fsid) + if not self.daemon_id: + raise Error('invalid daemon_id: %s' % self.daemon_id) + if not self.image: + raise Error('invalid image: %s' % self.image) + + # check for the required files + if self.required_files: + for fname in self.required_files: + if fname not in self.files: + raise Error( + 'required file missing from config-json: %s' % fname + ) + + def get_daemon_name(self): + # type: () -> str + return '%s.%s' % (self.daemon_type, self.daemon_id) + + def get_container_name(self, desc=None): + # type: (Optional[str]) -> str + cname = '%s-%s' % (self.fsid, self.get_daemon_name()) + if desc: + cname = '%s-%s' % (cname, desc) + return cname + + def create_daemon_dirs(self, data_dir, uid, gid): + # type: (str, int, int) -> None + """Create files under the container data dir""" + if not os.path.isdir(data_dir): + raise OSError('data_dir is not a directory: %s' % (data_dir)) + + logger.info('Creating ceph-nvmeof config...') + configfs_dir = os.path.join(data_dir, 'configfs') + makedirs(configfs_dir, uid, gid, 0o755) + + # populate files from the config-json + populate_files(data_dir, self.files, uid, gid) + + @staticmethod + def configfs_mount_umount(data_dir, mount=True): + # type: (str, bool) -> List[str] + mount_path = os.path.join(data_dir, 'configfs') + if mount: + cmd = ( + 'if ! 
grep -qs {0} /proc/mounts; then ' + 'mount -t configfs none {0}; fi'.format(mount_path) + ) + else: + cmd = ( + 'if grep -qs {0} /proc/mounts; then ' + 'umount {0}; fi'.format(mount_path) + ) + return cmd.split() + + @staticmethod + def get_sysctl_settings() -> List[str]: + return [ + 'vm.nr_hugepages = 4096', + ] + + def container(self, ctx: CephadmContext) -> CephContainer: + ctr = daemon_to_container(ctx, self) + return to_deployment_container(ctx, ctr) + + def uid_gid(self, ctx: CephadmContext) -> Tuple[int, int]: + return 167, 167 # TODO: need to get properly the uid/gid + + def config_and_keyring( + self, ctx: CephadmContext + ) -> Tuple[Optional[str], Optional[str]]: + return get_config_and_keyring(ctx) + + def customize_container_args( + self, ctx: CephadmContext, args: List[str] + ) -> None: + args.append(ctx.container_engine.unlimited_pids_option) + args.extend(['--ulimit', 'memlock=-1:-1']) + args.extend(['--ulimit', 'nofile=10240']) + args.extend(['--cap-add=SYS_ADMIN', '--cap-add=CAP_SYS_NICE']) diff --git a/src/cephadm/cephadmlib/daemons/snmp.py b/src/cephadm/cephadmlib/daemons/snmp.py new file mode 100644 index 00000000000..f334e5f7652 --- /dev/null +++ b/src/cephadm/cephadmlib/daemons/snmp.py @@ -0,0 +1,226 @@ +import json +import os + +from typing import Any, Dict, List, Optional, Tuple, Union +from urllib.error import HTTPError, URLError +from urllib.request import urlopen + +from ..constants import DEFAULT_SNMP_GATEWAY_IMAGE +from ..container_daemon_form import ContainerDaemonForm, daemon_to_container +from ..container_types import CephContainer +from ..context import CephadmContext +from ..context_getters import fetch_configs, fetch_endpoints +from ..daemon_form import register as register_daemon_form +from ..daemon_identity import DaemonIdentity +from ..data_utils import is_fsid +from ..deployment_utils import to_deployment_container +from ..exceptions import Error +from ..file_utils import write_new + + +@register_daemon_form +class SNMPGateway(ContainerDaemonForm): + """Defines an SNMP gateway between Prometheus and SNMP monitoring Frameworks""" + + daemon_type = 'snmp-gateway' + SUPPORTED_VERSIONS = ['V2c', 'V3'] + default_image = DEFAULT_SNMP_GATEWAY_IMAGE + DEFAULT_PORT = 9464 + env_filename = 'snmp-gateway.conf' + + @classmethod + def for_daemon_type(cls, daemon_type: str) -> bool: + return cls.daemon_type == daemon_type + + def __init__( + self, + ctx: CephadmContext, + fsid: str, + daemon_id: Union[int, str], + config_json: Dict[str, Any], + image: Optional[str] = None, + ) -> None: + self.ctx = ctx + self.fsid = fsid + self.daemon_id = daemon_id + self.image = image or SNMPGateway.default_image + + self.uid = config_json.get('uid', 0) + self.gid = config_json.get('gid', 0) + + self.destination = config_json.get('destination', '') + self.snmp_version = config_json.get('snmp_version', 'V2c') + self.snmp_community = config_json.get('snmp_community', 'public') + self.log_level = config_json.get('log_level', 'info') + self.snmp_v3_auth_username = config_json.get( + 'snmp_v3_auth_username', '' + ) + self.snmp_v3_auth_password = config_json.get( + 'snmp_v3_auth_password', '' + ) + self.snmp_v3_auth_protocol = config_json.get( + 'snmp_v3_auth_protocol', '' + ) + self.snmp_v3_priv_protocol = config_json.get( + 'snmp_v3_priv_protocol', '' + ) + self.snmp_v3_priv_password = config_json.get( + 'snmp_v3_priv_password', '' + ) + self.snmp_v3_engine_id = config_json.get('snmp_v3_engine_id', '') + + self.validate() + + @classmethod + def init( + cls, ctx: CephadmContext, 
fsid: str, daemon_id: Union[int, str] + ) -> 'SNMPGateway': + cfgs = fetch_configs(ctx) + assert cfgs # assert some config data was found + return cls(ctx, fsid, daemon_id, cfgs, ctx.image) + + @classmethod + def create( + cls, ctx: CephadmContext, ident: DaemonIdentity + ) -> 'SNMPGateway': + return cls.init(ctx, ident.fsid, ident.daemon_id) + + @property + def identity(self) -> DaemonIdentity: + return DaemonIdentity(self.fsid, self.daemon_type, self.daemon_id) + + @staticmethod + def get_version( + ctx: CephadmContext, fsid: str, daemon_id: str + ) -> Optional[str]: + """Return the version of the notifier from it's http endpoint""" + path = os.path.join( + ctx.data_dir, fsid, f'snmp-gateway.{daemon_id}', 'unit.meta' + ) + try: + with open(path, 'r') as env: + metadata = json.loads(env.read()) + except (OSError, json.JSONDecodeError): + return None + + ports = metadata.get('ports', []) + if not ports: + return None + + try: + with urlopen(f'http://127.0.0.1:{ports[0]}/') as r: + html = r.read().decode('utf-8').split('\n') + except (HTTPError, URLError): + return None + + for h in html: + stripped = h.strip() + if stripped.startswith(('<pre>', '<PRE>')) and stripped.endswith( + ('</pre>', '</PRE>') + ): + # <pre>(version=1.2.1, branch=HEAD, revision=7... + return stripped.split(',')[0].split('version=')[1] + + return None + + @property + def port(self) -> int: + endpoints = fetch_endpoints(self.ctx) + if not endpoints: + return self.DEFAULT_PORT + return endpoints[0].port + + def get_daemon_args(self) -> List[str]: + v3_args = [] + base_args = [ + f'--web.listen-address=:{self.port}', + f'--snmp.destination={self.destination}', + f'--snmp.version={self.snmp_version}', + f'--log.level={self.log_level}', + '--snmp.trap-description-template=/etc/snmp_notifier/description-template.tpl', + ] + + if self.snmp_version == 'V3': + # common auth settings + v3_args.extend( + [ + '--snmp.authentication-enabled', + f'--snmp.authentication-protocol={self.snmp_v3_auth_protocol}', + f'--snmp.security-engine-id={self.snmp_v3_engine_id}', + ] + ) + # authPriv setting is applied if we have a privacy protocol setting + if self.snmp_v3_priv_protocol: + v3_args.extend( + [ + '--snmp.private-enabled', + f'--snmp.private-protocol={self.snmp_v3_priv_protocol}', + ] + ) + + return base_args + v3_args + + @property + def data_dir(self) -> str: + return os.path.join( + self.ctx.data_dir, + self.ctx.fsid, + f'{self.daemon_type}.{self.daemon_id}', + ) + + @property + def conf_file_path(self) -> str: + return os.path.join(self.data_dir, self.env_filename) + + def create_daemon_conf(self) -> None: + """Creates the environment file holding 'secrets' passed to the snmp-notifier daemon""" + with write_new(self.conf_file_path) as f: + if self.snmp_version == 'V2c': + f.write(f'SNMP_NOTIFIER_COMMUNITY={self.snmp_community}\n') + else: + f.write( + f'SNMP_NOTIFIER_AUTH_USERNAME={self.snmp_v3_auth_username}\n' + ) + f.write( + f'SNMP_NOTIFIER_AUTH_PASSWORD={self.snmp_v3_auth_password}\n' + ) + if self.snmp_v3_priv_password: + f.write( + f'SNMP_NOTIFIER_PRIV_PASSWORD={self.snmp_v3_priv_password}\n' + ) + + def validate(self) -> None: + """Validate the settings + + Raises: + Error: if the fsid doesn't look like an fsid + Error: if the snmp version is not supported + Error: destination IP and port address missing + """ + if not is_fsid(self.fsid): + raise Error(f'not a valid fsid: {self.fsid}') + + if self.snmp_version not in SNMPGateway.SUPPORTED_VERSIONS: + raise Error(f'not a valid snmp version: {self.snmp_version}') + + if 
not self.destination: + raise Error( + 'config is missing destination attribute(<ip>:<port>) of the target SNMP listener' + ) + + def container(self, ctx: CephadmContext) -> CephContainer: + ctr = daemon_to_container(ctx, self) + return to_deployment_container(ctx, ctr) + + def uid_gid(self, ctx: CephadmContext) -> Tuple[int, int]: + return self.uid, self.gid + + def customize_container_args( + self, ctx: CephadmContext, args: List[str] + ) -> None: + args.append(f'--env-file={self.conf_file_path}') + + def customize_process_args( + self, ctx: CephadmContext, args: List[str] + ) -> None: + args.extend(self.get_daemon_args()) diff --git a/src/cephadm/cephadmlib/daemons/tracing.py b/src/cephadm/cephadmlib/daemons/tracing.py new file mode 100644 index 00000000000..4d4fecacbb0 --- /dev/null +++ b/src/cephadm/cephadmlib/daemons/tracing.py @@ -0,0 +1,116 @@ +import logging + +from typing import Any, Dict, List, Tuple + +from ..constants import ( + DEFAULT_ELASTICSEARCH_IMAGE, + DEFAULT_JAEGER_AGENT_IMAGE, + DEFAULT_JAEGER_COLLECTOR_IMAGE, + DEFAULT_JAEGER_QUERY_IMAGE, +) +from ..container_daemon_form import ContainerDaemonForm, daemon_to_container +from ..container_types import CephContainer +from ..context import CephadmContext +from ..context_getters import fetch_configs +from ..daemon_form import register as register_daemon_form +from ..daemon_identity import DaemonIdentity +from ..deployment_utils import to_deployment_container + + +logger = logging.getLogger() + + +@register_daemon_form +class Tracing(ContainerDaemonForm): + """Define the configs for the jaeger tracing containers""" + + components: Dict[str, Dict[str, Any]] = { + 'elasticsearch': { + 'image': DEFAULT_ELASTICSEARCH_IMAGE, + 'envs': ['discovery.type=single-node'], + }, + 'jaeger-agent': { + 'image': DEFAULT_JAEGER_AGENT_IMAGE, + }, + 'jaeger-collector': { + 'image': DEFAULT_JAEGER_COLLECTOR_IMAGE, + }, + 'jaeger-query': { + 'image': DEFAULT_JAEGER_QUERY_IMAGE, + }, + } # type: ignore + + @classmethod + def for_daemon_type(cls, daemon_type: str) -> bool: + return daemon_type in cls.components + + @staticmethod + def set_configuration(config: Dict[str, str], daemon_type: str) -> None: + if daemon_type in ['jaeger-collector', 'jaeger-query']: + assert 'elasticsearch_nodes' in config + Tracing.components[daemon_type]['envs'] = [ + 'SPAN_STORAGE_TYPE=elasticsearch', + f'ES_SERVER_URLS={config["elasticsearch_nodes"]}', + ] + if daemon_type == 'jaeger-agent': + assert 'collector_nodes' in config + Tracing.components[daemon_type]['daemon_args'] = [ + f'--reporter.grpc.host-port={config["collector_nodes"]}', + '--processor.jaeger-compact.server-host-port=6799', + ] + + def __init__(self, ident: DaemonIdentity) -> None: + self._identity = ident + self._configured = False + + def _configure(self, ctx: CephadmContext) -> None: + if self._configured: + return + config = fetch_configs(ctx) + # Currently, this method side-effects the class attribute, and that + # is unpleasant. 
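To make that side effect concrete, here is a tiny standalone sketch (not part of this patch) of the envs that end up in the component table for a jaeger-collector; the elasticsearch URL is an invented example value:

    # illustrative only: mirrors the 'jaeger-collector' branch of
    # set_configuration() shown above
    config = {'elasticsearch_nodes': 'http://10.1.2.3:9200'}  # example value
    envs = [
        'SPAN_STORAGE_TYPE=elasticsearch',
        f'ES_SERVER_URLS={config["elasticsearch_nodes"]}',
    ]
    assert envs == [
        'SPAN_STORAGE_TYPE=elasticsearch',
        'ES_SERVER_URLS=http://10.1.2.3:9200',
    ]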
In the future it would be nice to move all of + set_configuration into _configure and only modify each class's data + independently. + + @classmethod + def create(cls, ctx: CephadmContext, ident: DaemonIdentity) -> 'Tracing': + return cls(ident) + + @property + def identity(self) -> DaemonIdentity: + return self._identity + + def container(self, ctx: CephadmContext) -> CephContainer: + ctr = daemon_to_container(ctx, self) + return to_deployment_container(ctx, ctr) + + def uid_gid(self, ctx: CephadmContext) -> Tuple[int, int]: + return 65534, 65534 + + def get_daemon_args(self) -> List[str]: + return self.components[self.identity.daemon_type].get( + 'daemon_args', [] + ) + + def customize_process_args( + self, ctx: CephadmContext, args: List[str] + ) -> None: + self._configure(ctx) + # earlier code did an explicit check if the daemon type was jaeger-agent + # and would only call get_daemon_args if that was true. However, since + # the function only returns a non-empty list in the case of jaeger-agent, + # that check is unnecessary and is not brought over. + args.extend(self.get_daemon_args()) + + def customize_container_envs( + self, ctx: CephadmContext, envs: List[str] + ) -> None: + self._configure(ctx) + envs.extend( + self.components[self.identity.daemon_type].get('envs', []) + ) + + def default_entrypoint(self) -> str: + return '' diff --git a/src/cephadm/cephadmlib/deployment_utils.py b/src/cephadm/cephadmlib/deployment_utils.py new file mode 100644 index 00000000000..908fa979f1a --- /dev/null +++ b/src/cephadm/cephadmlib/deployment_utils.py @@ -0,0 +1,35 @@ +import os + +from .container_types import CephContainer +from .context import CephadmContext +from cephadmlib.context_getters import fetch_custom_config_files + + +def to_deployment_container( + ctx: CephadmContext, ctr: CephContainer +) -> CephContainer: + """Given a standard ceph container instance, return a CephContainer + prepared for a deployment as a daemon, having the extra args and + custom configurations added. + NOTE: The `ctr` object is mutated before being returned. 
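As an illustration of that mapping (a standalone sketch, not the function itself; the fsid, daemon name, and file name are the same values exercised by test_to_deployment_container later in this changeset):

    import os

    # values borrowed from test_to_deployment_container further down
    data_dir = 'data'
    fsid = '9b9d7609-f4d5-4aba-94c8-effa764d96c9'
    daemon = 'grafana.host1'
    conf = {'mount_path': '/etc/testing.str', 'content': 'this is a string'}

    # the custom config file is staged under the cluster's data dir ...
    file_path = os.path.join(
        data_dir, fsid, 'custom_config_files', daemon,
        os.path.basename(conf['mount_path']),
    )
    # ... and then bind-mounted over the requested path inside the container
    volume_mounts = {file_path: conf['mount_path']}
    assert volume_mounts[file_path] == '/etc/testing.str'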
+ """ + if 'extra_container_args' in ctx and ctx.extra_container_args: + ctr.container_args.extend(ctx.extra_container_args) + if 'extra_entrypoint_args' in ctx and ctx.extra_entrypoint_args: + ctr.args.extend(ctx.extra_entrypoint_args) + ccfiles = fetch_custom_config_files(ctx) + if ccfiles: + mandatory_keys = ['mount_path', 'content'] + for conf in ccfiles: + if all(k in conf for k in mandatory_keys): + mount_path = conf['mount_path'] + assert ctr.identity + file_path = os.path.join( + ctx.data_dir, + ctr.identity.fsid, + 'custom_config_files', + ctr.identity.daemon_name, + os.path.basename(mount_path), + ) + ctr.volume_mounts[file_path] = mount_path + return ctr diff --git a/src/cephadm/cephadmlib/file_utils.py b/src/cephadm/cephadmlib/file_utils.py index 7c9e6f69e43..1b9f11499a4 100644 --- a/src/cephadm/cephadmlib/file_utils.py +++ b/src/cephadm/cephadmlib/file_utils.py @@ -139,3 +139,7 @@ def get_file_timestamp(fn): ).strftime(DATEFMT) except Exception: return None + + +def make_run_dir(fsid: str, uid: int, gid: int) -> None: + makedirs(f'/var/run/ceph/{fsid}', uid, gid, 0o770) diff --git a/src/cephadm/cephadmlib/logging.py b/src/cephadm/cephadmlib/logging.py index 5e306484b98..f5893d3a51d 100644 --- a/src/cephadm/cephadmlib/logging.py +++ b/src/cephadm/cephadmlib/logging.py @@ -12,6 +12,10 @@ from typing import List, Any, Dict, Optional, cast from .context import CephadmContext from .constants import QUIET_LOG_LEVEL, LOG_DIR +from cephadmlib.file_utils import write_new + +from cephadmlib import templating + class _ExcludeErrorsFilter(logging.Filter): def filter(self, record: logging.LogRecord) -> bool: @@ -145,18 +149,6 @@ _interactive_logging_config = { } -_logrotate_data = """# created by cephadm -/var/log/ceph/cephadm.log { - rotate 7 - daily - compress - missingok - notifempty - su root root -} -""" - - _VERBOSE_HANDLERS = [ 'console', 'console_stdout', @@ -222,9 +214,7 @@ def cephadm_init_logging( logger.setLevel(QUIET_LOG_LEVEL) - if not os.path.exists(ctx.logrotate_dir + '/cephadm'): - with open(ctx.logrotate_dir + '/cephadm', 'w') as f: - f.write(_logrotate_data) + write_cephadm_logrotate_config(ctx) for handler in logger.handlers: # the following little hack ensures that no matter how cephadm is named @@ -239,3 +229,48 @@ def cephadm_init_logging( if ctx.verbose and handler.name in _VERBOSE_HANDLERS: handler.setLevel(QUIET_LOG_LEVEL) logger.debug('%s\ncephadm %s' % ('-' * 80, args)) + + +def write_cephadm_logrotate_config(ctx: CephadmContext) -> None: + if not os.path.exists(ctx.logrotate_dir + '/cephadm'): + with open(ctx.logrotate_dir + '/cephadm', 'w') as f: + cephadm_logrotate_config = templating.render( + ctx, templating.Templates.cephadm_logrotate_config + ) + f.write(cephadm_logrotate_config) + + +def write_cluster_logrotate_config(ctx: CephadmContext, fsid: str) -> None: + # logrotate for the cluster + with write_new(ctx.logrotate_dir + f'/ceph-{fsid}', perms=None) as f: + """ + See cephadm/cephadmlib/templates/cluster.logrotate.config.j2 to + get a better idea what this comment is referring to + + This is a bit sloppy in that the killall/pkill will touch all ceph daemons + in all containers, but I don't see an elegant way to send SIGHUP *just* to + the daemons for this cluster. (1) systemd kill -s will get the signal to + podman, but podman will exit. (2) podman kill will get the signal to the + first child (bash), but that isn't the ceph daemon. This is simpler and + should be harmless. 
+ """ + targets: List[str] = [ + 'ceph-mon', + 'ceph-mgr', + 'ceph-mds', + 'ceph-osd', + 'ceph-fuse', + 'radosgw', + 'rbd-mirror', + 'cephfs-mirror', + 'tcmu-runner', + ] + + logrotate_config = templating.render( + ctx, + templating.Templates.cluster_logrotate_config, + fsid=fsid, + targets=targets, + ) + + f.write(logrotate_config) diff --git a/src/cephadm/cephadmlib/runscripts.py b/src/cephadm/cephadmlib/runscripts.py new file mode 100644 index 00000000000..7772248f362 --- /dev/null +++ b/src/cephadm/cephadmlib/runscripts.py @@ -0,0 +1,244 @@ +import contextlib +import json +import pathlib +import shlex + +from typing import Any, Dict, Union, List, IO, TextIO, Optional, cast + +from .container_engines import Podman +from .container_types import CephContainer, InitContainer +from .context import CephadmContext +from .context_getters import fetch_meta +from .daemon_identity import DaemonIdentity +from .file_utils import write_new +from .net_utils import EndPoint + + +# Ideally, all ContainerCommands would be converted to init containers. Until +# that is done one can wrap a CephContainer in a ContainerCommand object and +# pass that as a pre- or post- command to run arbitrary container based +# commands in the script. +class ContainerCommand: + def __init__( + self, + container: CephContainer, + comment: str = '', + background: bool = False, + ): + self.container = container + self.comment = comment + self.background = background + + +Command = Union[List[str], str, ContainerCommand] + + +def write_service_scripts( + ctx: CephadmContext, + ident: DaemonIdentity, + *, + container: CephContainer, + init_containers: Optional[List[InitContainer]] = None, + endpoints: Optional[List[EndPoint]] = None, + pre_start_commands: Optional[List[Command]] = None, + post_stop_commands: Optional[List[Command]] = None, + timeout: Optional[int] = None, +) -> None: + """Write the scripts that systemd services will call in order to + start/stop/etc components of a cephadm managed daemon. Also writes some + metadata about the service getting deployed. + """ + data_dir = pathlib.Path(ident.data_dir(ctx.data_dir)) + run_file_path = data_dir / 'unit.run' + meta_file_path = data_dir / 'unit.meta' + post_stop_file_path = data_dir / 'unit.poststop' + stop_file_path = data_dir / 'unit.stop' + image_file_path = data_dir / 'unit.image' + # use an ExitStack to make writing the files an all-or-nothing affair. 
If + # any file fails to write then the write_new'd file will not get renamed + # into place + with contextlib.ExitStack() as estack: + # write out the main file to run (start) a service + runf = estack.enter_context(write_new(run_file_path)) + runf.write('set -e\n') + for command in pre_start_commands or []: + _write_command(ctx, runf, command) + init_containers = init_containers or [] + if init_containers: + _write_init_container_cmds_clean(ctx, runf, init_containers[0]) + for idx, ic in enumerate(init_containers): + _write_init_container_cmds(ctx, runf, idx, ic) + _write_container_cmd_to_bash(ctx, runf, container, ident.daemon_name) + + # some metadata about the deploy + metaf = estack.enter_context(write_new(meta_file_path)) + meta: Dict[str, Any] = fetch_meta(ctx) + meta.update( + { + 'memory_request': int(ctx.memory_request) + if ctx.memory_request + else None, + 'memory_limit': int(ctx.memory_limit) + if ctx.memory_limit + else None, + } + ) + if not meta.get('ports'): + if endpoints: + meta['ports'] = [e.port for e in endpoints] + else: + meta['ports'] = [] + metaf.write(json.dumps(meta, indent=4) + '\n') + + # post-stop command(s) + pstopf = estack.enter_context(write_new(post_stop_file_path)) + # this is a fallback to eventually stop any underlying container that + # was not stopped properly by unit.stop, this could happen in very slow + # setups as described in the issue + # https://tracker.ceph.com/issues/58242. + _write_stop_actions(ctx, cast(TextIO, pstopf), container, timeout) + for command in post_stop_commands or []: + _write_command(ctx, pstopf, command) + + # stop command(s) + stopf = estack.enter_context(write_new(stop_file_path)) + _write_stop_actions(ctx, cast(TextIO, stopf), container, timeout) + + if container: + imgf = estack.enter_context(write_new(image_file_path)) + imgf.write(container.image + '\n') + + +def _write_container_cmd_to_bash( + ctx: CephadmContext, + file_obj: IO[str], + container: 'CephContainer', + comment: Optional[str] = None, + background: Optional[bool] = False, +) -> None: + if comment: + # Sometimes adding a comment, especially if there are multiple containers in one + # unit file, makes it easier to read and grok. + assert '\n' not in comment + file_obj.write(f'# {comment}\n') + # Sometimes, adding `--rm` to a run_cmd doesn't work. Let's remove the container manually + _bash_cmd( + file_obj, container.rm_cmd(old_cname=True), check=False, stderr=False + ) + _bash_cmd(file_obj, container.rm_cmd(), check=False, stderr=False) + + # Sometimes, `podman rm` doesn't find the container. 
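To show what those helper calls emit, here is a standalone sketch mirroring the _bash_cmd helper defined further down (the podman path and container name are invented placeholders): the flags map to a leading '!', a '2> /dev/null' suffix, and a trailing '&' respectively.

    import shlex

    def bash_line(cmd, check=True, stderr=True, background=False):
        # same transformation as _bash_cmd: quote args, then apply the flags
        line = ' '.join(shlex.quote(arg) for arg in cmd)
        if not check:
            line = f'! {line}'
        if not stderr:
            line = f'{line} 2> /dev/null'
        if background:
            line = f'{line} &'
        return line

    # e.g. a "remove any stale container" step, which must not abort the script
    print(bash_line(['/usr/bin/podman', 'rm', '--force', 'ceph-FSID-mon-a'],
                    check=False, stderr=False))
    # -> ! /usr/bin/podman rm --force ceph-FSID-mon-a 2> /dev/null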
Then you'll have to add `--storage` + if isinstance(ctx.container_engine, Podman): + _bash_cmd( + file_obj, + container.rm_cmd(storage=True), + check=False, + stderr=False, + ) + _bash_cmd( + file_obj, + container.rm_cmd(old_cname=True, storage=True), + check=False, + stderr=False, + ) + + # container run command + _bash_cmd(file_obj, container.run_cmd(), background=bool(background)) + + +def _write_init_container_cmds( + ctx: CephadmContext, + file_obj: IO[str], + index: int, + init_container: 'InitContainer', +) -> None: + file_obj.write(f'# init container {index}: {init_container.cname}\n') + _bash_cmd(file_obj, init_container.run_cmd()) + _write_init_container_cmds_clean( + ctx, file_obj, init_container, comment='' + ) + + +def _write_init_container_cmds_clean( + ctx: CephadmContext, + file_obj: IO[str], + init_container: 'InitContainer', + comment: str = 'init container cleanup', +) -> None: + if comment: + assert '\n' not in comment + file_obj.write(f'# {comment}\n') + _bash_cmd( + file_obj, + init_container.rm_cmd(), + check=False, + stderr=False, + ) + # Sometimes, `podman rm` doesn't find the container. Then you'll have to add `--storage` + if isinstance(ctx.container_engine, Podman): + _bash_cmd( + file_obj, + init_container.rm_cmd(storage=True), + check=False, + stderr=False, + ) + + +def _write_stop_actions( + ctx: CephadmContext, + f: TextIO, + container: 'CephContainer', + timeout: Optional[int], +) -> None: + # following generated script basically checks if the container exists + # before stopping it. Exit code will be success either if it doesn't + # exist or if it exists and is stopped successfully. + container_exists = f'{ctx.container_engine.path} inspect %s &>/dev/null' + f.write( + f'! {container_exists % container.old_cname} || {" ".join(container.stop_cmd(old_cname=True, timeout=timeout))} \n' + ) + f.write( + f'! {container_exists % container.cname} || {" ".join(container.stop_cmd(timeout=timeout))} \n' + ) + + +def _bash_cmd( + fh: IO[str], + cmd: List[str], + check: bool = True, + background: bool = False, + stderr: bool = True, +) -> None: + line = ' '.join(shlex.quote(arg) for arg in cmd) + if not check: + line = f'! {line}' + if not stderr: + line = f'{line} 2> /dev/null' + if background: + line = f'{line} &' + fh.write(line) + fh.write('\n') + + +def _write_command( + ctx: CephadmContext, + fh: IO[str], + cmd: Command, +) -> None: + """Wrapper func for turning a command list or string into something suitable + for appending to a run script. + """ + if isinstance(cmd, list): + _bash_cmd(fh, cmd) + elif isinstance(cmd, ContainerCommand): + _write_container_cmd_to_bash( + ctx, + fh, + cmd.container, + comment=cmd.comment, + background=cmd.background, + ) + else: + fh.write(cmd) + if not cmd.endswith('\n'): + fh.write('\n') diff --git a/src/cephadm/cephadmlib/systemd_unit.py b/src/cephadm/cephadmlib/systemd_unit.py new file mode 100644 index 00000000000..0c392c77bfe --- /dev/null +++ b/src/cephadm/cephadmlib/systemd_unit.py @@ -0,0 +1,81 @@ +# systemd_unit.py - creating/managing systemd unit files + +import os + +from . 
import templating +from .call_wrappers import call_throws +from .container_engines import Docker, Podman +from .context import CephadmContext +from .daemon_identity import DaemonIdentity +from .file_utils import write_new +from .logging import write_cluster_logrotate_config + + +def _get_unit_file(ctx: CephadmContext, fsid: str) -> str: + has_docker_engine = isinstance(ctx.container_engine, Docker) + has_podman_engine = isinstance(ctx.container_engine, Podman) + has_podman_split_version = ( + has_podman_engine and ctx.container_engine.supports_split_cgroups + ) + return templating.render( + ctx, + templating.Templates.ceph_service, + fsid=fsid, + has_docker_engine=has_docker_engine, + has_podman_engine=has_podman_engine, + has_podman_split_version=has_podman_split_version, + ) + + +def _install_base_units(ctx: CephadmContext, fsid: str) -> None: + """ + Set up ceph.target and ceph-$fsid.target units. + """ + # global unit + existed = os.path.exists(ctx.unit_dir + '/ceph.target') + with write_new(ctx.unit_dir + '/ceph.target', perms=None) as f: + f.write('[Unit]\n' + 'Description=All Ceph clusters and services\n' + '\n' + '[Install]\n' + 'WantedBy=multi-user.target\n') + if not existed: + # we disable before enable in case a different ceph.target + # (from the traditional package) is present; while newer + # systemd is smart enough to disable the old + # (/lib/systemd/...) and enable the new (/etc/systemd/...), + # some older versions of systemd error out with EEXIST. + call_throws(ctx, ['systemctl', 'disable', 'ceph.target']) + call_throws(ctx, ['systemctl', 'enable', 'ceph.target']) + call_throws(ctx, ['systemctl', 'start', 'ceph.target']) + + # cluster unit + existed = os.path.exists(ctx.unit_dir + '/ceph-%s.target' % fsid) + with write_new(ctx.unit_dir + f'/ceph-{fsid}.target', perms=None) as f: + f.write( + '[Unit]\n' + 'Description=Ceph cluster {fsid}\n' + 'PartOf=ceph.target\n' + 'Before=ceph.target\n' + '\n' + '[Install]\n' + 'WantedBy=multi-user.target ceph.target\n'.format( + fsid=fsid) + ) + if not existed: + call_throws(ctx, ['systemctl', 'enable', 'ceph-%s.target' % fsid]) + call_throws(ctx, ['systemctl', 'start', 'ceph-%s.target' % fsid]) + + # don't overwrite file in order to allow users to manipulate it + if os.path.exists(ctx.logrotate_dir + f'/ceph-{fsid}'): + return + + write_cluster_logrotate_config(ctx, fsid) + + +def update_files(ctx: CephadmContext, ident: DaemonIdentity) -> None: + _install_base_units(ctx, ident.fsid) + unit = _get_unit_file(ctx, ident.fsid) + unit_file = 'ceph-%s@.service' % (ident.fsid) + with write_new(ctx.unit_dir + '/' + unit_file, perms=None) as f: + f.write(unit) diff --git a/src/cephadm/cephadmlib/templates/cephadm.logrotate.config.j2 b/src/cephadm/cephadmlib/templates/cephadm.logrotate.config.j2 new file mode 100644 index 00000000000..b18aaff2196 --- /dev/null +++ b/src/cephadm/cephadmlib/templates/cephadm.logrotate.config.j2 @@ -0,0 +1,9 @@ +# created by cephadm +/var/log/ceph/cephadm.log { + rotate 7 + daily + compress + missingok + notifempty + su root root +} diff --git a/src/cephadm/cephadmlib/templates/cluster.logrotate.config.j2 b/src/cephadm/cephadmlib/templates/cluster.logrotate.config.j2 new file mode 100644 index 00000000000..9af2f955d90 --- /dev/null +++ b/src/cephadm/cephadmlib/templates/cluster.logrotate.config.j2 @@ -0,0 +1,13 @@ +# created by cephadm +/var/log/ceph/{{ fsid }}/*.log { + rotate 7 + daily + compress + sharedscripts + postrotate + killall -q -1 {{ targets|join(' ') }} || pkill -1 -x '{{ targets|join('|') }}' || 
true + endscript + missingok + notifempty + su root root +} diff --git a/src/cephadm/cephadmlib/templating.py b/src/cephadm/cephadmlib/templating.py index e6e8d5e0ea2..5f4dda05608 100644 --- a/src/cephadm/cephadmlib/templating.py +++ b/src/cephadm/cephadmlib/templating.py @@ -3,6 +3,7 @@ import enum import os import posixpath +import shlex import zipimport from typing import Any, Optional, IO, Tuple, Callable, cast @@ -21,6 +22,8 @@ class Templates(str, enum.Enum): ceph_service = 'ceph.service.j2' agent_service = 'agent.service.j2' + cluster_logrotate_config = 'cluster.logrotate.config.j2' + cephadm_logrotate_config = 'cephadm.logrotate.config.j2' def __str__(self) -> str: return self.value @@ -29,6 +32,25 @@ class Templates(str, enum.Enum): return repr(self.value) +class TemplateNotFoundInZipApp(jinja2.TemplateNotFound): + def __init__( + self, + template: str, + *, + relative_path: str = '', + archive_path: str = '', + ) -> None: + super().__init__(template) + self.relative_path = relative_path + self.archive_path = archive_path + + def __str__(self) -> str: + return ( + f'{self.message}: path {self.relative_path!r}' + f' not found in {self.archive_path!r}' + ) + + class _PackageLoader(jinja2.PackageLoader): """Workaround for PackageLoader when using cephadm with relative paths. @@ -58,20 +80,23 @@ class _PackageLoader(jinja2.PackageLoader): def _get_archive_source(self, template: str) -> Tuple[str, str, None]: assert isinstance(self._loader, zipimport.zipimporter) - path = arelpath = os.path.normpath( - posixpath.join( - self._template_root, - *jinja2.loaders.split_template_path(template) - ) + arelpath = posixpath.join( + self.package_name, self.package_path, template ) - archive_path = os.path.normpath(self._loader.archive) - if arelpath.startswith(archive_path + '/'): - plen = len(archive_path) + 1 - arelpath = arelpath[plen:] + if any(p == '.' or p == '..' for p in arelpath.split(posixpath.sep)): + raise ValueError('template path contains invalid components') try: source = cast(bytes, self._loader.get_data(arelpath)) except OSError as e: - raise jinja2.TemplateNotFound(template) from e + not_found = TemplateNotFoundInZipApp( + template, + relative_path=arelpath, + archive_path=self._loader.archive, + ) + raise not_found from e + path = os.path.normpath( + posixpath.join(self._loader.archive, arelpath) + ) return source.decode(self.encoding), path, None @@ -89,6 +114,7 @@ class Templater: def _env(self) -> jinja2.Environment: if self._jinja2_env is None: self._jinja2_env = jinja2.Environment(loader=self._loader) + self._jinja2_env.filters['shellquote'] = shlex.quote return self._jinja2_env @property diff --git a/src/cephadm/tests/fixtures.py b/src/cephadm/tests/fixtures.py index d25dffa9e3b..572c1f9969d 100644 --- a/src/cephadm/tests/fixtures.py +++ b/src/cephadm/tests/fixtures.py @@ -6,7 +6,7 @@ import time from contextlib import contextmanager from pyfakefs import fake_filesystem -from typing import Dict, List, Optional +from typing import Dict, List, Optional, Any def import_cephadm(): @@ -183,3 +183,83 @@ def with_cephadm_ctx( else: yield ctx + +@pytest.fixture() +def funkypatch(monkeypatch): + """Defines the funkypatch fixtures that acts like a mixture between + mock.patch and pytest's monkeypatch fixture. + """ + fp = FunkyPatcher(monkeypatch) + yield fp + + +class FunkyPatcher: + """FunkyPatcher monkeypatches all imported instances of an object. 
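Side note on the templating.py change above: the new 'shellquote' filter is simply shlex.quote registered on the Jinja2 environment. A minimal equivalent outside of cephadm (jinja2 assumed available, as cephadm already depends on it):

    import shlex
    import jinja2

    env = jinja2.Environment()
    env.filters['shellquote'] = shlex.quote  # same registration as in Templater._env
    print(env.from_string('exec {{ cmd | shellquote }}').render(cmd='hello world'))
    # -> exec 'hello world'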
+ + Use `patch` to patch the canonical location of an object and FunkyPatcher + will automatically replace other imports of that object. + """ + + def __init__(self, monkeypatcher): + self._mp = monkeypatcher + # keep track of objects we've already patched. This dictionary + # maps a (module-name, object-name) tuple to the original object + # before patching. This could be used to determine if a name has + # already been patched or compare a patched object to the original. + self._originals: Dict[Tuple[str, str], Any] = {} + + def patch( + self, + mod: str, + name: str = '', + *, + dest: Any = None, + force: bool = False, + ) -> Any: + """Patch an object and all existing imports of that object. + Specify mod as `my.mod.name.obj` where obj is the name of the object to be + patched or as `my.mod.name` and specify `name` as the name of the + object to be patched. + If the object to be patched is not imported as the same name in `mod` + it will *not* be automatically patched. In other words, `from + my.mod.name import foo` will work, but `from my.mod.name import foo as + _foo` will not. + Use the keyword-only argument `dest` to specify the new object to be + used. A MagicMock will be created and used if dest is None. + Use the keyword-only argument `force` to override the check that mocked + objects are the same across modules. This can be used in the case that + some other code already patched an object and you want funkypatch to + override that patch (use with caution). + Returns the patched object (the MagicMock or supplied dest). + """ + import sys + import importlib + + if not name: + mod, name = mod.rsplit('.', 1) + modname = (mod, name) + # We don't strictly need the check but patching already patched objs is + # confusing to think about. It's better to block it for now and perhaps + # later we can relax these restrictions or be clever in some way. + if modname in self._originals: + raise KeyError(f'{modname} already patched') + + if dest is None: + dest = mock.MagicMock() + + imod = importlib.import_module(mod) + self._originals[modname] = getattr(imod, name) + + for mname, imod in sys.modules.items(): + try: + obj = getattr(imod, name) + except AttributeError: + # no matching name in module + continue + # make sure that the module imported the same object as the + # one we want to patch out, and not just some naming collision. 
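Usage-wise, the tests later in this changeset exercise the fixture roughly like the following sketch (the patched module path is one they really use; the canned return value mimics a successful call):

    def test_call_is_stubbed_everywhere(funkypatch):
        _call = funkypatch.patch('cephadmlib.call_wrappers.call')
        _call.return_value = ('', '', 0)
        # every module that imported `call` under that name now sees the
        # MagicMock, so code under test that shells out gets ('', '', 0)
        assert _call.return_value == ('', '', 0)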
+ # ensure the original object and the one in the module are the + # same object + if obj is self._originals[modname] or force: + self._mp.setattr(imod, name, dest) + return dest diff --git a/src/cephadm/tests/test_cephadm.py b/src/cephadm/tests/test_cephadm.py index 8db8edd0c1b..82850ab597d 100644 --- a/src/cephadm/tests/test_cephadm.py +++ b/src/cephadm/tests/test_cephadm.py @@ -16,6 +16,7 @@ from .fixtures import ( with_cephadm_ctx, mock_bad_firewalld, import_cephadm, + funkypatch, ) from pyfakefs import fake_filesystem @@ -317,13 +318,17 @@ class TestCephAdm(object): with pytest.raises(Exception): _cephadm.prepare_dashboard(ctx, 0, 0, lambda _, extra_mounts=None, ___=None : '5', lambda : None) - @mock.patch('cephadm.logger') - @mock.patch('cephadm.fetch_custom_config_files') - @mock.patch('cephadm.get_container') - def test_to_deployment_container(self, _get_container, _get_config, _logger): + def test_to_deployment_container(self, funkypatch): """ test to_deployment_container properly makes use of extra container args and custom conf files """ + from cephadmlib.deployment_utils import to_deployment_container + + funkypatch.patch('cephadm.logger') + _get_config = funkypatch.patch( + 'cephadmlib.deployment_utils.fetch_custom_config_files' + ) + _get_container = funkypatch.patch('cephadm.get_container') ctx = _cephadm.CephadmContext() ctx.config_json = '-' @@ -357,32 +362,45 @@ class TestCephAdm(object): host_network=True, ) c = _cephadm.get_container(ctx, ident) - c = _cephadm.to_deployment_container(ctx, c) + c = to_deployment_container(ctx, c) assert '--pids-limit=12345' in c.container_args assert '--something' in c.container_args assert os.path.join('data', '9b9d7609-f4d5-4aba-94c8-effa764d96c9', 'custom_config_files', 'grafana.host1', 'testing.str') in c.volume_mounts assert c.volume_mounts[os.path.join('data', '9b9d7609-f4d5-4aba-94c8-effa764d96c9', 'custom_config_files', 'grafana.host1', 'testing.str')] == '/etc/testing.str' - @mock.patch('cephadm.logger') - @mock.patch('cephadm.FileLock') - @mock.patch('cephadm.deploy_daemon') - @mock.patch('cephadm.make_var_run') - @mock.patch('cephadm.migrate_sysctl_dir') - @mock.patch('cephadm.check_unit', lambda *args, **kwargs: (None, 'running', None)) - @mock.patch('cephadm.get_unit_name', lambda *args, **kwargs: 'mon-unit-name') - @mock.patch('cephadm.extract_uid_gid', lambda *args, **kwargs: (0, 0)) - @mock.patch('cephadm.get_container') - @mock.patch('cephadm.apply_deploy_config_to_ctx', lambda d, c: None) - def test_mon_crush_location(self, _get_container, _migrate_sysctl, _make_var_run, _deploy_daemon, _file_lock, _logger, monkeypatch): + def test_mon_crush_location(self, funkypatch): """ test that crush location for mon is set if it is included in config_json """ - _fetch_configs = mock.MagicMock() - monkeypatch.setattr('cephadmlib.context_getters.fetch_configs', _fetch_configs) - monkeypatch.setattr('cephadm.fetch_configs', _fetch_configs) - monkeypatch.setattr('cephadm.read_configuration_source', lambda c: {}) - monkeypatch.setattr('cephadm.fetch_custom_config_files', mock.MagicMock()) + funkypatch.patch('cephadm.logger') + funkypatch.patch('cephadm.FileLock') + _deploy_daemon = funkypatch.patch('cephadm.deploy_daemon') + funkypatch.patch('cephadm.make_var_run') + funkypatch.patch('cephadmlib.file_utils.make_run_dir') + _migrate_sysctl = funkypatch.patch('cephadm.migrate_sysctl_dir') + funkypatch.patch( + 'cephadm.check_unit', + dest=lambda *args, **kwargs: (None, 'running', None), + ) + funkypatch.patch( + 'cephadm.get_unit_name', 
+ dest=lambda *args, **kwargs: 'mon-unit-name', + ) + funkypatch.patch( + 'cephadm.extract_uid_gid', dest=lambda *args, **kwargs: (0, 0) + ) + _get_container = funkypatch.patch('cephadm.get_container') + funkypatch.patch( + 'cephadm.apply_deploy_config_to_ctx', dest=lambda d, c: None + ) + _fetch_configs = funkypatch.patch( + 'cephadmlib.context_getters.fetch_configs' + ) + funkypatch.patch( + 'cephadm.read_configuration_source', dest=lambda c: {} + ) + funkypatch.patch('cephadm.fetch_custom_config_files') ctx = _cephadm.CephadmContext() ctx.name = 'mon.test' @@ -541,25 +559,31 @@ class TestCephAdm(object): def test_dict_get(self): - result = _cephadm.dict_get({'a': 1}, 'a', require=True) + from cephadmlib.data_utils import dict_get + + result = dict_get({'a': 1}, 'a', require=True) assert result == 1 - result = _cephadm.dict_get({'a': 1}, 'b') + result = dict_get({'a': 1}, 'b') assert result is None - result = _cephadm.dict_get({'a': 1}, 'b', default=2) + result = dict_get({'a': 1}, 'b', default=2) assert result == 2 def test_dict_get_error(self): + from cephadmlib.data_utils import dict_get + with pytest.raises(_cephadm.Error): - _cephadm.dict_get({'a': 1}, 'b', require=True) + dict_get({'a': 1}, 'b', require=True) def test_dict_get_join(self): - result = _cephadm.dict_get_join({'foo': ['a', 'b']}, 'foo') + from cephadmlib.data_utils import dict_get_join + + result = dict_get_join({'foo': ['a', 'b']}, 'foo') assert result == 'a\nb' - result = _cephadm.dict_get_join({'foo': [1, 2]}, 'foo') + result = dict_get_join({'foo': [1, 2]}, 'foo') assert result == '1\n2' - result = _cephadm.dict_get_join({'bar': 'a'}, 'bar') + result = dict_get_join({'bar': 'a'}, 'bar') assert result == 'a' - result = _cephadm.dict_get_join({'a': 1}, 'a') + result = dict_get_join({'a': 1}, 'a') assert result == 1 @mock.patch('os.listdir', return_value=[]) @@ -761,24 +785,26 @@ class TestCephAdm(object): assert _cephadm.get_container_info(ctx, daemon_filter, by_name) == output def test_should_log_to_journald(self): + from cephadmlib import context_getters + ctx = _cephadm.CephadmContext() # explicit ctx.log_to_journald = True - assert _cephadm.should_log_to_journald(ctx) + assert context_getters.should_log_to_journald(ctx) ctx.log_to_journald = None # enable if podman support --cgroup=split ctx.container_engine = mock_podman() ctx.container_engine.version = (2, 1, 0) - assert _cephadm.should_log_to_journald(ctx) + assert context_getters.should_log_to_journald(ctx) # disable on old podman ctx.container_engine.version = (2, 0, 0) - assert not _cephadm.should_log_to_journald(ctx) + assert not context_getters.should_log_to_journald(ctx) # disable on docker ctx.container_engine = mock_docker() - assert not _cephadm.should_log_to_journald(ctx) + assert not context_getters.should_log_to_journald(ctx) def test_normalize_image_digest(self): s = 'myhostname:5000/ceph/ceph@sha256:753886ad9049004395ae990fbb9b096923b5a518b819283141ee8716ddf55ad1' @@ -1186,15 +1212,17 @@ class TestMaintenance: class TestMonitoring(object): - @mock.patch('cephadm.call') + @mock.patch('cephadmlib.daemons.monitoring.call') def test_get_version_alertmanager(self, _call): + from cephadmlib.daemons import monitoring + ctx = _cephadm.CephadmContext() ctx.container_engine = mock_podman() daemon_type = 'alertmanager' # binary `prometheus` _call.return_value = '', '{}, version 0.16.1'.format(daemon_type), 0 - version = _cephadm.Monitoring.get_version(ctx, 'container_id', daemon_type) + version = monitoring.Monitoring.get_version(ctx, 'container_id', 
daemon_type) assert version == '0.16.1' # binary `prometheus-alertmanager` @@ -1205,13 +1233,15 @@ class TestMonitoring(object): version = _cephadm.Monitoring.get_version(ctx, 'container_id', daemon_type) assert version == '0.16.1' - @mock.patch('cephadm.call') + @mock.patch('cephadmlib.daemons.monitoring.call') def test_get_version_prometheus(self, _call): + from cephadmlib.daemons import monitoring + ctx = _cephadm.CephadmContext() ctx.container_engine = mock_podman() daemon_type = 'prometheus' _call.return_value = '', '{}, version 0.16.1'.format(daemon_type), 0 - version = _cephadm.Monitoring.get_version(ctx, 'container_id', daemon_type) + version = monitoring.Monitoring.get_version(ctx, 'container_id', daemon_type) assert version == '0.16.1' def test_prometheus_external_url(self): @@ -1225,13 +1255,15 @@ class TestMonitoring(object): ).get_daemon_args() assert any([x.startswith('--web.external-url=http://') for x in args]) - @mock.patch('cephadm.call') + @mock.patch('cephadmlib.daemons.monitoring.call') def test_get_version_node_exporter(self, _call): + from cephadmlib.daemons import monitoring + ctx = _cephadm.CephadmContext() ctx.container_engine = mock_podman() daemon_type = 'node-exporter' _call.return_value = '', '{}, version 0.16.1'.format(daemon_type.replace('-', '_')), 0 - version = _cephadm.Monitoring.get_version(ctx, 'container_id', daemon_type) + version = monitoring.Monitoring.get_version(ctx, 'container_id', daemon_type) assert version == '0.16.1' def test_create_daemon_dirs_prometheus(self, cephadm_fs): @@ -2113,16 +2145,12 @@ class TestValidateRepo: class TestPull: - - @mock.patch('time.sleep') - @mock.patch('cephadm.get_image_info_from_inspect', return_value={}) - @mock.patch('cephadm.logger') - def test_error(self, _logger, _get_image_info_from_inspect, _sleep, monkeypatch): - # manually create a mock and use pytest's monkeypatch fixture to set - # multiple targets to the *same* mock - _call = mock.MagicMock() - monkeypatch.setattr('cephadm.call', _call) - monkeypatch.setattr('cephadmlib.call_wrappers.call', _call) + def test_error(self, funkypatch): + funkypatch.patch('time.sleep') + funkypatch.patch('cephadm.logger') + _giifi = funkypatch.patch('cephadm.get_image_info_from_inspect') + _giifi.return_value = {} + _call = funkypatch.patch('cephadmlib.call_wrappers.call') ctx = _cephadm.CephadmContext() ctx.container_engine = mock_podman() ctx.insecure = False diff --git a/src/cephadm/tests/test_container_engine.py b/src/cephadm/tests/test_container_engine.py index 7c5ef513127..49f9f9a2e50 100644 --- a/src/cephadm/tests/test_container_engine.py +++ b/src/cephadm/tests/test_container_engine.py @@ -4,7 +4,7 @@ import pytest from tests.fixtures import with_cephadm_ctx, import_cephadm -_cephadm = import_cephadm() +from cephadmlib import container_engines _find_program_loc = 'cephadmlib.container_engine_base.find_program' @@ -29,7 +29,7 @@ def test_container_engine(): def test_podman(): with mock.patch(_find_program_loc) as find_program: find_program.return_value = "/usr/bin/podman" - pm = _cephadm.Podman() + pm = container_engines.Podman() find_program.assert_called() with pytest.raises(RuntimeError): pm.version @@ -44,7 +44,7 @@ def test_podman(): def test_podman_badversion(): with mock.patch(_find_program_loc) as find_program: find_program.return_value = "/usr/bin/podman" - pm = _cephadm.Podman() + pm = container_engines.Podman() find_program.assert_called() with mock.patch(_call_throws_loc) as call_throws: call_throws.return_value = ("4.10.beta2", None, None) @@ -56,5 
+56,5 @@ def test_podman_badversion(): def test_docker(): with mock.patch(_find_program_loc) as find_program: find_program.return_value = "/usr/bin/docker" - docker = _cephadm.Docker() + docker = container_engines.Docker() assert str(docker) == "docker (/usr/bin/docker)" diff --git a/src/cephadm/tests/test_daemon_form.py b/src/cephadm/tests/test_daemon_form.py index 07896cc5855..a2d1773f1c8 100644 --- a/src/cephadm/tests/test_daemon_form.py +++ b/src/cephadm/tests/test_daemon_form.py @@ -6,6 +6,7 @@ from .fixtures import import_cephadm from cephadmlib import daemon_form from cephadmlib import daemon_identity +from cephadmlib import daemons _cephadm = import_cephadm() @@ -22,7 +23,7 @@ _cephadm = import_cephadm() ('mon', _cephadm.Ceph), ('nfs', _cephadm.NFSGanesha), ('nvmeof', _cephadm.CephNvmeof), - ('osd', _cephadm.OSD), + ('osd', daemons.OSD), ('prometheus', _cephadm.Monitoring), ('snmp-gateway', _cephadm.SNMPGateway), ], diff --git a/src/cephadm/tests/test_deploy.py b/src/cephadm/tests/test_deploy.py index c77b243dfa7..dadf3456fd5 100644 --- a/src/cephadm/tests/test_deploy.py +++ b/src/cephadm/tests/test_deploy.py @@ -8,38 +8,35 @@ from .fixtures import ( import_cephadm, mock_podman, with_cephadm_ctx, + FunkyPatcher, + funkypatch, ) _cephadm = import_cephadm() -def _common_mp(monkeypatch): +def _common_patches(funkypatch): mocks = {} - _call = mock.MagicMock(return_value=('', '', 0)) - monkeypatch.setattr('cephadmlib.container_types.call', _call) + _call = funkypatch.patch('cephadmlib.container_types.call') + _call.return_value = ('', '', 0) mocks['call'] = _call - _call_throws = mock.MagicMock(return_value=0) - monkeypatch.setattr( - 'cephadmlib.container_types.call_throws', _call_throws - ) + _call_throws = funkypatch.patch('cephadmlib.container_types.call_throws') + _call_throws.return_value = ('', '', 0) mocks['call_throws'] = _call_throws - _firewalld = mock.MagicMock() + _firewalld = funkypatch.patch('cephadm.Firewalld') _firewalld().external_ports.get.return_value = [] - monkeypatch.setattr('cephadm.Firewalld', _firewalld) mocks['Firewalld'] = _firewalld - _extract_uid_gid = mock.MagicMock() + _extract_uid_gid = funkypatch.patch('cephadm.extract_uid_gid', force=True) _extract_uid_gid.return_value = (8765, 8765) - monkeypatch.setattr('cephadm.extract_uid_gid', _extract_uid_gid) mocks['extract_uid_gid'] = _extract_uid_gid - _install_sysctl = mock.MagicMock() - monkeypatch.setattr('cephadm.install_sysctl', _install_sysctl) + _install_sysctl = funkypatch.patch('cephadm.install_sysctl') mocks['install_sysctl'] = _install_sysctl return mocks -def test_deploy_nfs_container(cephadm_fs, monkeypatch): - mocks = _common_mp(monkeypatch) +def test_deploy_nfs_container(cephadm_fs, funkypatch): + mocks = _common_patches(funkypatch) _firewalld = mocks['Firewalld'] fsid = 'b01dbeef-701d-9abe-0000-e1e5a47004a7' with with_cephadm_ctx([]) as ctx: @@ -75,8 +72,8 @@ def test_deploy_nfs_container(cephadm_fs, monkeypatch): assert f.read() == 'FAKE' -def test_deploy_snmp_container(cephadm_fs, monkeypatch): - mocks = _common_mp(monkeypatch) +def test_deploy_snmp_container(cephadm_fs, funkypatch): + mocks = _common_patches(funkypatch) _firewalld = mocks['Firewalld'] fsid = 'b01dbeef-701d-9abe-0000-e1e5a47004a7' with with_cephadm_ctx([]) as ctx: @@ -107,8 +104,8 @@ def test_deploy_snmp_container(cephadm_fs, monkeypatch): assert not (basedir / 'keyring').exists() -def test_deploy_keepalived_container(cephadm_fs, monkeypatch): - mocks = _common_mp(monkeypatch) +def test_deploy_keepalived_container(cephadm_fs, 
funkypatch): + mocks = _common_patches(funkypatch) _firewalld = mocks['Firewalld'] _install_sysctl = mocks['install_sysctl'] fsid = 'b01dbeef-701d-9abe-0000-e1e5a47004a7' @@ -155,8 +152,8 @@ def test_deploy_keepalived_container(cephadm_fs, monkeypatch): assert len(_install_sysctl.call_args[0][-1].get_sysctl_settings()) > 1 -def test_deploy_haproxy_container(cephadm_fs, monkeypatch): - mocks = _common_mp(monkeypatch) +def test_deploy_haproxy_container(cephadm_fs, funkypatch): + mocks = _common_patches(funkypatch) _firewalld = mocks['Firewalld'] _install_sysctl = mocks['install_sysctl'] fsid = 'b01dbeef-701d-9abe-0000-e1e5a47004a7' @@ -200,8 +197,8 @@ def test_deploy_haproxy_container(cephadm_fs, monkeypatch): assert len(_install_sysctl.call_args[0][-1].get_sysctl_settings()) > 1 -def test_deploy_iscsi_container(cephadm_fs, monkeypatch): - mocks = _common_mp(monkeypatch) +def test_deploy_iscsi_container(cephadm_fs, funkypatch): + mocks = _common_patches(funkypatch) _firewalld = mocks['Firewalld'] fsid = 'b01dbeef-701d-9abe-0000-e1e5a47004a7' with with_cephadm_ctx([]) as ctx: @@ -244,8 +241,8 @@ def test_deploy_iscsi_container(cephadm_fs, monkeypatch): assert (si.st_uid, si.st_gid) == (8765, 8765) -def test_deploy_nvmeof_container(cephadm_fs, monkeypatch): - mocks = _common_mp(monkeypatch) +def test_deploy_nvmeof_container(cephadm_fs, funkypatch): + mocks = _common_patches(funkypatch) _firewalld = mocks['Firewalld'] fsid = 'b01dbeef-701d-9abe-0000-e1e5a47004a7' with with_cephadm_ctx([]) as ctx: @@ -290,11 +287,11 @@ def test_deploy_nvmeof_container(cephadm_fs, monkeypatch): assert (si.st_uid, si.st_gid) == (167, 167) -def test_deploy_a_monitoring_container(cephadm_fs, monkeypatch): - mocks = _common_mp(monkeypatch) +def test_deploy_a_monitoring_container(cephadm_fs, funkypatch): + mocks = _common_patches(funkypatch) _firewalld = mocks['Firewalld'] - _get_ip_addresses = mock.MagicMock(return_value=(['10.10.10.10'], [])) - monkeypatch.setattr('cephadm.get_ip_addresses', _get_ip_addresses) + _get_ip_addresses = funkypatch.patch('cephadmlib.net_utils.get_ip_addresses') + _get_ip_addresses.return_value = (['10.10.10.10'], []) fsid = 'b01dbeef-701d-9abe-0000-e1e5a47004a7' with with_cephadm_ctx([]) as ctx: ctx.container_engine = mock_podman() @@ -330,8 +327,8 @@ def test_deploy_a_monitoring_container(cephadm_fs, monkeypatch): assert (si.st_uid, si.st_gid) == (8765, 8765) -def test_deploy_a_tracing_container(cephadm_fs, monkeypatch): - mocks = _common_mp(monkeypatch) +def test_deploy_a_tracing_container(cephadm_fs, funkypatch): + mocks = _common_patches(funkypatch) _firewalld = mocks['Firewalld'] fsid = 'b01dbeef-701d-9abe-0000-e1e5a47004a7' with with_cephadm_ctx([]) as ctx: @@ -361,11 +358,10 @@ def test_deploy_a_tracing_container(cephadm_fs, monkeypatch): assert not (basedir / 'keyring').exists() -def test_deploy_ceph_mgr_container(cephadm_fs, monkeypatch): - mocks = _common_mp(monkeypatch) +def test_deploy_ceph_mgr_container(cephadm_fs, funkypatch): + mocks = _common_patches(funkypatch) _firewalld = mocks['Firewalld'] - _make_var_run = mock.MagicMock() - monkeypatch.setattr('cephadm.make_var_run', _make_var_run) + _make_run_dir = funkypatch.patch('cephadmlib.file_utils.make_run_dir') fsid = 'b01dbeef-701d-9abe-0000-e1e5a47004a7' with with_cephadm_ctx([]) as ctx: ctx.container_engine = mock_podman() @@ -399,16 +395,15 @@ def test_deploy_ceph_mgr_container(cephadm_fs, monkeypatch): assert f.read() == 'XXXXXXX' with open(basedir / 'keyring') as f: assert f.read() == 'YYYYYY' - assert 
_make_var_run.call_count == 1 - assert _make_var_run.call_args[0][2] == 8765 - assert _make_var_run.call_args[0][3] == 8765 + assert _make_run_dir.call_count == 1 + assert _make_run_dir.call_args[0][1] == 8765 + assert _make_run_dir.call_args[0][2] == 8765 -def test_deploy_ceph_osd_container(cephadm_fs, monkeypatch): - mocks = _common_mp(monkeypatch) +def test_deploy_ceph_osd_container(cephadm_fs, funkypatch): + mocks = _common_patches(funkypatch) _firewalld = mocks['Firewalld'] - _make_var_run = mock.MagicMock() - monkeypatch.setattr('cephadm.make_var_run', _make_var_run) + _make_run_dir = funkypatch.patch('cephadmlib.file_utils.make_run_dir') fsid = 'b01dbeef-701d-9abe-0000-e1e5a47004a7' with with_cephadm_ctx([]) as ctx: ctx.container_engine = mock_podman() @@ -444,18 +439,17 @@ def test_deploy_ceph_osd_container(cephadm_fs, monkeypatch): assert f.read() == 'XXXXXXX' with open(basedir / 'keyring') as f: assert f.read() == 'YYYYYY' - assert _make_var_run.call_count == 1 - assert _make_var_run.call_args[0][2] == 8765 - assert _make_var_run.call_args[0][3] == 8765 + assert _make_run_dir.call_count == 1 + assert _make_run_dir.call_args[0][1] == 8765 + assert _make_run_dir.call_args[0][2] == 8765 -def test_deploy_ceph_exporter_container(cephadm_fs, monkeypatch): - mocks = _common_mp(monkeypatch) +def test_deploy_ceph_exporter_container(cephadm_fs, funkypatch): + mocks = _common_patches(funkypatch) _firewalld = mocks['Firewalld'] - _get_ip_addresses = mock.MagicMock(return_value=(['10.10.10.10'], [])) - monkeypatch.setattr('cephadm.get_ip_addresses', _get_ip_addresses) - _make_var_run = mock.MagicMock() - monkeypatch.setattr('cephadm.make_var_run', _make_var_run) + _get_ip_addresses = funkypatch.patch('cephadmlib.net_utils.get_ip_addresses') + _get_ip_addresses.return_value = (['10.10.10.10'], []) + _make_run_dir = funkypatch.patch('cephadmlib.file_utils.make_run_dir') fsid = 'b01dbeef-701d-9abe-0000-e1e5a47004a7' with with_cephadm_ctx([]) as ctx: ctx.container_engine = mock_podman() diff --git a/src/cephadm/tests/test_logrotate_config.py b/src/cephadm/tests/test_logrotate_config.py new file mode 100644 index 00000000000..c97f21019d8 --- /dev/null +++ b/src/cephadm/tests/test_logrotate_config.py @@ -0,0 +1,57 @@ +from unittest import mock + +import pytest + +from tests.fixtures import import_cephadm, cephadm_fs + +from cephadmlib import logging + + +_cephadm = import_cephadm() + +def test_cluster_logrotate_config(cephadm_fs): + ctx = _cephadm.CephadmContext() + ctx.logrotate_dir = '/my/log/dir' + fsid = '5dcc9af0-7cd3-11ee-9e84-525400babd0a' + + cephadm_fs.create_dir(ctx.logrotate_dir) + + expected_cluster_logrotate_file = """# created by cephadm +/var/log/ceph/5dcc9af0-7cd3-11ee-9e84-525400babd0a/*.log { + rotate 7 + daily + compress + sharedscripts + postrotate + killall -q -1 ceph-mon ceph-mgr ceph-mds ceph-osd ceph-fuse radosgw rbd-mirror cephfs-mirror tcmu-runner || pkill -1 -x 'ceph-mon|ceph-mgr|ceph-mds|ceph-osd|ceph-fuse|radosgw|rbd-mirror|cephfs-mirror|tcmu-runner' || true + endscript + missingok + notifempty + su root root +}""" + + logging.write_cluster_logrotate_config(ctx, fsid) + + with open(ctx.logrotate_dir + f'/ceph-{fsid}', 'r') as f: + assert f.read() == expected_cluster_logrotate_file + +def test_cephadm_logrotate_config(cephadm_fs): + ctx = _cephadm.CephadmContext() + ctx.logrotate_dir = '/my/log/dir' + + cephadm_fs.create_dir(ctx.logrotate_dir) + + expected_cephadm_logrotate_file = """# created by cephadm +/var/log/ceph/cephadm.log { + rotate 7 + daily + compress + 
missingok + notifempty + su root root +}""" + + logging.write_cephadm_logrotate_config(ctx) + + with open(ctx.logrotate_dir + f'/cephadm', 'r') as f: + assert f.read() == expected_cephadm_logrotate_file diff --git a/src/cephadm/tests/test_nfs.py b/src/cephadm/tests/test_nfs.py index 94ab6afcfdf..aae8113382d 100644 --- a/src/cephadm/tests/test_nfs.py +++ b/src/cephadm/tests/test_nfs.py @@ -155,15 +155,17 @@ def test_nfsganesha_container_envs(): def test_nfsganesha_get_version(): + from cephadmlib.daemons import nfs + with with_cephadm_ctx([]) as ctx: - nfsg = _cephadm.NFSGanesha( + nfsg = nfs.NFSGanesha( ctx, SAMPLE_UUID, "fred", good_nfs_json(), ) - with mock.patch("cephadm.call") as _call: + with mock.patch("cephadmlib.daemons.nfs.call") as _call: _call.return_value = ("NFS-Ganesha Release = V100", "", 0) ver = nfsg.get_version(ctx, "fake_version") _call.assert_called() diff --git a/src/cephadm/tests/test_unit_file.py b/src/cephadm/tests/test_unit_file.py index 0ebd84be3e8..a1d49c93c20 100644 --- a/src/cephadm/tests/test_unit_file.py +++ b/src/cephadm/tests/test_unit_file.py @@ -16,24 +16,26 @@ from tests.fixtures import ( with_cephadm_ctx, ) +from cephadmlib import context +from cephadmlib import systemd_unit from cephadmlib.constants import CGROUPS_SPLIT_PODMAN_VERSION _cephadm = import_cephadm() def _get_unit_file(ctx, fsid): - return str(_cephadm.get_unit_file(ctx, fsid)) + return str(systemd_unit._get_unit_file(ctx, fsid)) def test_docker_engine_requires_docker(): - ctx = _cephadm.CephadmContext() + ctx = context.CephadmContext() ctx.container_engine = mock_docker() r = _get_unit_file(ctx, '9b9d7609-f4d5-4aba-94c8-effa764d96c9') assert 'Requires=docker.service' in r def test_podman_engine_does_not_req_docker(): - ctx = _cephadm.CephadmContext() + ctx = context.CephadmContext() ctx.container_engine = mock_podman() r = _get_unit_file(ctx, '9b9d7609-f4d5-4aba-94c8-effa764d96c9') assert 'Requires=docker.service' not in r @@ -42,7 +44,7 @@ def test_podman_engine_does_not_req_docker(): def test_podman_engine_forking_service(): # verity that the podman service uses the forking service type # and related parameters - ctx = _cephadm.CephadmContext() + ctx = context.CephadmContext() ctx.container_engine = mock_podman() r = _get_unit_file(ctx, '9b9d7609-f4d5-4aba-94c8-effa764d96c9') assert 'Type=forking' in r @@ -52,7 +54,7 @@ def test_podman_engine_forking_service(): def test_podman_with_split_cgroups_sets_delegate(): - ctx = _cephadm.CephadmContext() + ctx = context.CephadmContext() ctx.container_engine = mock_podman() ctx.container_engine.version = CGROUPS_SPLIT_PODMAN_VERSION r = _get_unit_file(ctx, '9b9d7609-f4d5-4aba-94c8-effa764d96c9') @@ -65,7 +67,7 @@ def _ignore_blank_lines(value): def test_new_docker(): - ctx = _cephadm.CephadmContext() + ctx = context.CephadmContext() ctx.container_engine = mock_docker() ru = _get_unit_file(ctx, '9b9d7609-f4d5-4aba-94c8-effa764d96c9') assert _ignore_blank_lines(ru) == [ @@ -104,7 +106,7 @@ def test_new_docker(): def test_new_podman(): - ctx = _cephadm.CephadmContext() + ctx = context.CephadmContext() ctx.container_engine = mock_podman() ctx.container_engine.version = CGROUPS_SPLIT_PODMAN_VERSION ru = _get_unit_file(ctx, '9b9d7609-f4d5-4aba-94c8-effa764d96c9') diff --git a/src/cls/2pc_queue/cls_2pc_queue_ops.h b/src/cls/2pc_queue/cls_2pc_queue_ops.h index 194fdf6da9d..fa4f8765ffe 100644 --- a/src/cls/2pc_queue/cls_2pc_queue_ops.h +++ b/src/cls/2pc_queue/cls_2pc_queue_ops.h @@ -3,12 +3,13 @@ #pragma once +#include "common/ceph_json.h" #include 
"include/types.h" #include "cls_2pc_queue_types.h" struct cls_2pc_queue_reserve_op { uint64_t size; - uint32_t entries; + uint32_t entries{0}; void encode(ceph::buffer::list& bl) const { ENCODE_START(1, 1, bl); @@ -23,6 +24,19 @@ struct cls_2pc_queue_reserve_op { decode(entries, bl); DECODE_FINISH(bl); } + + void dump(ceph::Formatter *f) const { + f->dump_unsigned("size", size); + f->dump_unsigned("entries", entries); + } + + static void generate_test_instances(std::list<cls_2pc_queue_reserve_op*>& ls) { + ls.push_back(new cls_2pc_queue_reserve_op); + ls.back()->size = 0; + ls.push_back(new cls_2pc_queue_reserve_op); + ls.back()->size = 123; + ls.back()->entries = 456; + } }; WRITE_CLASS_ENCODER(cls_2pc_queue_reserve_op) @@ -40,6 +54,15 @@ struct cls_2pc_queue_reserve_ret { decode(id, bl); DECODE_FINISH(bl); } + + void dump(ceph::Formatter *f) const { + f->dump_unsigned("id", id); + } + + static void generate_test_instances(std::list<cls_2pc_queue_reserve_ret*>& ls) { + ls.push_back(new cls_2pc_queue_reserve_ret); + ls.back()->id = 123; + } }; WRITE_CLASS_ENCODER(cls_2pc_queue_reserve_ret) @@ -61,6 +84,19 @@ struct cls_2pc_queue_commit_op { DECODE_FINISH(bl); } + void dump(ceph::Formatter *f) const { + f->dump_unsigned("id", id); + encode_json("bl_data_vec", bl_data_vec, f); + } + + static void generate_test_instances(std::list<cls_2pc_queue_commit_op*>& ls) { + ls.push_back(new cls_2pc_queue_commit_op); + ls.back()->id = 123; + ls.back()->bl_data_vec.push_back(ceph::buffer::list()); + ls.back()->bl_data_vec.back().append("foo"); + ls.back()->bl_data_vec.push_back(ceph::buffer::list()); + ls.back()->bl_data_vec.back().append("bar"); + } }; WRITE_CLASS_ENCODER(cls_2pc_queue_commit_op) @@ -78,6 +114,13 @@ struct cls_2pc_queue_abort_op { decode(id, bl); DECODE_FINISH(bl); } + void dump(ceph::Formatter *f) const { + f->dump_unsigned("id", id); + } + static void generate_test_instances(std::list<cls_2pc_queue_abort_op*>& ls) { + ls.push_back(new cls_2pc_queue_abort_op); + ls.back()->id = 1; + } }; WRITE_CLASS_ENCODER(cls_2pc_queue_abort_op) @@ -96,6 +139,14 @@ struct cls_2pc_queue_expire_op { decode(stale_time, bl); DECODE_FINISH(bl); } + void dump(ceph::Formatter *f) const { + f->dump_stream("stale_time") << stale_time; + } + static void generate_test_instances(std::list<cls_2pc_queue_expire_op*>& ls) { + ls.push_back(new cls_2pc_queue_expire_op); + ls.push_back(new cls_2pc_queue_expire_op); + ls.back()->stale_time = ceph::coarse_real_time::min(); + } }; WRITE_CLASS_ENCODER(cls_2pc_queue_expire_op) @@ -113,6 +164,23 @@ struct cls_2pc_queue_reservations_ret { decode(reservations, bl); DECODE_FINISH(bl); } + void dump(ceph::Formatter *f) const { + f->open_array_section("reservations"); + for (const auto& i : reservations) { + f->open_object_section("reservation"); + f->dump_unsigned("id", i.first); + i.second.dump(f); + f->close_section(); + } + f->close_section(); + } + + static void generate_test_instances(std::list<cls_2pc_queue_reservations_ret*>& ls) { + ls.push_back(new cls_2pc_queue_reservations_ret); + ls.push_back(new cls_2pc_queue_reservations_ret); + ls.back()->reservations[1] = cls_2pc_reservation(); + ls.back()->reservations[2] = cls_2pc_reservation(); + } }; WRITE_CLASS_ENCODER(cls_2pc_queue_reservations_ret) diff --git a/src/cls/2pc_queue/cls_2pc_queue_types.h b/src/cls/2pc_queue/cls_2pc_queue_types.h index 2413fd7043d..b270c9d6e79 100644 --- a/src/cls/2pc_queue/cls_2pc_queue_types.h +++ b/src/cls/2pc_queue/cls_2pc_queue_types.h @@ -34,6 +34,19 @@ struct cls_2pc_reservation } 
DECODE_FINISH(bl); } + + void dump(ceph::Formatter *f) const { + f->dump_unsigned("size", size); + f->dump_stream("timestamp") << timestamp; + } + + static void generate_test_instances(std::list<cls_2pc_reservation*>& ls) { + ls.push_back(new cls_2pc_reservation); + ls.back()->size = 0; + ls.push_back(new cls_2pc_reservation); + ls.back()->size = 123; + ls.back()->timestamp = ceph::coarse_real_clock::zero(); + } }; WRITE_CLASS_ENCODER(cls_2pc_reservation) @@ -68,5 +81,28 @@ struct cls_2pc_urgent_data } DECODE_FINISH(bl); } + + void dump(ceph::Formatter *f) const { + f->dump_unsigned("reserved_size", reserved_size); + f->dump_unsigned("last_id", last_id); + f->open_array_section("reservations"); + for (const auto& [id, res] : reservations) { + f->open_object_section("reservation"); + f->dump_unsigned("id", id); + res.dump(f); + f->close_section(); + } + f->close_section(); + f->dump_bool("has_xattrs", has_xattrs); + } + + static void generate_test_instances(std::list<cls_2pc_urgent_data*>& ls) { + ls.push_back(new cls_2pc_urgent_data); + ls.push_back(new cls_2pc_urgent_data); + ls.back()->reserved_size = 123; + ls.back()->last_id = 456; + ls.back()->reservations.emplace(789, cls_2pc_reservation(1, ceph::coarse_real_clock::zero(), 2)); + ls.back()->has_xattrs = true; + } }; WRITE_CLASS_ENCODER(cls_2pc_urgent_data) diff --git a/src/cls/cas/cls_cas_internal.h b/src/cls/cas/cls_cas_internal.h index 09e7f9f1f69..037e60fffd9 100644 --- a/src/cls/cas/cls_cas_internal.h +++ b/src/cls/cas/cls_cas_internal.h @@ -145,6 +145,12 @@ struct chunk_refs_by_object_t : public chunk_refs_t::refs_t { } f->close_section(); } + static void generate_test_instances(std::list<chunk_refs_by_object_t*>& ls) { + ls.push_back(new chunk_refs_by_object_t()); + ls.push_back(new chunk_refs_by_object_t()); + ls.back()->by_object.insert(hobject_t(sobject_t("foo", CEPH_NOSNAP))); + ls.back()->by_object.insert(hobject_t(sobject_t("bar", CEPH_NOSNAP))); + } }; WRITE_CLASS_ENCODER(chunk_refs_by_object_t) @@ -386,6 +392,11 @@ struct chunk_refs_count_t : public chunk_refs_t::refs_t { f->dump_string("type", "count"); f->dump_unsigned("count", total); } + static void generate_test_instances(std::list<chunk_refs_count_t*>& o) { + o.push_back(new chunk_refs_count_t); + o.push_back(new chunk_refs_count_t); + o.back()->total = 123; + } }; WRITE_CLASS_ENCODER(chunk_refs_count_t) diff --git a/src/cls/fifo/cls_fifo_ops.h b/src/cls/fifo/cls_fifo_ops.h index e850c635c0b..d466122a952 100644 --- a/src/cls/fifo/cls_fifo_ops.h +++ b/src/cls/fifo/cls_fifo_ops.h @@ -67,6 +67,31 @@ struct create_meta decode(exclusive, bl); DECODE_FINISH(bl); } + void dump(ceph::Formatter *f) const { + f->dump_string("id", id); + f->dump_object("version", version.value_or(objv())); + f->dump_string("pool_name", pool.name); + f->dump_string("pool_ns", pool.ns); + f->dump_string("oid_prefix", oid_prefix.value_or("")); + f->dump_unsigned("max_part_size", max_part_size); + f->dump_unsigned("max_entry_size", max_entry_size); + f->dump_bool("exclusive", exclusive); + } + static void generate_test_instances(std::list<create_meta*>& o) { + o.push_back(new create_meta); + o.push_back(new create_meta); + o.back()->id = "id"; + objv v1; + v1.instance = "inst1"; + v1.ver = 1; + o.back()->version = v1; + o.back()->pool.name = "pool"; + o.back()->pool.ns = "ns"; + o.back()->oid_prefix = "prefix"; + o.back()->max_part_size = 1024; + o.back()->max_entry_size = 1024; + o.back()->exclusive = true; + } }; WRITE_CLASS_ENCODER(create_meta) @@ -84,6 +109,17 @@ struct get_meta 
decode(version, bl); DECODE_FINISH(bl); } + void dump(ceph::Formatter *f) const { + f->dump_object("version", version.value_or(objv())); + } + static void generate_test_instances(std::list<get_meta*>& o) { + o.push_back(new get_meta); + o.push_back(new get_meta); + objv v1; + v1.instance = "inst1"; + v1.ver = 1; + o.back()->version = v1; + } }; WRITE_CLASS_ENCODER(get_meta) @@ -108,6 +144,18 @@ struct get_meta_reply decode(part_entry_overhead, bl); DECODE_FINISH(bl); } + void dump(ceph::Formatter *f) const { + f->dump_object("info", info); + f->dump_unsigned("part_header_size", part_header_size); + f->dump_unsigned("part_entry_overhead", part_entry_overhead); + } + static void generate_test_instances(std::list<get_meta_reply*>& o) { + o.push_back(new get_meta_reply); + o.push_back(new get_meta_reply); + o.back()->info = fifo::info(); + o.back()->part_header_size = 1024; + o.back()->part_entry_overhead = 1024; + } }; WRITE_CLASS_ENCODER(get_meta_reply) diff --git a/src/cls/fifo/cls_fifo_types.h b/src/cls/fifo/cls_fifo_types.h index 1c69c1f0871..2ae601e4aaf 100644 --- a/src/cls/fifo/cls_fifo_types.h +++ b/src/cls/fifo/cls_fifo_types.h @@ -54,7 +54,16 @@ struct objv { decode(ver, bl); DECODE_FINISH(bl); } - void dump(ceph::Formatter* f) const; + void dump(ceph::Formatter* f) const { + f->dump_string("instance", instance); + f->dump_unsigned("ver", ver); + } + static void generate_test_instances(std::list<objv*>& o) { + o.push_back(new objv); + o.push_back(new objv); + o.back()->instance = "instance"; + o.back()->ver = 1; + } void decode_json(JSONObj* obj); bool operator ==(const objv& rhs) const { @@ -103,7 +112,18 @@ struct data_params { decode(full_size_threshold, bl); DECODE_FINISH(bl); } - void dump(ceph::Formatter* f) const; + void dump(ceph::Formatter* f) const { + f->dump_unsigned("max_part_size", max_part_size); + f->dump_unsigned("max_entry_size", max_entry_size); + f->dump_unsigned("full_size_threshold", full_size_threshold); + } + static void generate_test_instances(std::list<data_params*>& o) { + o.push_back(new data_params); + o.push_back(new data_params); + o.back()->max_part_size = 1; + o.back()->max_entry_size = 2; + o.back()->full_size_threshold = 3; + } void decode_json(JSONObj* obj); auto operator <=>(const data_params&) const = default; @@ -161,7 +181,10 @@ struct journal_entry { decode(part_tag, bl); DECODE_FINISH(bl); } - void dump(ceph::Formatter* f) const; + void dump(ceph::Formatter* f) const { + f->dump_int("op", (int)op); + f->dump_int("part_num", part_num); + } auto operator <=>(const journal_entry&) const = default; }; @@ -397,7 +420,38 @@ struct info { decode_journal(bl); DECODE_FINISH(bl); } - void dump(ceph::Formatter* f) const; + void dump(ceph::Formatter* f) const { + f->dump_string("id", id); + f->dump_object("version", version); + f->dump_string("oid_prefix", oid_prefix); + f->dump_object("params", params); + f->dump_int("tail_part_num", tail_part_num); + f->dump_int("head_part_num", head_part_num); + f->dump_int("min_push_part_num", min_push_part_num); + f->dump_int("max_push_part_num", max_push_part_num); + f->open_array_section("journal"); + for (const auto& entry : journal) { + f->open_object_section("entry"); + f->dump_object("entry", entry); + f->close_section(); + } + f->close_section(); + } + static void generate_test_instances(std::list<info*>& o) { + o.push_back(new info); + o.push_back(new info); + o.back()->id = "myid"; + o.back()->version = objv(); + o.back()->oid_prefix = "myprefix"; + o.back()->params = data_params(); + 
o.back()->tail_part_num = 123; + o.back()->head_part_num = 456; + o.back()->min_push_part_num = 789; + o.back()->max_push_part_num = 101112; + o.back()->journal.insert(journal_entry(journal_entry::Op::create, 1)); + o.back()->journal.insert(journal_entry(journal_entry::Op::create, 2)); + o.back()->journal.insert(journal_entry(journal_entry::Op::create, 3)); + } void decode_json(JSONObj* obj); std::string part_oid(std::int64_t part_num) const { diff --git a/src/cls/log/cls_log_ops.h b/src/cls/log/cls_log_ops.h index 5a65892598b..4d3b2f5d309 100644 --- a/src/cls/log/cls_log_ops.h +++ b/src/cls/log/cls_log_ops.h @@ -4,6 +4,7 @@ #ifndef CEPH_CLS_LOG_OPS_H #define CEPH_CLS_LOG_OPS_H +#include "common/ceph_json.h" #include "cls_log_types.h" struct cls_log_add_op { @@ -73,6 +74,21 @@ struct cls_log_list_op { decode(max_entries, bl); DECODE_FINISH(bl); } + + void dump(ceph::Formatter* f) const { + f->dump_stream("from_time") << from_time; + f->dump_string("marker", marker); + f->dump_stream("to_time") << to_time; + f->dump_int("max_entries", max_entries); + } + static void generate_test_instances(std::list<cls_log_list_op*>& ls) { + ls.push_back(new cls_log_list_op); + ls.push_back(new cls_log_list_op); + ls.back()->from_time = utime_t(1, 2); + ls.back()->marker = "marker"; + ls.back()->to_time = utime_t(3, 4); + ls.back()->max_entries = 5; + } }; WRITE_CLASS_ENCODER(cls_log_list_op) @@ -98,6 +114,25 @@ struct cls_log_list_ret { decode(truncated, bl); DECODE_FINISH(bl); } + + void dump(ceph::Formatter* f) const { + encode_json("entries", entries, f); + f->dump_string("marker", marker); + f->dump_bool("truncated", truncated); + } + static void generate_test_instances(std::list<cls_log_list_ret*>& ls) { + ls.push_back(new cls_log_list_ret); + ls.push_back(new cls_log_list_ret); + ls.back()->entries.push_back(cls_log_entry()); + ls.back()->entries.push_back(cls_log_entry()); + ls.back()->entries.back().section = "section"; + ls.back()->entries.back().name = "name"; + ls.back()->entries.back().timestamp = utime_t(1, 2); + ls.back()->entries.back().data.append("data"); + ls.back()->entries.back().id = "id"; + ls.back()->marker = "marker"; + ls.back()->truncated = true; + } }; WRITE_CLASS_ENCODER(cls_log_list_ret) @@ -133,6 +168,20 @@ struct cls_log_trim_op { } DECODE_FINISH(bl); } + void dump(ceph::Formatter* f) const { + f->dump_stream("from_time") << from_time; + f->dump_stream("to_time") << to_time; + f->dump_string("from_marker", from_marker); + f->dump_string("to_marker", to_marker); + } + static void generate_test_instances(std::list<cls_log_trim_op*>& ls) { + ls.push_back(new cls_log_trim_op); + ls.push_back(new cls_log_trim_op); + ls.back()->from_time = utime_t(1, 2); + ls.back()->to_time = utime_t(3, 4); + ls.back()->from_marker = "from_marker"; + ls.back()->to_marker = "to_marker"; + } }; WRITE_CLASS_ENCODER(cls_log_trim_op) @@ -150,6 +199,13 @@ struct cls_log_info_op { // currently empty request DECODE_FINISH(bl); } + + void dump(ceph::Formatter* f) const { + } + + static void generate_test_instances(std::list<cls_log_info_op*>& ls) { + ls.push_back(new cls_log_info_op); + } }; WRITE_CLASS_ENCODER(cls_log_info_op) diff --git a/src/cls/log/cls_log_types.h b/src/cls/log/cls_log_types.h index 33b8cce51e5..29aa2bae8fe 100644 --- a/src/cls/log/cls_log_types.h +++ b/src/cls/log/cls_log_types.h @@ -92,6 +92,16 @@ struct cls_log_header { decode(max_time, bl); DECODE_FINISH(bl); } + void dump(ceph::Formatter* f) const { + f->dump_string("max_marker", max_marker); + f->dump_stream("max_time") << 
max_time; + } + static void generate_test_instances(std::list<cls_log_header*>& o) { + o.push_back(new cls_log_header); + o.push_back(new cls_log_header); + o.back()->max_marker = "test_marker"; + o.back()->max_time = utime_t(); + } }; inline bool operator ==(const cls_log_header& lhs, const cls_log_header& rhs) { return (lhs.max_marker == rhs.max_marker && diff --git a/src/cls/queue/cls_queue_ops.h b/src/cls/queue/cls_queue_ops.h index f41572036c5..25bf1200c54 100644 --- a/src/cls/queue/cls_queue_ops.h +++ b/src/cls/queue/cls_queue_ops.h @@ -4,6 +4,7 @@ #ifndef CEPH_CLS_QUEUE_OPS_H #define CEPH_CLS_QUEUE_OPS_H +#include "common/ceph_json.h" #include "cls/queue/cls_queue_types.h" struct cls_queue_init_op { @@ -29,6 +30,19 @@ struct cls_queue_init_op { DECODE_FINISH(bl); } + void dump(ceph::Formatter *f) const { + f->dump_unsigned("queue_size", queue_size); + f->dump_unsigned("max_urgent_data_size", max_urgent_data_size); + f->dump_unsigned("urgent_data_len", bl_urgent_data.length()); + } + + static void generate_test_instances(std::list<cls_queue_init_op*>& o) { + o.push_back(new cls_queue_init_op); + o.push_back(new cls_queue_init_op); + o.back()->queue_size = 1024; + o.back()->max_urgent_data_size = 1024; + o.back()->bl_urgent_data.append(std::string_view("data")); + } }; WRITE_CLASS_ENCODER(cls_queue_init_op) @@ -47,12 +61,23 @@ struct cls_queue_enqueue_op { DECODE_START(1, bl); decode(bl_data_vec, bl); DECODE_FINISH(bl); - } + } + + void dump(ceph::Formatter *f) const { + f->dump_unsigned("data_vec_len", bl_data_vec.size()); + } + + static void generate_test_instances(std::list<cls_queue_enqueue_op*>& o) { + o.push_back(new cls_queue_enqueue_op); + o.push_back(new cls_queue_enqueue_op); + o.back()->bl_data_vec.push_back(ceph::buffer::list()); + o.back()->bl_data_vec.back().append(std::string_view("data")); + } }; WRITE_CLASS_ENCODER(cls_queue_enqueue_op) struct cls_queue_list_op { - uint64_t max; + uint64_t max{0}; std::string start_marker; std::string end_marker; @@ -75,6 +100,18 @@ struct cls_queue_list_op { } DECODE_FINISH(bl); } + + void dump(ceph::Formatter *f) const { + f->dump_unsigned("max", max); + f->dump_string("start_marker", start_marker); + } + + static void generate_test_instances(std::list<cls_queue_list_op*>& o) { + o.push_back(new cls_queue_list_op); + o.push_back(new cls_queue_list_op); + o.back()->max = 123; + o.back()->start_marker = "foo"; + } }; WRITE_CLASS_ENCODER(cls_queue_list_op) @@ -100,6 +137,22 @@ struct cls_queue_list_ret { decode(entries, bl); DECODE_FINISH(bl); } + + void dump(ceph::Formatter *f) const { + f->dump_bool("is_truncated", is_truncated); + f->dump_string("next_marker", next_marker); + encode_json("entries", entries, f); + } + + static void generate_test_instances(std::list<cls_queue_list_ret*>& o) { + o.push_back(new cls_queue_list_ret); + o.back()->is_truncated = true; + o.back()->next_marker = "foo"; + o.back()->entries.push_back(cls_queue_entry()); + o.back()->entries.push_back(cls_queue_entry()); + o.back()->entries.back().marker = "id"; + o.back()->entries.back().data.append(std::string_view("data")); + } }; WRITE_CLASS_ENCODER(cls_queue_list_ret) @@ -119,6 +172,15 @@ struct cls_queue_remove_op { decode(end_marker, bl); DECODE_FINISH(bl); } + + void dump(ceph::Formatter *f) const { + f->dump_string("end_marker", end_marker); + } + static void generate_test_instances(std::list<cls_queue_remove_op*>& o) { + o.push_back(new cls_queue_remove_op); + o.push_back(new cls_queue_remove_op); + o.back()->end_marker = "foo"; + } }; 
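Aside: the hunks in this part of the diff all apply the same recipe. Each cls_* op/ret structure gains a Formatter-based dump() and a generate_test_instances(), so ceph-dencoder can decode, print, and round-trip it. A minimal sketch of that recipe follows, using a hypothetical cls_example_op that is not part of this patch; it only mirrors the pattern of the surrounding hunks.

#include <list>
#include <string>
#include "include/encoding.h"
#include "common/Formatter.h"

struct cls_example_op {
  std::string marker;
  uint64_t max{0};

  void encode(ceph::buffer::list& bl) const {
    ENCODE_START(1, 1, bl);
    encode(marker, bl);
    encode(max, bl);
    ENCODE_FINISH(bl);
  }
  void decode(ceph::buffer::list::const_iterator& bl) {
    DECODE_START(1, bl);
    decode(marker, bl);
    decode(max, bl);
    DECODE_FINISH(bl);
  }
  // dump() is what lets ceph-dencoder print the decoded value as JSON/XML.
  void dump(ceph::Formatter *f) const {
    f->dump_string("marker", marker);
    f->dump_unsigned("max", max);
  }
  // the first instance stays default-constructed, later ones populate the
  // fields, so the object corpus holds both empty and non-trivial encodings.
  static void generate_test_instances(std::list<cls_example_op*>& o) {
    o.push_back(new cls_example_op);
    o.push_back(new cls_example_op);
    o.back()->marker = "marker";
    o.back()->max = 123;
  }
};
WRITE_CLASS_ENCODER(cls_example_op)

WRITE_CLASS_ENCODER provides the free encode()/decode() overloads that ceph-dencoder and encode_json() rely on, which is why every struct in these hunks keeps that macro right after its definition, as cls_queue_remove_op does below.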
WRITE_CLASS_ENCODER(cls_queue_remove_op) @@ -138,6 +200,14 @@ struct cls_queue_get_capacity_ret { decode(queue_capacity, bl); DECODE_FINISH(bl); } + + void dump(ceph::Formatter *f) const { + f->dump_unsigned("queue_capacity", queue_capacity); + } + static void generate_test_instances(std::list<cls_queue_get_capacity_ret*>& o) { + o.push_back(new cls_queue_get_capacity_ret); + o.back()->queue_capacity = 123; + } }; WRITE_CLASS_ENCODER(cls_queue_get_capacity_ret) diff --git a/src/cls/queue/cls_queue_types.h b/src/cls/queue/cls_queue_types.h index cc46df40505..3c3e828edf0 100644 --- a/src/cls/queue/cls_queue_types.h +++ b/src/cls/queue/cls_queue_types.h @@ -34,6 +34,17 @@ struct cls_queue_entry decode(marker, bl); DECODE_FINISH(bl); } + + void dump(ceph::Formatter *f) const { + f->dump_string("marker", marker); + f->dump_unsigned("data_len", data.length()); + } + static void generate_test_instances(std::list<cls_queue_entry*>& o) { + o.push_back(new cls_queue_entry); + o.push_back(new cls_queue_entry); + o.back()->data.append(std::string_view("data")); + o.back()->marker = "marker"; + } }; WRITE_CLASS_ENCODER(cls_queue_entry) @@ -80,7 +91,16 @@ struct cls_queue_marker } return 0; } - + void dump(ceph::Formatter *f) const { + f->dump_unsigned("offset", offset); + f->dump_unsigned("gen", gen); + } + static void generate_test_instances(std::list<cls_queue_marker*>& o) { + o.push_back(new cls_queue_marker); + o.push_back(new cls_queue_marker); + o.back()->offset = 1024; + o.back()->gen = 0; + } }; WRITE_CLASS_ENCODER(cls_queue_marker) @@ -114,6 +134,27 @@ struct cls_queue_head decode(bl_urgent_data, bl); DECODE_FINISH(bl); } + + void dump(ceph::Formatter *f) const { + f->dump_unsigned("max_head_size", max_head_size); + f->dump_unsigned("queue_size", queue_size); + f->dump_unsigned("max_urgent_data_size", max_urgent_data_size); + f->dump_unsigned("front_offset", front.offset); + f->dump_unsigned("front_gen", front.gen); + f->dump_unsigned("tail_offset", tail.offset); + f->dump_unsigned("tail_gen", tail.gen); + } + static void generate_test_instances(std::list<cls_queue_head*>& o) { + o.push_back(new cls_queue_head); + o.push_back(new cls_queue_head); + o.back()->max_head_size = 1024; + o.back()->front.offset = 1024; + o.back()->front.gen = 0; + o.back()->tail.offset = 1024; + o.back()->tail.gen = 0; + o.back()->queue_size = 1024; + o.back()->max_urgent_data_size = 0; + } }; WRITE_CLASS_ENCODER(cls_queue_head) diff --git a/src/cls/rgw/cls_rgw_client.cc b/src/cls/rgw/cls_rgw_client.cc index ed2be322da5..966823729b7 100644 --- a/src/cls/rgw/cls_rgw_client.cc +++ b/src/cls/rgw/cls_rgw_client.cc @@ -751,12 +751,11 @@ int CLSRGWIssueBucketBILogStop::issue_op(const int shard_id, const string& oid) } class GetDirHeaderCompletion : public ObjectOperationCompletion { - RGWGetDirHeader_CB *ret_ctx; + boost::intrusive_ptr<RGWGetDirHeader_CB> cb; public: - explicit GetDirHeaderCompletion(RGWGetDirHeader_CB *_ctx) : ret_ctx(_ctx) {} - ~GetDirHeaderCompletion() override { - ret_ctx->put(); - } + explicit GetDirHeaderCompletion(boost::intrusive_ptr<RGWGetDirHeader_CB> cb) + : cb(std::move(cb)) {} + void handle_completion(int r, bufferlist& outbl) override { rgw_cls_list_ret ret; try { @@ -765,20 +764,20 @@ public: } catch (ceph::buffer::error& err) { r = -EIO; } - - ret_ctx->handle_response(r, ret.dir.header); + cb->handle_response(r, ret.dir.header); } }; -int cls_rgw_get_dir_header_async(IoCtx& io_ctx, string& oid, RGWGetDirHeader_CB *ctx) +int cls_rgw_get_dir_header_async(IoCtx& io_ctx, const string& oid, + 
boost::intrusive_ptr<RGWGetDirHeader_CB> cb) { bufferlist in, out; rgw_cls_list_op call; call.num_entries = 0; encode(call, in); ObjectReadOperation op; - GetDirHeaderCompletion *cb = new GetDirHeaderCompletion(ctx); - op.exec(RGW_CLASS, RGW_BUCKET_LIST, in, cb); + op.exec(RGW_CLASS, RGW_BUCKET_LIST, in, + new GetDirHeaderCompletion(std::move(cb))); AioCompletion *c = librados::Rados::aio_create_completion(nullptr, nullptr); int r = io_ctx.aio_operate(oid, c, &op, NULL); c->release(); diff --git a/src/cls/rgw/cls_rgw_client.h b/src/cls/rgw/cls_rgw_client.h index 3db73065171..6f9084acdd3 100644 --- a/src/cls/rgw/cls_rgw_client.h +++ b/src/cls/rgw/cls_rgw_client.h @@ -3,6 +3,8 @@ #pragma once +#include <boost/intrusive_ptr.hpp> +#include <boost/smart_ptr/intrusive_ref_counter.hpp> #include "include/str_list.h" #include "include/rados/librados.hpp" #include "cls_rgw_ops.h" @@ -151,10 +153,10 @@ public: } }; -class RGWGetDirHeader_CB : public RefCountedObject { +class RGWGetDirHeader_CB : public boost::intrusive_ref_counter<RGWGetDirHeader_CB> { public: - ~RGWGetDirHeader_CB() override {} - virtual void handle_response(int r, rgw_bucket_dir_header& header) = 0; + virtual ~RGWGetDirHeader_CB() {} + virtual void handle_response(int r, const rgw_bucket_dir_header& header) = 0; }; class BucketIndexShardsManager { @@ -572,7 +574,8 @@ public: virtual ~CLSRGWIssueBucketBILogStop() override {} }; -int cls_rgw_get_dir_header_async(librados::IoCtx& io_ctx, std::string& oid, RGWGetDirHeader_CB *ctx); +int cls_rgw_get_dir_header_async(librados::IoCtx& io_ctx, const std::string& oid, + boost::intrusive_ptr<RGWGetDirHeader_CB> cb); void cls_rgw_encode_suggestion(char op, rgw_bucket_dir_entry& dirent, ceph::buffer::list& updates); diff --git a/src/cls/timeindex/cls_timeindex_ops.h b/src/cls/timeindex/cls_timeindex_ops.h index f40058954dc..f0f0cc02475 100644 --- a/src/cls/timeindex/cls_timeindex_ops.h +++ b/src/cls/timeindex/cls_timeindex_ops.h @@ -4,6 +4,7 @@ #ifndef CEPH_CLS_TIMEINDEX_OPS_H #define CEPH_CLS_TIMEINDEX_OPS_H +#include "common/ceph_json.h" #include "cls_timeindex_types.h" struct cls_timeindex_add_op { @@ -51,6 +52,26 @@ struct cls_timeindex_list_op { decode(max_entries, bl); DECODE_FINISH(bl); } + + void dump(ceph::Formatter *f) const { + f->open_object_section("from_time"); + from_time.dump(f); + f->close_section(); + f->dump_string("marker", marker); + f->open_object_section("to_time"); + to_time.dump(f); + f->close_section(); + f->dump_int("max_entries", max_entries); + } + + static void generate_test_instances(std::list<cls_timeindex_list_op*>& o) { + o.push_back(new cls_timeindex_list_op); + o.push_back(new cls_timeindex_list_op); + o.back()->from_time = utime_t(1, 2); + o.back()->marker = "marker"; + o.back()->to_time = utime_t(3, 4); + o.back()->max_entries = 5; + } }; WRITE_CLASS_ENCODER(cls_timeindex_list_op) @@ -76,6 +97,23 @@ struct cls_timeindex_list_ret { decode(truncated, bl); DECODE_FINISH(bl); } + + void dump(ceph::Formatter *f) const { + encode_json("entries", entries, f); + f->dump_string("marker", marker); + f->dump_bool("truncated", truncated); + } + + static void generate_test_instances(std::list<cls_timeindex_list_ret*>& o) { + o.push_back(new cls_timeindex_list_ret); + o.push_back(new cls_timeindex_list_ret); + o.back()->entries.push_back(cls_timeindex_entry()); + o.back()->entries.back().key_ts = utime_t(1, 2); + o.back()->entries.back().key_ext = "key_ext"; + o.back()->entries.back().value.append("value"); + o.back()->marker = "marker"; + o.back()->truncated = true; + 
} }; WRITE_CLASS_ENCODER(cls_timeindex_list_ret) diff --git a/src/cls/timeindex/cls_timeindex_types.h b/src/cls/timeindex/cls_timeindex_types.h index d33886881be..ea8d6c93d2c 100644 --- a/src/cls/timeindex/cls_timeindex_types.h +++ b/src/cls/timeindex/cls_timeindex_types.h @@ -4,9 +4,9 @@ #ifndef CEPH_CLS_TIMEINDEX_TYPES_H #define CEPH_CLS_TIMEINDEX_TYPES_H +#include "common/Formatter.h" #include "include/encoding.h" #include "include/types.h" - #include "include/utime.h" class JSONObj; diff --git a/src/cls/version/cls_version_ops.h b/src/cls/version/cls_version_ops.h index 62cd1172982..2eff788ce52 100644 --- a/src/cls/version/cls_version_ops.h +++ b/src/cls/version/cls_version_ops.h @@ -5,6 +5,7 @@ #define CEPH_CLS_VERSION_OPS_H #include "cls_version_types.h" +#include "common/ceph_json.h" struct cls_version_set_op { obj_version objv; @@ -22,6 +23,17 @@ struct cls_version_set_op { decode(objv, bl); DECODE_FINISH(bl); } + + void dump(ceph::Formatter *f) const { + f->dump_object("objv", objv); + } + + static void generate_test_instances(std::list<cls_version_set_op*>& o) { + o.push_back(new cls_version_set_op); + o.push_back(new cls_version_set_op); + o.back()->objv.ver = 123; + o.back()->objv.tag = "foo"; + } }; WRITE_CLASS_ENCODER(cls_version_set_op) @@ -44,6 +56,22 @@ struct cls_version_inc_op { decode(conds, bl); DECODE_FINISH(bl); } + + void dump(ceph::Formatter *f) const { + f->dump_object("objv", objv); + encode_json("conds", conds, f); + } + + static void generate_test_instances(std::list<cls_version_inc_op*>& o) { + o.push_back(new cls_version_inc_op); + o.push_back(new cls_version_inc_op); + o.back()->objv.ver = 123; + o.back()->objv.tag = "foo"; + o.back()->conds.push_back(obj_version_cond()); + o.back()->conds.back().ver.ver = 123; + o.back()->conds.back().ver.tag = "foo"; + o.back()->conds.back().cond = VER_COND_GE; + } }; WRITE_CLASS_ENCODER(cls_version_inc_op) @@ -66,6 +94,22 @@ struct cls_version_check_op { decode(conds, bl); DECODE_FINISH(bl); } + + void dump(ceph::Formatter *f) const { + f->dump_object("objv", objv); + encode_json("conds", conds, f); + } + + static void generate_test_instances(std::list<cls_version_check_op*>& o) { + o.push_back(new cls_version_check_op); + o.push_back(new cls_version_check_op); + o.back()->objv.ver = 123; + o.back()->objv.tag = "foo"; + o.back()->conds.push_back(obj_version_cond()); + o.back()->conds.back().ver.ver = 123; + o.back()->conds.back().ver.tag = "foo"; + o.back()->conds.back().cond = VER_COND_GE; + } }; WRITE_CLASS_ENCODER(cls_version_check_op) @@ -85,6 +129,17 @@ struct cls_version_read_ret { decode(objv, bl); DECODE_FINISH(bl); } + + void dump(ceph::Formatter *f) const { + f->dump_object("objv", objv); + } + + static void generate_test_instances(std::list<cls_version_read_ret*>& o) { + o.push_back(new cls_version_read_ret); + o.push_back(new cls_version_read_ret); + o.back()->objv.ver = 123; + o.back()->objv.tag = "foo"; + } }; WRITE_CLASS_ENCODER(cls_version_read_ret) diff --git a/src/cls/version/cls_version_types.cc b/src/cls/version/cls_version_types.cc index b82f6aa8a5d..735ef7c8985 100644 --- a/src/cls/version/cls_version_types.cc +++ b/src/cls/version/cls_version_types.cc @@ -6,12 +6,6 @@ #include "common/ceph_json.h" -void obj_version::dump(ceph::Formatter *f) const -{ - f->dump_int("ver", ver); - f->dump_string("tag", tag); -} - void obj_version::decode_json(JSONObj *obj) { JSONDecoder::decode_json("ver", ver, obj); diff --git a/src/cls/version/cls_version_types.h b/src/cls/version/cls_version_types.h index 
62cc16e33d5..dafa866e1f3 100644 --- a/src/cls/version/cls_version_types.h +++ b/src/cls/version/cls_version_types.h @@ -53,7 +53,11 @@ struct obj_version { tag.compare(v.tag) == 0); } - void dump(ceph::Formatter *f) const; + void dump(ceph::Formatter *f) const { + f->dump_int("ver", ver); + f->dump_string("tag", tag); + } + void decode_json(JSONObj *obj); static void generate_test_instances(std::list<obj_version*>& o); }; @@ -91,6 +95,18 @@ struct obj_version_cond { DECODE_FINISH(bl); } + void dump(ceph::Formatter *f) const { + f->dump_object("ver", ver); + f->dump_unsigned("cond", cond); + } + + static void generate_test_instances(std::list<obj_version_cond*>& o) { + o.push_back(new obj_version_cond); + o.push_back(new obj_version_cond); + o.back()->ver.ver = 1; + o.back()->ver.tag = "foo"; + o.back()->cond = VER_COND_EQ; + } }; WRITE_CLASS_ENCODER(obj_version_cond) diff --git a/src/common/RefCountedObj.h b/src/common/RefCountedObj.h index ef966463cda..a2667757399 100644 --- a/src/common/RefCountedObj.h +++ b/src/common/RefCountedObj.h @@ -180,6 +180,12 @@ struct RefCountedWaitObject { } }; +static inline void intrusive_ptr_add_ref(RefCountedWaitObject *p) { + p->get(); +} +static inline void intrusive_ptr_release(RefCountedWaitObject *p) { + p->put(); +} #endif // !defined(WITH_SEASTAR)|| defined(WITH_ALIEN) static inline void intrusive_ptr_add_ref(const RefCountedObject *p) { @@ -196,7 +202,7 @@ struct UniquePtrDeleter p->put(); } }; -} +} // namespace TOPNSPC::common using RefCountedPtr = ceph::ref_t<TOPNSPC::common::RefCountedObject>; #endif diff --git a/src/common/ceph_json.h b/src/common/ceph_json.h index 08e8d9e4662..f7a899fd935 100644 --- a/src/common/ceph_json.h +++ b/src/common/ceph_json.h @@ -836,6 +836,61 @@ public: DECODE_FINISH(bl); } + void dump(ceph::Formatter *f) const { + switch (type) { + case FMT_VALUE: + if (value.quoted) { + f->dump_string("value", value.str); + } else { + f->dump_format_unquoted("value", "%s", value.str.c_str()); + } + break; + case FMT_ARRAY: + f->open_array_section("array"); + for (auto& i : arr) { + i.dump(f); + } + f->close_section(); + break; + case FMT_OBJ: + f->open_object_section("object"); + for (auto& i : obj) { + f->dump_object(i.first.c_str(), i.second); + } + f->close_section(); + break; + default: + break; + } + } + static void generate_test_instances(std::list<JSONFormattable*>& o) { + o.push_back(new JSONFormattable); + o.push_back(new JSONFormattable); + o.back()->set_type(FMT_VALUE); + o.back()->value.str = "foo"; + o.back()->value.quoted = true; + o.push_back(new JSONFormattable); + o.back()->set_type(FMT_VALUE); + o.back()->value.str = "foo"; + o.back()->value.quoted = false; + o.push_back(new JSONFormattable); + o.back()->set_type(FMT_ARRAY); + o.back()->arr.push_back(JSONFormattable()); + o.back()->arr.back().set_type(FMT_VALUE); + o.back()->arr.back().value.str = "foo"; + o.back()->arr.back().value.quoted = true; + o.back()->arr.push_back(JSONFormattable()); + o.back()->arr.back().set_type(FMT_VALUE); + o.back()->arr.back().value.str = "bar"; + o.back()->arr.back().value.quoted = true; + o.push_back(new JSONFormattable); + o.back()->set_type(FMT_OBJ); + o.back()->obj["foo"] = JSONFormattable(); + o.back()->obj["foo"].set_type(FMT_VALUE); + o.back()->obj["foo"].value.str = "bar"; + o.back()->obj["foo"].value.quoted = true; + } + const std::string& val() const { return value.str; } diff --git a/src/common/entity_name.cc b/src/common/entity_name.cc index 5357b34eacb..a9d6fb9c8b1 100644 --- a/src/common/entity_name.cc +++ 
b/src/common/entity_name.cc @@ -29,21 +29,30 @@ const std::array<EntityName::str_to_entity_type_t, 6> EntityName::STR_TO_ENTITY_ { CEPH_ENTITY_TYPE_CLIENT, "client" }, }}; -const std::string& EntityName:: -to_str() const -{ +void EntityName::dump(ceph::Formatter *f) const { + f->dump_int("type", type); + f->dump_string("id", id); +} + +void EntityName::generate_test_instances(std::list<EntityName*>& ls) { + ls.push_back(new EntityName); + ls.push_back(new EntityName); + ls.back()->set_type(CEPH_ENTITY_TYPE_OSD); + ls.back()->set_id("0"); + ls.push_back(new EntityName); + ls.back()->set_type(CEPH_ENTITY_TYPE_MDS); + ls.back()->set_id("a"); +} + +const std::string& EntityName::to_str() const { return type_id; } -const char* EntityName:: -to_cstr() const -{ +const char* EntityName::to_cstr() const { return type_id.c_str(); } -bool EntityName:: -from_str(std::string_view s) -{ +bool EntityName::from_str(std::string_view s) { size_t pos = s.find('.'); if (pos == string::npos) @@ -56,9 +65,7 @@ from_str(std::string_view s) return true; } -void EntityName:: -set(uint32_t type_, std::string_view id_) -{ +void EntityName::set(uint32_t type_, std::string_view id_) { type = type_; id = id_; @@ -71,9 +78,7 @@ set(uint32_t type_, std::string_view id_) } } -int EntityName:: -set(std::string_view type_, std::string_view id_) -{ +int EntityName::set(std::string_view type_, std::string_view id_) { uint32_t t = str_to_ceph_entity_type(type_); if (t == CEPH_ENTITY_TYPE_ANY) return -EINVAL; @@ -81,9 +86,7 @@ set(std::string_view type_, std::string_view id_) return 0; } -void EntityName:: -set_type(uint32_t type_) -{ +void EntityName::set_type(uint32_t type_) { set(type_, id); } @@ -93,9 +96,7 @@ set_type(std::string_view type_) return set(type_, id); } -void EntityName:: -set_id(std::string_view id_) -{ +void EntityName::set_id(std::string_view id_) { set(type, id_); } @@ -106,33 +107,23 @@ void EntityName::set_name(entity_name_t n) set(n.type(), s); } -const char* EntityName:: -get_type_str() const -{ +const char* EntityName::get_type_str() const { return ceph_entity_type_name(type); } -std::string_view EntityName:: -get_type_name() const -{ +std::string_view EntityName::get_type_name() const { return ceph_entity_type_name(type); } -const std::string &EntityName:: -get_id() const -{ +const std::string &EntityName::get_id() const { return id; } -bool EntityName:: -has_default_id() const -{ +bool EntityName::has_default_id() const { return (id == "admin"); } -std::string EntityName:: -get_valid_types_as_str() -{ +std::string EntityName::get_valid_types_as_str() { std::ostringstream out; size_t i; for (i = 0; i < STR_TO_ENTITY_TYPE.size(); ++i) { diff --git a/src/common/entity_name.h b/src/common/entity_name.h index c88ebcbbabd..53f8cd4d5d0 100644 --- a/src/common/entity_name.h +++ b/src/common/entity_name.h @@ -41,7 +41,8 @@ struct EntityName decode(id_, bl); set(type_, id_); } - + void dump(ceph::Formatter *f) const; + static void generate_test_instances(std::list<EntityName*>& ls); const std::string& to_str() const; const char *to_cstr() const; bool from_str(std::string_view s); diff --git a/src/common/mempool.cc b/src/common/mempool.cc index 79354f70821..4ecfaf81fc6 100644 --- a/src/common/mempool.cc +++ b/src/common/mempool.cc @@ -15,9 +15,12 @@ #include "include/mempool.h" #include "include/demangle.h" +#if defined(_GNU_SOURCE) && defined(WITH_SEASTAR) && !defined(WITH_ALIEN) +#else // Thread local variables should save index, not &shard[index], // because shard[] is defined in the class static 
thread_local size_t thread_shard_index = mempool::num_shards; +#endif // default to debug_mode off bool mempool::debug_mode = false; @@ -95,9 +98,21 @@ size_t mempool::pool_t::allocated_items() const void mempool::pool_t::adjust_count(ssize_t items, ssize_t bytes) { - thread_shard_index = (thread_shard_index == num_shards) ? pick_a_shard_int() : thread_shard_index; - shard[thread_shard_index].items += items; - shard[thread_shard_index].bytes += bytes; +#if defined(_GNU_SOURCE) && defined(WITH_SEASTAR) && !defined(WITH_ALIEN) + // the expected path: we alway pick the shard for a cpu core + // a thread is executing on. + const size_t shard_index = pick_a_shard_int(); +#else + // fallback for lack of sched_getcpu() + const size_t shard_index = []() { + if (thread_shard_index == num_shards) { + thread_shard_index = pick_a_shard_int(); + } + return thread_shard_index; + }(); +#endif + shard[shard_index].items += items; + shard[shard_index].bytes += bytes; } void mempool::pool_t::get_stats( @@ -113,8 +128,17 @@ void mempool::pool_t::get_stats( for (auto &p : type_map) { std::string n = ceph_demangle(p.second.type_name); stats_t &s = (*by_type)[n]; +#if defined(WITH_SEASTAR) && !defined(WITH_ALIEN) + s.bytes = 0; + s.items = 0; + for (size_t i = 0 ; i < num_shards; ++i) { + s.bytes += p.second.shards[i].items * p.second.item_size; + s.items += p.second.shards[i].items; + } +#else s.bytes = p.second.items * p.second.item_size; s.items = p.second.items; +#endif } } } diff --git a/src/common/options/rgw.yaml.in b/src/common/options/rgw.yaml.in index b03e4449ab1..3f0f691e84e 100644 --- a/src/common/options/rgw.yaml.in +++ b/src/common/options/rgw.yaml.in @@ -359,7 +359,11 @@ options: type: str level: advanced desc: Lifecycle allowed work time - long_desc: Local time window in which the lifecycle maintenance thread can work. + long_desc: Local time window in which the lifecycle maintenance thread can work. It expects + 24-hour time notation. For example, "00:00-23:59" means starting at midnight lifecycle + is allowed to run for the whole day (24 hours). When lifecycle completes, it waits for the + next maintenance window. In this example, if it completes at 01:00, it will resume processing + 23 hours later at the following midnight. default: 00:00-06:00 services: - rgw diff --git a/src/common/versioned_variant.h b/src/common/versioned_variant.h new file mode 100644 index 00000000000..124c5883916 --- /dev/null +++ b/src/common/versioned_variant.h @@ -0,0 +1,234 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +/* + * Ceph - scalable distributed file system + * + * Copyright contributors to the Ceph project + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. + * + */ + +#pragma once + +#include <concepts> +#include <limits> +#include <list> +#include <variant> + +#include <boost/mp11/algorithm.hpp> // for mp_with_index +#include "include/encoding.h" + +/// \file +/// \brief Contains binary encoding strategies for std::variant. 
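Editorial note: the namespace comments that follow explain the two strategies in detail. For quick orientation, here is a minimal sketch of how the versioned_variant helpers defined below are meant to be used; example_t is hypothetical and not part of this patch, and the encode/decode shape follows the \code snippets in this header. The generate_test_instances() part uses the ceph::generate_test_instances() overload for variants added at the end of the header.

#include <list>
#include <variant>
#include "common/versioned_variant.h"

struct example_t {
  // new alternatives may only ever be appended at the end of the variant
  std::variant<int, bool> value;

  void encode(ceph::buffer::list& bl) const {
    ENCODE_START(0, 0, bl);
    ceph::versioned_variant::encode(value, bl);
    ENCODE_FINISH(bl);
  }
  void decode(ceph::buffer::list::const_iterator& p) {
    DECODE_START(0, p);
    ceph::versioned_variant::decode(value, p);
    DECODE_FINISH(p);
  }
  // one instance per alternative, so the object corpus ends up containing an
  // encoding of every variant index
  static void generate_test_instances(std::list<example_t*>& o) {
    std::list<std::variant<int, bool>> variants;
    ceph::generate_test_instances(variants);
    for (auto& v : variants) {
      o.push_back(new example_t);
      o.back()->value = std::move(v);
    }
  }
};
WRITE_CLASS_ENCODER(example_t)

converted_variant, documented further down, follows the same shape but reserves the low struct_v values so an existing non-variant member can be widened into a variant without breaking older decoders.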
+ +namespace ceph { + +// null encoding for std::monostate +inline void encode(const std::monostate&, bufferlist& bl) {} +inline void decode(std::monostate&, bufferlist::const_iterator& p) {} + +// largest value that can be represented by `__u8 struct_v` +inline constexpr size_t max_version = std::numeric_limits<__u8>::max(); + +/// \namespace versioned_variant +/// \brief A backward-compatible binary encoding for std::variant. +/// +/// The variant index is encoded in struct_v so the correct decoder can be +/// selected. This means that existing variant types cannot be changed or +/// removed without breaking the decode of earlier ceph versions. New types +/// can only be added to the end of the variant. +/// +/// In addition to struct_v, the variant index is also encoded in compatv. As +/// the variant is extended, this means that existing decoders can continue to +/// decode the types they recognize, but reject the encodings of new types they +/// don't. +/// +/// The variant types themselves are free to change their encodings, provided +/// they manage their own versioning. The types must be default-constructible +/// so they can be constructed before decode. +/// +/// The contained encode/decode functions won't be found by argument-dependent +/// lookup, so you must either qualify the calls with `versioned_variant::` or +/// add `using namespace versioned_variant` to the calling scope. +namespace versioned_variant { + +// Requirements for the list of types for versioned std::variant encoding. +template <typename ...Ts> +concept valid_types = requires { + sizeof...(Ts) > 0; // variant cannot be empty + sizeof...(Ts) <= max_version; // index must fit in u8 + requires (std::default_initializable<Ts> && ...); // default-constructible + }; + +/// \brief A versioned_variant encoder. +/// +/// Example: +/// \code +/// struct example { +/// std::variant<int, bool> value; +/// +/// void encode(bufferlist& bl) const { +/// ENCODE_START(0, 0, bl); +/// ceph::versioned_variant::encode(value, bl); +/// ... +/// \endcode +template <typename ...Ts> requires valid_types<Ts...> +void encode(const std::variant<Ts...>& v, bufferlist& bl, uint64_t features=0) +{ + // encode the variant index in struct_v and compatv + const uint8_t ver = static_cast<uint8_t>(v.index()); + ENCODE_START(ver, ver, bl); + // use the variant type's encoder + std::visit([&bl] (const auto& value) mutable { + encode(value, bl); + }, v); + ENCODE_FINISH(bl); +} + +/// \brief A versioned_variant decoder. +/// +/// Example: +/// \code +/// struct example { +/// std::variant<int, bool> value; +/// +/// void decode(bufferlist::const_iterator& bl) const { +/// DECODE_START(0, bl); +/// ceph::versioned_variant::decode(value, bl); +/// ... +/// \endcode +template <typename ...Ts> requires valid_types<Ts...> +void decode(std::variant<Ts...>& v, bufferlist::const_iterator& p) +{ + constexpr uint8_t max_version = sizeof...(Ts) - 1; + DECODE_START(max_version, p); + // use struct_v as an index into the variant after converting it into a + // compile-time index I + const uint8_t index = struct_v; + boost::mp11::mp_with_index<sizeof...(Ts)>(index, [&v, &p] (auto I) { + // default-construct the type at index I and call its decoder + decode(v.template emplace<I>(), p); + }); + DECODE_FINISH(p); +} + +} // namespace versioned_variant + + +/// \namespace converted_variant +/// \brief A std::variant<T, ...> encoding that is backward-compatible with T. 
+/// +/// The encoding works the same as versioned_variant, except that a block of +/// version numbers are reserved for the first type T to allow its encoding +/// to continue evolving. T must itself use versioned encoding (ie +/// ENCODE_START/FINISH). +/// +/// This encoding strategy allows a serialized type T to be transparently +/// converted into a variant that can represent other types too. +namespace converted_variant { + +// For converted variants, reserve the first 128 versions for the original +// type. Variant types after the first use the version numbers above this. +inline constexpr uint8_t converted_max_version = 128; + +// Requirements for the list of types for converted std::variant encoding. +template <typename ...Ts> +concept valid_types = requires { + sizeof...(Ts) > 0; // variant cannot be empty + sizeof...(Ts) <= (max_version - converted_max_version); // index must fit in u8 + requires (std::default_initializable<Ts> && ...); // default-constructible + }; + +/// \brief A converted_variant encoder. +/// +/// Example: +/// \code +/// struct example { +/// std::variant<int, bool> value; // replaced `int value` +/// +/// void encode(bufferlist& bl) const { +/// ENCODE_START(1, 0, bl); +/// ceph::converted_variant::encode(value, bl); +/// ... +/// \endcode +template <typename ...Ts> requires valid_types<Ts...> +void encode(const std::variant<Ts...>& v, bufferlist& bl, uint64_t features=0) +{ + const uint8_t index = static_cast<uint8_t>(v.index()); + if (index == 0) { + // encode the first type with its own versioning scheme + encode(std::get<0>(v), bl); + return; + } + + // encode the variant index in struct_v and compatv + const uint8_t ver = converted_max_version + index; + ENCODE_START(ver, ver, bl); + // use the variant type's encoder + std::visit([&bl] (const auto& value) mutable { + encode(value, bl); + }, v); + ENCODE_FINISH(bl); +} + +/// \brief A converted_variant decoder. +/// +/// Example: +/// \code +/// struct example { +/// std::variant<int, bool> value; // replaced `int value` +/// +/// void decode(bufferlist::const_iterator& bl) { +/// DECODE_START(1, bl); +/// ceph::converted_variant::decode(value, bl); +/// ... +/// \endcode +template <typename ...Ts> requires valid_types<Ts...> +void decode(std::variant<Ts...>& v, bufferlist::const_iterator& p) +{ + // save the iterator position so the first type can restart decode + const bufferlist::const_iterator prev = p; + + constexpr uint8_t max_version = converted_max_version + sizeof...(Ts) - 1; + DECODE_START(max_version, p); + if (struct_v <= converted_max_version) { + p = prev; // rewind and use type 0's DECODE_START/FINISH + decode(v.template emplace<0>(), p); + return; + } + + // use struct_v as an index into the variant after converting it into a + // compile-time index I + const uint8_t index = struct_v - converted_max_version; + boost::mp11::mp_with_index<sizeof...(Ts)>(index, [&v, &p] (auto I) { + // default-construct the type at index I and call its decoder + decode(v.template emplace<I>(), p); + }); + DECODE_FINISH(p); +} + +} // namespace converted_variant + + +/// \brief Generate a list with a default-constructed variant of each type. +/// +/// This can be used in generate_test_instances() for types that contain +/// variants to ensure that an encoding of each type is present in the +/// ceph-object-corpus. This allows the ceph-dencoder tests to catch any +/// breaking changes to the variant types that are present in encodings. 
+template <typename ...Ts> +void generate_test_instances(std::list<std::variant<Ts...>>& instances) +{ + // use an immediately-invoked lambda to get a parameter pack of variant indices + [&instances] <std::size_t ...I> (std::index_sequence<I...>) { + // use a fold expression to call emplace_back() for each index in the pack + // use in_place_index to default-construct a variant of the type at index I + (instances.emplace_back(std::in_place_index<I>), ...); + } (std::make_index_sequence<sizeof...(Ts)>{}); +} + +} // namespace ceph diff --git a/src/crimson/common/shared_lru.h b/src/crimson/common/shared_lru.h index 186f02a614c..92d99d332c4 100644 --- a/src/crimson/common/shared_lru.h +++ b/src/crimson/common/shared_lru.h @@ -83,6 +83,7 @@ public: cache.clear(); } shared_ptr_t find(const K& key); + K cached_key_lower_bound(); // return the last element that is not greater than key shared_ptr_t lower_bound(const K& key); // return the first element that is greater than key @@ -147,6 +148,15 @@ SharedLRU<K,V>::find(const K& key) } template<class K, class V> +K SharedLRU<K,V>::cached_key_lower_bound() +{ + if (weak_refs.empty()) { + return {}; + } + return weak_refs.begin()->first; +} + +template<class K, class V> typename SharedLRU<K,V>::shared_ptr_t SharedLRU<K,V>::lower_bound(const K& key) { diff --git a/src/crimson/common/smp_helpers.h b/src/crimson/common/smp_helpers.h index c2b7bd9641a..429c938229b 100644 --- a/src/crimson/common/smp_helpers.h +++ b/src/crimson/common/smp_helpers.h @@ -3,10 +3,16 @@ #pragma once +#include <concepts> #include <limits> +#include <optional> +#include <type_traits> +#include <vector> +#include <seastar/core/shared_future.hh> #include <seastar/core/smp.hh> +#include "common/likely.h" #include "crimson/common/errorator.h" #include "crimson/common/utility.h" @@ -89,4 +95,142 @@ auto sharded_map_seq(T &t, F &&f) { }); } -} +enum class crosscore_type_t { + ONE, // from 1 to 1 core + ONE_N, // from 1 to n cores + N_ONE, // from n to 1 core +}; + +/** + * smp_crosscore_ordering_t + * + * To preserve the event order from source to target core(s). 
+ */ +template <crosscore_type_t CTypeValue> +class smp_crosscore_ordering_t { + static constexpr bool IS_ONE = (CTypeValue == crosscore_type_t::ONE); + static constexpr bool IS_ONE_N = (CTypeValue == crosscore_type_t::ONE_N); + static constexpr bool IS_N_ONE = (CTypeValue == crosscore_type_t::N_ONE); + static_assert(IS_ONE || IS_ONE_N || IS_N_ONE); + +public: + using seq_t = uint64_t; + + smp_crosscore_ordering_t() requires IS_ONE + : out_seqs(0) { } + + smp_crosscore_ordering_t() requires (!IS_ONE) + : out_seqs(seastar::smp::count, 0), + in_controls(seastar::smp::count) {} + + ~smp_crosscore_ordering_t() = default; + + /* + * Called by the original core to get the ordering sequence + */ + + seq_t prepare_submit() requires IS_ONE { + return do_prepare_submit(out_seqs); + } + + seq_t prepare_submit(core_id_t target_core) requires IS_ONE_N { + return do_prepare_submit(out_seqs[target_core]); + } + + seq_t prepare_submit() requires IS_N_ONE { + return do_prepare_submit(out_seqs[seastar::this_shard_id()]); + } + + /* + * Called by the target core to preserve the ordering + */ + + seq_t get_in_seq() const requires IS_ONE { + return in_controls.seq; + } + + seq_t get_in_seq() const requires IS_ONE_N { + return in_controls[seastar::this_shard_id()].seq; + } + + seq_t get_in_seq(core_id_t source_core) const requires IS_N_ONE { + return in_controls[source_core].seq; + } + + bool proceed_or_wait(seq_t seq) requires IS_ONE { + return in_controls.proceed_or_wait(seq); + } + + bool proceed_or_wait(seq_t seq) requires IS_ONE_N { + return in_controls[seastar::this_shard_id()].proceed_or_wait(seq); + } + + bool proceed_or_wait(seq_t seq, core_id_t source_core) requires IS_N_ONE { + return in_controls[source_core].proceed_or_wait(seq); + } + + seastar::future<> wait(seq_t seq) requires IS_ONE { + return in_controls.wait(seq); + } + + seastar::future<> wait(seq_t seq) requires IS_ONE_N { + return in_controls[seastar::this_shard_id()].wait(seq); + } + + seastar::future<> wait(seq_t seq, core_id_t source_core) requires IS_N_ONE { + return in_controls[source_core].wait(seq); + } + + void reset_wait() requires IS_N_ONE { + for (auto &in_control : in_controls) { + in_control.reset_wait(); + } + } + +private: + struct in_control_t { + seq_t seq = 0; + std::optional<seastar::shared_promise<>> pr_wait; + + bool proceed_or_wait(seq_t in_seq) { + if (in_seq == seq + 1) { + ++seq; + reset_wait(); + return true; + } else { + return false; + } + } + + seastar::future<> wait(seq_t in_seq) { + assert(in_seq != seq + 1); + if (!pr_wait.has_value()) { + pr_wait = seastar::shared_promise<>(); + } + return pr_wait->get_shared_future(); + } + + void reset_wait() { + if (unlikely(pr_wait.has_value())) { + pr_wait->set_value(); + pr_wait = std::nullopt; + } + } + }; + + seq_t do_prepare_submit(seq_t &out_seq) { + return ++out_seq; + } + + std::conditional_t< + IS_ONE, + seq_t, std::vector<seq_t> + > out_seqs; + + std::conditional_t< + IS_ONE, + in_control_t, std::vector<in_control_t> + > in_controls; +}; + +} // namespace crimson diff --git a/src/crimson/mgr/client.h b/src/crimson/mgr/client.h index 501949768dd..b88c60c5e4a 100644 --- a/src/crimson/mgr/client.h +++ b/src/crimson/mgr/client.h @@ -24,7 +24,7 @@ namespace crimson::mgr // implement WithStats if you want to report stats to mgr periodically class WithStats { public: - virtual seastar::future<MessageURef> get_stats() const = 0; + virtual seastar::future<MessageURef> get_stats() = 0; virtual ~WithStats() {} }; diff --git a/src/crimson/net/Connection.h 
b/src/crimson/net/Connection.h index 7141e20f476..c19bfb1ff57 100644 --- a/src/crimson/net/Connection.h +++ b/src/crimson/net/Connection.h @@ -81,10 +81,35 @@ class Connection : public seastar::enable_shared_from_this<Connection> { * * Send a message over a connection that has completed its handshake. * - * May be invoked from any core, but that requires to chain the returned - * future to preserve ordering. + * May be invoked from any core, and the send order will be preserved upon + * the call. + * + * The returned future will be resolved only after the message is enqueued + * remotely. */ - virtual seastar::future<> send(MessageURef msg) = 0; + virtual seastar::future<> send( + MessageURef msg) = 0; + + /** + * send_with_throttling + * + * Send a message over a connection that has completed its handshake. + * + * May be invoked from any core, and the send order will be preserved upon + * the call. + * + * TODO: + * + * The returned future is reserved for throttling. + * + * Gating is needed for graceful shutdown, to wait until the message is + * enqueued remotely. + */ + seastar::future<> send_with_throttling( + MessageURef msg /* , seastar::gate & */) { + std::ignore = send(std::move(msg)); + return seastar::now(); + } /** * send_keepalive @@ -92,8 +117,8 @@ class Connection : public seastar::enable_shared_from_this<Connection> { * Send a keepalive message over a connection that has completed its * handshake. * - * May be invoked from any core, but that requires to chain the returned - * future to preserve ordering. + * May be invoked from any core, and the send order will be preserved upon + * the call. */ virtual seastar::future<> send_keepalive() = 0; diff --git a/src/crimson/net/Fwd.h b/src/crimson/net/Fwd.h index 2b159514193..3a56cf5bb0a 100644 --- a/src/crimson/net/Fwd.h +++ b/src/crimson/net/Fwd.h @@ -21,7 +21,7 @@ #include <seastar/core/sharded.hh> #include "msg/Connection.h" -#include "msg/MessageRef.h" +#include "msg/Message.h" #include "msg/msg_types.h" #include "crimson/common/errorator.h" diff --git a/src/crimson/net/ProtocolV2.cc b/src/crimson/net/ProtocolV2.cc index 55b669384ed..d4ef3881c40 100644 --- a/src/crimson/net/ProtocolV2.cc +++ b/src/crimson/net/ProtocolV2.cc @@ -2073,7 +2073,7 @@ void ProtocolV2::trigger_replacing(bool reconnect, // READY state seastar::future<> ProtocolV2::notify_out_fault( - crosscore_t::seq_t cc_seq, + cc_seq_t cc_seq, const char *where, std::exception_ptr eptr, io_handler_state _io_states) @@ -2121,7 +2121,7 @@ void ProtocolV2::execute_standby() } seastar::future<> ProtocolV2::notify_out( - crosscore_t::seq_t cc_seq) + cc_seq_t cc_seq) { assert(seastar::this_shard_id() == conn.get_messenger_shard_id()); if (!crosscore.proceed_or_wait(cc_seq)) { @@ -2210,7 +2210,7 @@ void ProtocolV2::execute_server_wait() // CLOSING state seastar::future<> ProtocolV2::notify_mark_down( - crosscore_t::seq_t cc_seq) + cc_seq_t cc_seq) { assert(seastar::this_shard_id() == conn.get_messenger_shard_id()); if (!crosscore.proceed_or_wait(cc_seq)) { diff --git a/src/crimson/net/ProtocolV2.h b/src/crimson/net/ProtocolV2.h index dd7a1e7039b..4262bbbc70c 100644 --- a/src/crimson/net/ProtocolV2.h +++ b/src/crimson/net/ProtocolV2.h @@ -29,16 +29,16 @@ public: */ private: seastar::future<> notify_out( - crosscore_t::seq_t cc_seq) final; + cc_seq_t cc_seq) final; seastar::future<> notify_out_fault( - crosscore_t::seq_t cc_seq, + cc_seq_t cc_seq, const char *where, std::exception_ptr, io_handler_state) final; seastar::future<> notify_mark_down( - crosscore_t::seq_t cc_seq) 
final; + cc_seq_t cc_seq) final; /* * as ProtocolV2 to be called by SocketConnection @@ -251,7 +251,7 @@ private: // asynchronously populated from io_handler io_handler_state io_states; - crosscore_t crosscore; + proto_crosscore_ordering_t crosscore; bool has_socket = false; diff --git a/src/crimson/net/SocketConnection.cc b/src/crimson/net/SocketConnection.cc index 57e5c12c1ae..76719268277 100644 --- a/src/crimson/net/SocketConnection.cc +++ b/src/crimson/net/SocketConnection.cc @@ -79,16 +79,13 @@ bool SocketConnection::peer_wins() const return (messenger.get_myaddr() > peer_addr || policy.server); } -seastar::future<> SocketConnection::send(MessageURef _msg) +seastar::future<> SocketConnection::send(MessageURef msg) { - // may be invoked from any core - MessageFRef msg = seastar::make_foreign(std::move(_msg)); return io_handler->send(std::move(msg)); } seastar::future<> SocketConnection::send_keepalive() { - // may be invoked from any core return io_handler->send_keepalive(); } diff --git a/src/crimson/net/SocketConnection.h b/src/crimson/net/SocketConnection.h index 823d6c574da..7d20f68867e 100644 --- a/src/crimson/net/SocketConnection.h +++ b/src/crimson/net/SocketConnection.h @@ -54,7 +54,7 @@ public: virtual bool is_connected() const = 0; - virtual seastar::future<> send(MessageFRef) = 0; + virtual seastar::future<> send(MessageURef) = 0; virtual seastar::future<> send_keepalive() = 0; diff --git a/src/crimson/net/io_handler.cc b/src/crimson/net/io_handler.cc index c414c48e12f..b9b0339f944 100644 --- a/src/crimson/net/io_handler.cc +++ b/src/crimson/net/io_handler.cc @@ -160,84 +160,132 @@ IOHandler::sweep_out_pending_msgs_to_sent( #endif } -seastar::future<> IOHandler::send(MessageFRef msg) +seastar::future<> IOHandler::send(MessageURef _msg) { + // may be invoked from any core + MessageFRef msg = seastar::make_foreign(std::move(_msg)); + auto cc_seq = io_crosscore.prepare_submit(); + auto source_core = seastar::this_shard_id(); // sid may be changed on-the-fly during the submission - if (seastar::this_shard_id() == get_shard_id()) { - return do_send(std::move(msg)); + if (source_core == get_shard_id()) { + return do_send(cc_seq, source_core, std::move(msg)); } else { - logger().trace("{} send() is directed to {} -- {}", - conn, get_shard_id(), *msg); + logger().trace("{} send() {} is directed to core {} -- {}", + conn, cc_seq, get_shard_id(), *msg); return seastar::smp::submit_to( - get_shard_id(), [this, msg=std::move(msg)]() mutable { - return send_redirected(std::move(msg)); + get_shard_id(), + [this, cc_seq, source_core, msg=std::move(msg)]() mutable { + return send_recheck_shard(cc_seq, source_core, std::move(msg)); }); } } -seastar::future<> IOHandler::send_redirected(MessageFRef msg) +seastar::future<> IOHandler::send_recheck_shard( + cc_seq_t cc_seq, + core_id_t source_core, + MessageFRef msg) { // sid may be changed on-the-fly during the submission if (seastar::this_shard_id() == get_shard_id()) { - return do_send(std::move(msg)); + return do_send(cc_seq, source_core, std::move(msg)); } else { - logger().debug("{} send() is redirected to {} -- {}", - conn, get_shard_id(), *msg); + logger().debug("{} send_recheck_shard() {} " + "is redirected from core {} to {} -- {}", + conn, cc_seq, source_core, get_shard_id(), *msg); return seastar::smp::submit_to( - get_shard_id(), [this, msg=std::move(msg)]() mutable { - return send_redirected(std::move(msg)); + get_shard_id(), + [this, cc_seq, source_core, msg=std::move(msg)]() mutable { + return send_recheck_shard(cc_seq, 
source_core, std::move(msg)); }); } } -seastar::future<> IOHandler::do_send(MessageFRef msg) +seastar::future<> IOHandler::do_send( + cc_seq_t cc_seq, + core_id_t source_core, + MessageFRef msg) { assert(seastar::this_shard_id() == get_shard_id()); - logger().trace("{} do_send() got message -- {}", conn, *msg); - if (get_io_state() != io_state_t::drop) { - out_pending_msgs.push_back(std::move(msg)); - notify_out_dispatch(); + if (io_crosscore.proceed_or_wait(cc_seq, source_core)) { + logger().trace("{} do_send() got {} from core {}: send message -- {}", + conn, cc_seq, source_core, *msg); + if (get_io_state() != io_state_t::drop) { + out_pending_msgs.push_back(std::move(msg)); + notify_out_dispatch(); + } + return seastar::now(); + } else { + logger().debug("{} do_send() got {} from core {}, wait at {} -- {}", + conn, cc_seq, source_core, + io_crosscore.get_in_seq(source_core), + *msg); + return io_crosscore.wait(cc_seq, source_core + ).then([this, cc_seq, source_core, msg=std::move(msg)]() mutable { + return send_recheck_shard(cc_seq, source_core, std::move(msg)); + }); } - return seastar::now(); } seastar::future<> IOHandler::send_keepalive() { + // may be invoked from any core + auto cc_seq = io_crosscore.prepare_submit(); + auto source_core = seastar::this_shard_id(); // sid may be changed on-the-fly during the submission - if (seastar::this_shard_id() == get_shard_id()) { - return do_send_keepalive(); + if (source_core == get_shard_id()) { + return do_send_keepalive(cc_seq, source_core); } else { - logger().trace("{} send_keepalive() is directed to {}", conn, get_shard_id()); + logger().trace("{} send_keepalive() {} is directed to core {}", + conn, cc_seq, get_shard_id()); return seastar::smp::submit_to( - get_shard_id(), [this] { - return send_keepalive_redirected(); + get_shard_id(), + [this, cc_seq, source_core] { + return send_keepalive_recheck_shard(cc_seq, source_core); }); } } -seastar::future<> IOHandler::send_keepalive_redirected() +seastar::future<> IOHandler::send_keepalive_recheck_shard( + cc_seq_t cc_seq, + core_id_t source_core) { // sid may be changed on-the-fly during the submission if (seastar::this_shard_id() == get_shard_id()) { - return do_send_keepalive(); + return do_send_keepalive(cc_seq, source_core); } else { - logger().debug("{} send_keepalive() is redirected to {}", conn, get_shard_id()); + logger().debug("{} send_keepalive_recheck_shard() {} " + "is redirected from core {} to {}", + conn, cc_seq, source_core, get_shard_id()); return seastar::smp::submit_to( - get_shard_id(), [this] { - return send_keepalive_redirected(); + get_shard_id(), + [this, cc_seq, source_core] { + return send_keepalive_recheck_shard(cc_seq, source_core); }); } } -seastar::future<> IOHandler::do_send_keepalive() +seastar::future<> IOHandler::do_send_keepalive( + cc_seq_t cc_seq, + core_id_t source_core) { assert(seastar::this_shard_id() == get_shard_id()); - logger().trace("{} do_send_keeplive(): need_keepalive={}", conn, need_keepalive); - if (!need_keepalive) { - need_keepalive = true; - notify_out_dispatch(); + if (io_crosscore.proceed_or_wait(cc_seq, source_core)) { + logger().trace("{} do_send_keeplive() got {} from core {}: need_keepalive={}", + conn, cc_seq, source_core, need_keepalive); + if (!need_keepalive) { + need_keepalive = true; + notify_out_dispatch(); + } + return seastar::now(); + } else { + logger().debug("{} do_send_keepalive() got {} from core {}, wait at {}", + conn, cc_seq, source_core, + io_crosscore.get_in_seq(source_core)); + return io_crosscore.wait(cc_seq, 
source_core + ).then([this, cc_seq, source_core] { + return send_keepalive_recheck_shard(cc_seq, source_core); + }); } - return seastar::now(); } void IOHandler::mark_down() @@ -249,7 +297,7 @@ void IOHandler::mark_down() return; } - auto cc_seq = crosscore.prepare_submit(); + auto cc_seq = proto_crosscore.prepare_submit(); logger().info("{} mark_down() at {}, send {} notify_mark_down()", conn, io_stat_printer{*this}, cc_seq); do_set_io_state(io_state_t::drop); @@ -292,7 +340,7 @@ void IOHandler::assign_frame_assembler(FrameAssemblerV2Ref fa) void IOHandler::do_set_io_state( io_state_t new_state, - std::optional<crosscore_t::seq_t> cc_seq, + std::optional<cc_seq_t> cc_seq, FrameAssemblerV2Ref fa, bool set_notify_out) { @@ -363,16 +411,16 @@ void IOHandler::do_set_io_state( } seastar::future<> IOHandler::set_io_state( - crosscore_t::seq_t cc_seq, + cc_seq_t cc_seq, io_state_t new_state, FrameAssemblerV2Ref fa, bool set_notify_out) { assert(seastar::this_shard_id() == get_shard_id()); - if (!crosscore.proceed_or_wait(cc_seq)) { + if (!proto_crosscore.proceed_or_wait(cc_seq)) { logger().debug("{} got {} set_io_state(), wait at {}", - conn, cc_seq, crosscore.get_in_seq()); - return crosscore.wait(cc_seq + conn, cc_seq, proto_crosscore.get_in_seq()); + return proto_crosscore.wait(cc_seq ).then([this, cc_seq, new_state, fa=std::move(fa), set_notify_out]() mutable { return set_io_state(cc_seq, new_state, std::move(fa), set_notify_out); @@ -385,13 +433,13 @@ seastar::future<> IOHandler::set_io_state( seastar::future<IOHandler::exit_dispatching_ret> IOHandler::wait_io_exit_dispatching( - crosscore_t::seq_t cc_seq) + cc_seq_t cc_seq) { assert(seastar::this_shard_id() == get_shard_id()); - if (!crosscore.proceed_or_wait(cc_seq)) { + if (!proto_crosscore.proceed_or_wait(cc_seq)) { logger().debug("{} got {} wait_io_exit_dispatching(), wait at {}", - conn, cc_seq, crosscore.get_in_seq()); - return crosscore.wait(cc_seq + conn, cc_seq, proto_crosscore.get_in_seq()); + return proto_crosscore.wait(cc_seq ).then([this, cc_seq] { return wait_io_exit_dispatching(cc_seq); }); @@ -429,14 +477,14 @@ IOHandler::wait_io_exit_dispatching( } seastar::future<> IOHandler::reset_session( - crosscore_t::seq_t cc_seq, + cc_seq_t cc_seq, bool full) { assert(seastar::this_shard_id() == get_shard_id()); - if (!crosscore.proceed_or_wait(cc_seq)) { + if (!proto_crosscore.proceed_or_wait(cc_seq)) { logger().debug("{} got {} reset_session(), wait at {}", - conn, cc_seq, crosscore.get_in_seq()); - return crosscore.wait(cc_seq + conn, cc_seq, proto_crosscore.get_in_seq()); + return proto_crosscore.wait(cc_seq ).then([this, cc_seq, full] { return reset_session(cc_seq, full); }); @@ -454,13 +502,13 @@ seastar::future<> IOHandler::reset_session( } seastar::future<> IOHandler::reset_peer_state( - crosscore_t::seq_t cc_seq) + cc_seq_t cc_seq) { assert(seastar::this_shard_id() == get_shard_id()); - if (!crosscore.proceed_or_wait(cc_seq)) { + if (!proto_crosscore.proceed_or_wait(cc_seq)) { logger().debug("{} got {} reset_peer_state(), wait at {}", - conn, cc_seq, crosscore.get_in_seq()); - return crosscore.wait(cc_seq + conn, cc_seq, proto_crosscore.get_in_seq()); + return proto_crosscore.wait(cc_seq ).then([this, cc_seq] { return reset_peer_state(cc_seq); }); @@ -476,13 +524,13 @@ seastar::future<> IOHandler::reset_peer_state( } seastar::future<> IOHandler::requeue_out_sent( - crosscore_t::seq_t cc_seq) + cc_seq_t cc_seq) { assert(seastar::this_shard_id() == get_shard_id()); - if (!crosscore.proceed_or_wait(cc_seq)) { + if 
(!proto_crosscore.proceed_or_wait(cc_seq)) { logger().debug("{} got {} requeue_out_sent(), wait at {}", - conn, cc_seq, crosscore.get_in_seq()); - return crosscore.wait(cc_seq + conn, cc_seq, proto_crosscore.get_in_seq()); + return proto_crosscore.wait(cc_seq ).then([this, cc_seq] { return requeue_out_sent(cc_seq); }); @@ -517,14 +565,14 @@ void IOHandler::do_requeue_out_sent() } seastar::future<> IOHandler::requeue_out_sent_up_to( - crosscore_t::seq_t cc_seq, + cc_seq_t cc_seq, seq_num_t msg_seq) { assert(seastar::this_shard_id() == get_shard_id()); - if (!crosscore.proceed_or_wait(cc_seq)) { + if (!proto_crosscore.proceed_or_wait(cc_seq)) { logger().debug("{} got {} requeue_out_sent_up_to(), wait at {}", - conn, cc_seq, crosscore.get_in_seq()); - return crosscore.wait(cc_seq + conn, cc_seq, proto_crosscore.get_in_seq()); + return proto_crosscore.wait(cc_seq ).then([this, cc_seq, msg_seq] { return requeue_out_sent_up_to(cc_seq, msg_seq); }); @@ -583,7 +631,7 @@ void IOHandler::discard_out_sent() seastar::future<> IOHandler::dispatch_accept( - crosscore_t::seq_t cc_seq, + cc_seq_t cc_seq, seastar::shard_id new_sid, ConnectionFRef conn_fref, bool is_replace) @@ -593,7 +641,7 @@ IOHandler::dispatch_accept( seastar::future<> IOHandler::dispatch_connect( - crosscore_t::seq_t cc_seq, + cc_seq_t cc_seq, seastar::shard_id new_sid, ConnectionFRef conn_fref) { @@ -620,16 +668,16 @@ IOHandler::cleanup_prv_shard(seastar::shard_id prv_sid) seastar::future<> IOHandler::to_new_sid( - crosscore_t::seq_t cc_seq, + cc_seq_t cc_seq, seastar::shard_id new_sid, ConnectionFRef conn_fref, std::optional<bool> is_replace) { ceph_assert_always(seastar::this_shard_id() == get_shard_id()); - if (!crosscore.proceed_or_wait(cc_seq)) { + if (!proto_crosscore.proceed_or_wait(cc_seq)) { logger().debug("{} got {} to_new_sid(), wait at {}", - conn, cc_seq, crosscore.get_in_seq()); - return crosscore.wait(cc_seq + conn, cc_seq, proto_crosscore.get_in_seq()); + return proto_crosscore.wait(cc_seq ).then([this, cc_seq, new_sid, is_replace, conn_fref=std::move(conn_fref)]() mutable { return to_new_sid(cc_seq, new_sid, std::move(conn_fref), is_replace); @@ -685,6 +733,8 @@ IOHandler::to_new_sid( shard_states = shard_states_t::create_from_previous( *maybe_prv_shard_states, new_sid); assert(new_sid == get_shard_id()); + // broadcast shard change to all the io waiters, atomically. 
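Aside: io_crosscore here is an instance of smp_crosscore_ordering_t from crimson/common/smp_helpers.h, whose implementation is not shown in this section, so the exact behaviour of reset_wait() below is an assumption: presumably it wakes every source core's pending waiter in one pass, so that senders blocked in do_send()/do_send_keepalive() re-enter the *_recheck_shard() path and resubmit to the connection's new shard. A minimal sketch of that wake-up, assuming one optional seastar::shared_promise<> per source core (names are illustrative, not the real API):

#include <optional>
#include <vector>
#include <seastar/core/shared_future.hh>

// Hypothetical waiter slots, one per source core; the real layout in
// smp_helpers.h may differ.
struct waiter_slot_t {
  std::optional<seastar::shared_promise<>> pr_wait;
};

// Wake every blocked sender in one pass; each resumed waiter is expected
// to re-check get_shard_id() and resubmit to the connection's new core.
void reset_wait_sketch(std::vector<waiter_slot_t> &slots) {
  for (auto &slot : slots) {
    if (slot.pr_wait) {
      slot.pr_wait->set_value();
      slot.pr_wait.reset();
    }
  }
}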
+ io_crosscore.reset_wait(); return seastar::smp::submit_to(new_sid, [this, next_cc_seq, is_dropped, prv_sid, is_replace, conn_fref=std::move(conn_fref)]() mutable { @@ -699,7 +749,7 @@ IOHandler::to_new_sid( ceph_assert_always(seastar::this_shard_id() == get_shard_id()); ceph_assert_always(get_io_state() != io_state_t::open); ceph_assert_always(!maybe_dropped_sid.has_value()); - ceph_assert_always(crosscore.proceed_or_wait(next_cc_seq)); + ceph_assert_always(proto_crosscore.proceed_or_wait(next_cc_seq)); if (is_dropped) { ceph_assert_always(get_io_state() == io_state_t::drop); @@ -735,7 +785,7 @@ IOHandler::to_new_sid( } seastar::future<> IOHandler::set_accepted_sid( - crosscore_t::seq_t cc_seq, + cc_seq_t cc_seq, seastar::shard_id sid, ConnectionFRef conn_fref) { @@ -749,7 +799,7 @@ seastar::future<> IOHandler::set_accepted_sid( return seastar::smp::submit_to(sid, [this, cc_seq, conn_fref=std::move(conn_fref)]() mutable { // must be the first to proceed - ceph_assert_always(crosscore.proceed_or_wait(cc_seq)); + ceph_assert_always(proto_crosscore.proceed_or_wait(cc_seq)); logger().debug("{} set accepted sid", conn); ceph_assert_always(seastar::this_shard_id() == get_shard_id()); @@ -875,7 +925,7 @@ IOHandler::do_out_dispatch(shard_states_t &ctx) } if (io_state == io_state_t::open) { - auto cc_seq = crosscore.prepare_submit(); + auto cc_seq = proto_crosscore.prepare_submit(); logger().info("{} do_out_dispatch(): fault at {}, {}, going to delay -- {}, " "send {} notify_out_fault()", conn, io_state, io_stat_printer{*this}, e.what(), cc_seq); @@ -922,7 +972,7 @@ void IOHandler::notify_out_dispatch() ceph_assert_always(seastar::this_shard_id() == get_shard_id()); assert(is_out_queued()); if (need_notify_out) { - auto cc_seq = crosscore.prepare_submit(); + auto cc_seq = proto_crosscore.prepare_submit(); logger().debug("{} send {} notify_out()", conn, cc_seq); shard_states->dispatch_in_background( @@ -1152,7 +1202,7 @@ void IOHandler::do_in_dispatch() auto io_state = ctx.get_io_state(); if (io_state == io_state_t::open) { - auto cc_seq = crosscore.prepare_submit(); + auto cc_seq = proto_crosscore.prepare_submit(); logger().info("{} do_in_dispatch(): fault at {}, {}, going to delay -- {}, " "send {} notify_out_fault()", conn, io_state, io_stat_printer{*this}, e_what, cc_seq); @@ -1183,15 +1233,15 @@ void IOHandler::do_in_dispatch() seastar::future<> IOHandler::close_io( - crosscore_t::seq_t cc_seq, + cc_seq_t cc_seq, bool is_dispatch_reset, bool is_replace) { ceph_assert_always(seastar::this_shard_id() == get_shard_id()); - if (!crosscore.proceed_or_wait(cc_seq)) { + if (!proto_crosscore.proceed_or_wait(cc_seq)) { logger().debug("{} got {} close_io(), wait at {}", - conn, cc_seq, crosscore.get_in_seq()); - return crosscore.wait(cc_seq + conn, cc_seq, proto_crosscore.get_in_seq()); + return proto_crosscore.wait(cc_seq ).then([this, cc_seq, is_dispatch_reset, is_replace] { return close_io(cc_seq, is_dispatch_reset, is_replace); }); diff --git a/src/crimson/net/io_handler.h b/src/crimson/net/io_handler.h index f53c2ba6468..8b88e2f5a25 100644 --- a/src/crimson/net/io_handler.h +++ b/src/crimson/net/io_handler.h @@ -5,10 +5,10 @@ #include <vector> -#include <seastar/core/shared_future.hh> #include <seastar/util/later.hh> #include "crimson/common/gated.h" +#include "crimson/common/smp_helpers.h" #include "Fwd.h" #include "SocketConnection.h" #include "FrameAssemblerV2.h" @@ -16,54 +16,6 @@ namespace crimson::net { /** - * crosscore_t - * - * To preserve the event order across cores. 
- */ -class crosscore_t { -public: - using seq_t = uint64_t; - - crosscore_t() = default; - ~crosscore_t() = default; - - seq_t get_in_seq() const { - return in_seq; - } - - seq_t prepare_submit() { - ++out_seq; - return out_seq; - } - - bool proceed_or_wait(seq_t seq) { - if (seq == in_seq + 1) { - ++in_seq; - if (unlikely(in_pr_wait.has_value())) { - in_pr_wait->set_value(); - in_pr_wait = std::nullopt; - } - return true; - } else { - return false; - } - } - - seastar::future<> wait(seq_t seq) { - assert(seq != in_seq + 1); - if (!in_pr_wait.has_value()) { - in_pr_wait = seastar::shared_promise<>(); - } - return in_pr_wait->get_shared_future(); - } - -private: - seq_t out_seq = 0; - seq_t in_seq = 0; - std::optional<seastar::shared_promise<>> in_pr_wait; -}; - -/** * io_handler_state * * It is required to populate the states from IOHandler to ProtocolV2 @@ -118,6 +70,9 @@ struct io_handler_state { */ class HandshakeListener { public: + using proto_crosscore_ordering_t = smp_crosscore_ordering_t<crosscore_type_t::ONE>; + using cc_seq_t = proto_crosscore_ordering_t::seq_t; + virtual ~HandshakeListener() = default; HandshakeListener(const HandshakeListener&) = delete; @@ -126,16 +81,16 @@ public: HandshakeListener &operator=(HandshakeListener &&) = delete; virtual seastar::future<> notify_out( - crosscore_t::seq_t cc_seq) = 0; + cc_seq_t cc_seq) = 0; virtual seastar::future<> notify_out_fault( - crosscore_t::seq_t cc_seq, + cc_seq_t cc_seq, const char *where, std::exception_ptr, io_handler_state) = 0; virtual seastar::future<> notify_mark_down( - crosscore_t::seq_t cc_seq) = 0; + cc_seq_t cc_seq) = 0; protected: HandshakeListener() = default; @@ -150,6 +105,10 @@ protected: */ class IOHandler final : public ConnectionHandler { public: + using io_crosscore_ordering_t = smp_crosscore_ordering_t<crosscore_type_t::N_ONE>; + using proto_crosscore_ordering_t = smp_crosscore_ordering_t<crosscore_type_t::ONE>; + using cc_seq_t = proto_crosscore_ordering_t::seq_t; + IOHandler(ChainedDispatchers &, SocketConnection &); @@ -173,7 +132,7 @@ public: return protocol_is_connected; } - seastar::future<> send(MessageFRef msg) final; + seastar::future<> send(MessageURef msg) final; seastar::future<> send_keepalive() final; @@ -221,7 +180,7 @@ public: void print_io_stat(std::ostream &out) const; seastar::future<> set_accepted_sid( - crosscore_t::seq_t cc_seq, + cc_seq_t cc_seq, seastar::shard_id sid, ConnectionFRef conn_fref); @@ -230,7 +189,7 @@ public: */ seastar::future<> close_io( - crosscore_t::seq_t cc_seq, + cc_seq_t cc_seq, bool is_dispatch_reset, bool is_replace); @@ -251,7 +210,7 @@ public: friend class fmt::formatter<io_state_t>; seastar::future<> set_io_state( - crosscore_t::seq_t cc_seq, + cc_seq_t cc_seq, io_state_t new_state, FrameAssemblerV2Ref fa, bool set_notify_out); @@ -262,30 +221,30 @@ public: }; seastar::future<exit_dispatching_ret> wait_io_exit_dispatching( - crosscore_t::seq_t cc_seq); + cc_seq_t cc_seq); seastar::future<> reset_session( - crosscore_t::seq_t cc_seq, + cc_seq_t cc_seq, bool full); seastar::future<> reset_peer_state( - crosscore_t::seq_t cc_seq); + cc_seq_t cc_seq); seastar::future<> requeue_out_sent_up_to( - crosscore_t::seq_t cc_seq, + cc_seq_t cc_seq, seq_num_t msg_seq); seastar::future<> requeue_out_sent( - crosscore_t::seq_t cc_seq); + cc_seq_t cc_seq); seastar::future<> dispatch_accept( - crosscore_t::seq_t cc_seq, + cc_seq_t cc_seq, seastar::shard_id new_sid, ConnectionFRef, bool is_replace); seastar::future<> dispatch_connect( - crosscore_t::seq_t cc_seq, + 
cc_seq_t cc_seq, seastar::shard_id new_sid, ConnectionFRef); @@ -426,7 +385,7 @@ public: void do_set_io_state( io_state_t new_state, - std::optional<crosscore_t::seq_t> cc_seq = std::nullopt, + std::optional<cc_seq_t> cc_seq = std::nullopt, FrameAssemblerV2Ref fa = nullptr, bool set_notify_out = false); @@ -440,16 +399,16 @@ public: void assign_frame_assembler(FrameAssemblerV2Ref); - seastar::future<> send_redirected(MessageFRef msg); + seastar::future<> send_recheck_shard(cc_seq_t, core_id_t, MessageFRef); - seastar::future<> do_send(MessageFRef msg); + seastar::future<> do_send(cc_seq_t, core_id_t, MessageFRef); - seastar::future<> send_keepalive_redirected(); + seastar::future<> send_keepalive_recheck_shard(cc_seq_t, core_id_t); - seastar::future<> do_send_keepalive(); + seastar::future<> do_send_keepalive(cc_seq_t, core_id_t); seastar::future<> to_new_sid( - crosscore_t::seq_t cc_seq, + cc_seq_t cc_seq, seastar::shard_id new_sid, ConnectionFRef, std::optional<bool> is_replace); @@ -509,7 +468,9 @@ public: private: shard_states_ref_t shard_states; - crosscore_t crosscore; + proto_crosscore_ordering_t proto_crosscore; + + io_crosscore_ordering_t io_crosscore; // drop was happening in the previous sid std::optional<seastar::shard_id> maybe_dropped_sid; diff --git a/src/crimson/osd/object_context_loader.cc b/src/crimson/osd/object_context_loader.cc index 0a4d74c0d70..d7d2b6d98c6 100644 --- a/src/crimson/osd/object_context_loader.cc +++ b/src/crimson/osd/object_context_loader.cc @@ -22,7 +22,7 @@ using crimson::common::local_conf; return get_or_load_obc<State>(obc, existed) .safe_then_interruptible( [func = std::move(func)](auto obc) { - return std::move(func)(std::move(obc)); + return std::move(func)(obc, obc); }); }).finally([FNAME, this, obc=std::move(obc)] { DEBUGDPP("released object {}", dpp, obc->get_oid()); @@ -39,7 +39,7 @@ using crimson::common::local_conf; assert(!oid.is_head()); return with_obc<RWState::RWREAD>( oid.get_head(), - [FNAME, oid, func=std::move(func), this](auto head) mutable + [FNAME, oid, func=std::move(func), this](auto head, auto) mutable -> load_obc_iertr::future<> { if (!head->obs.exists) { ERRORDPP("head doesn't exist for object {}", dpp, head->obs.oi.soid); @@ -70,12 +70,12 @@ using crimson::common::local_conf; auto [clone, existed] = obc_registry.get_cached_obc(*coid); return clone->template with_lock<State, IOInterruptCondition>( [existed=existed, clone=std::move(clone), - func=std::move(func), head=std::move(head), this]() + func=std::move(func), head=std::move(head), this]() mutable -> load_obc_iertr::future<> { auto loaded = get_or_load_obc<State>(clone, existed); return loaded.safe_then_interruptible( - [func = std::move(func)](auto clone) { - return std::move(func)(std::move(clone)); + [func = std::move(func), head=std::move(head)](auto clone) mutable { + return std::move(func)(std::move(head), std::move(clone)); }); }); } @@ -84,13 +84,13 @@ using crimson::common::local_conf; ObjectContextLoader::load_obc_iertr::future<> ObjectContextLoader::with_clone_obc_direct( hobject_t oid, - with_both_obc_func_t&& func) + with_obc_func_t&& func) { LOG_PREFIX(ObjectContextLoader::with_clone_obc_direct); assert(!oid.is_head()); return with_obc<RWState::RWREAD>( oid.get_head(), - [FNAME, oid, func=std::move(func), this](auto head) mutable + [FNAME, oid, func=std::move(func), this](auto head, auto) mutable -> load_obc_iertr::future<> { if (!head->obs.exists) { ERRORDPP("head doesn't exist for object {}", dpp, head->obs.oi.soid); @@ -228,5 +228,5 @@ using 
crimson::common::local_conf; template ObjectContextLoader::load_obc_iertr::future<> ObjectContextLoader::with_clone_obc_direct<RWState::RWWRITE>( hobject_t, - with_both_obc_func_t&&); + with_obc_func_t&&); } diff --git a/src/crimson/osd/object_context_loader.h b/src/crimson/osd/object_context_loader.h index 3ab7f6ad80f..0cd50623abc 100644 --- a/src/crimson/osd/object_context_loader.h +++ b/src/crimson/osd/object_context_loader.h @@ -30,9 +30,6 @@ public: load_obc_ertr>; using with_obc_func_t = - std::function<load_obc_iertr::future<> (ObjectContextRef)>; - - using with_both_obc_func_t = std::function<load_obc_iertr::future<> (ObjectContextRef, ObjectContextRef)>; // Use this variant by default @@ -55,7 +52,7 @@ public: template<RWState::State State> load_obc_iertr::future<> with_clone_obc_direct( hobject_t oid, - with_both_obc_func_t&& func); + with_obc_func_t&& func); load_obc_iertr::future<> reload_obc(ObjectContext& obc) const; diff --git a/src/crimson/osd/ops_executer.cc b/src/crimson/osd/ops_executer.cc index 040870203bd..09b1a492576 100644 --- a/src/crimson/osd/ops_executer.cc +++ b/src/crimson/osd/ops_executer.cc @@ -797,7 +797,7 @@ void OpsExecuter::fill_op_params_bump_pg_version() { osd_op_params->req_id = msg->get_reqid(); osd_op_params->mtime = msg->get_mtime(); - osd_op_params->at_version = pg->next_version(); + osd_op_params->at_version = pg->get_next_version(); osd_op_params->pg_trim_to = pg->get_pg_trim_to(); osd_op_params->min_last_complete_ondisk = pg->get_min_last_complete_ondisk(); osd_op_params->last_complete = pg->get_info().last_complete; @@ -820,6 +820,7 @@ std::vector<pg_log_entry_t> OpsExecuter::prepare_transaction( osd_op_params->req_id, osd_op_params->mtime, op_info.allows_returnvec() && !ops.empty() ? ops.back().rval.code : 0); + osd_op_params->at_version.version++; if (op_info.allows_returnvec()) { // also the per-op values are recorded in the pg log log_entries.back().set_op_returns(ops); @@ -957,6 +958,7 @@ std::unique_ptr<OpsExecuter::CloningContext> OpsExecuter::execute_clone( initial_obs.oi.mtime, // will be replaced in `apply_to()` 0 }; + osd_op_params->at_version.version++; encode(cloned_snaps, cloning_ctx->log_entry.snaps); // TODO: update most recent clone_overlap and usage stats @@ -968,7 +970,7 @@ void OpsExecuter::CloningContext::apply_to( ObjectContext& processed_obc) && { log_entry.mtime = processed_obc.obs.oi.mtime; - log_entries.emplace_back(std::move(log_entry)); + log_entries.insert(log_entries.begin(), std::move(log_entry)); processed_obc.ssc->snapset = std::move(new_snapset); } @@ -983,7 +985,7 @@ OpsExecuter::flush_clone_metadata( auto maybe_snap_mapped = interruptor::now(); if (cloning_ctx) { std::move(*cloning_ctx).apply_to(log_entries, *obc); - const auto& coid = log_entries.back().soid; + const auto& coid = log_entries.front().soid; const auto& cloned_snaps = obc->ssc->snapset.clone_snaps[coid.snap]; maybe_snap_mapped = snap_map_clone( coid, @@ -1012,7 +1014,7 @@ std::pair<object_info_t, ObjectContextRef> OpsExecuter::prepare_clone( const hobject_t& coid) { object_info_t static_snap_oi(coid); - static_snap_oi.version = pg->next_version(); + static_snap_oi.version = osd_op_params->at_version; static_snap_oi.prior_version = obc->obs.oi.version; static_snap_oi.copy_user_bits(obc->obs.oi); if (static_snap_oi.is_whiteout()) { diff --git a/src/crimson/osd/osd.cc b/src/crimson/osd/osd.cc index 8da2d566e6b..f3648c6df27 100644 --- a/src/crimson/osd/osd.cc +++ b/src/crimson/osd/osd.cc @@ -400,7 +400,11 @@ seastar::future<> OSD::start() ); 
}).then([this](OSDSuperblock&& sb) { superblock = std::move(sb); - pg_shard_manager.set_superblock(superblock); + if (!superblock.cluster_osdmap_trim_lower_bound) { + superblock.cluster_osdmap_trim_lower_bound = superblock.get_oldest_map(); + } + return pg_shard_manager.set_superblock(superblock); + }).then([this] { return pg_shard_manager.get_local_map(superblock.current_epoch); }).then([this](OSDMapService::local_cached_map_t&& map) { osdmap = make_local_shared_foreign(OSDMapService::local_cached_map_t(map)); @@ -864,6 +868,25 @@ void OSD::handle_authentication(const EntityName& name, } } +const char** OSD::get_tracked_conf_keys() const +{ + static const char* KEYS[] = { + "osd_beacon_report_interval", + nullptr + }; + return KEYS; +} + +void OSD::handle_conf_change( + const crimson::common::ConfigProxy& conf, + const std::set <std::string> &changed) +{ + if (changed.count("osd_beacon_report_interval")) { + beacon_timer.rearm_periodic( + std::chrono::seconds(conf->osd_beacon_report_interval)); + } +} + void OSD::update_stats() { osd_stat_seq++; @@ -879,13 +902,20 @@ void OSD::update_stats() }); } -seastar::future<MessageURef> OSD::get_stats() const +seastar::future<MessageURef> OSD::get_stats() { // MPGStats::had_map_for is not used since PGMonitor was removed auto m = crimson::make_message<MPGStats>(monc->get_fsid(), osdmap->get_epoch()); m->osd_stat = osd_stat; return pg_shard_manager.get_pg_stats( - ).then([m=std::move(m)](auto &&stats) mutable { + ).then([this, m=std::move(m)](auto &&stats) mutable { + min_last_epoch_clean = osdmap->get_epoch(); + min_last_epoch_clean_pgs.clear(); + for (auto [pgid, stat] : stats) { + min_last_epoch_clean = std::min(min_last_epoch_clean, + stat.get_effective_last_epoch_clean()); + min_last_epoch_clean_pgs.push_back(pgid); + } m->pg_stat = std::move(stats); return seastar::make_ready_future<MessageURef>(std::move(m)); }); @@ -934,6 +964,16 @@ seastar::future<> OSD::_handle_osd_map(Ref<MOSDMap> m) logger().info("handle_osd_map epochs [{}..{}], i have {}, src has [{}..{}]", first, last, superblock.get_newest_map(), m->cluster_osdmap_trim_lower_bound, m->newest_map); + + if (superblock.cluster_osdmap_trim_lower_bound < + m->cluster_osdmap_trim_lower_bound) { + superblock.cluster_osdmap_trim_lower_bound = + m->cluster_osdmap_trim_lower_bound; + logger().debug("{} superblock cluster_osdmap_trim_lower_bound new epoch is: {}", + __func__, superblock.cluster_osdmap_trim_lower_bound); + ceph_assert( + superblock.cluster_osdmap_trim_lower_bound >= superblock.get_oldest_map()); + } // make sure there is something new, here, before we bother flushing // the queues and such if (last <= superblock.get_newest_map()) { @@ -964,8 +1004,9 @@ seastar::future<> OSD::_handle_osd_map(Ref<MOSDMap> m) monc->sub_got("osdmap", last); if (!superblock.maps.empty()) { - // TODO: support osdmap trimming - // See: <tracker> + pg_shard_manager.trim_maps(t, superblock); + // TODO: once we support pg splitting, update pg_num_history here + //pg_num_history.prune(superblock.get_oldest_map()); } superblock.insert_osdmap_epochs(first, last); @@ -977,11 +1018,13 @@ seastar::future<> OSD::_handle_osd_map(Ref<MOSDMap> m) superblock.clean_thru = last; } pg_shard_manager.get_meta_coll().store_superblock(t, superblock); - pg_shard_manager.set_superblock(superblock); - logger().debug("OSD::handle_osd_map: do_transaction..."); - return store.get_sharded_store().do_transaction( - pg_shard_manager.get_meta_coll().collection(), - std::move(t)); + return 
pg_shard_manager.set_superblock(superblock).then( + [this, &t] { + logger().debug("OSD::handle_osd_map: do_transaction..."); + return store.get_sharded_store().do_transaction( + pg_shard_manager.get_meta_coll().collection(), + std::move(t)); + }); }); }).then([=, this] { // TODO: write to superblock and commit the transaction @@ -1266,14 +1309,13 @@ seastar::future<> OSD::send_beacon() if (!pg_shard_manager.is_active()) { return seastar::now(); } - // FIXME: min lec should be calculated from pg_stat - // and should set m->pgs - epoch_t min_last_epoch_clean = osdmap->get_epoch(); - auto m = crimson::make_message<MOSDBeacon>(osdmap->get_epoch(), + auto beacon = crimson::make_message<MOSDBeacon>(osdmap->get_epoch(), min_last_epoch_clean, superblock.last_purged_snaps_scrub, local_conf()->osd_beacon_report_interval); - return monc->send_message(std::move(m)); + beacon->pgs = min_last_epoch_clean_pgs; + logger().debug("{} {}", __func__, *beacon); + return monc->send_message(std::move(beacon)); } seastar::future<> OSD::update_heartbeat_peers() diff --git a/src/crimson/osd/osd.h b/src/crimson/osd/osd.h index 10ff60d4701..134376ad947 100644 --- a/src/crimson/osd/osd.h +++ b/src/crimson/osd/osd.h @@ -61,7 +61,8 @@ class PG; class OSD final : public crimson::net::Dispatcher, private crimson::common::AuthHandler, - private crimson::mgr::WithStats { + private crimson::mgr::WithStats, + public md_config_obs_t { const int whoami; const uint32_t nonce; seastar::abort_source& abort_source; @@ -106,8 +107,11 @@ class OSD final : public crimson::net::Dispatcher, // pg statistics including osd ones osd_stat_t osd_stat; uint32_t osd_stat_seq = 0; + epoch_t min_last_epoch_clean = 0; + // which pgs were scanned for min_lec + std::vector<pg_t> min_last_epoch_clean_pgs; void update_stats(); - seastar::future<MessageURef> get_stats() const final; + seastar::future<MessageURef> get_stats() final; // AuthHandler methods void handle_authentication(const EntityName& name, @@ -123,6 +127,10 @@ class OSD final : public crimson::net::Dispatcher, std::unique_ptr<Heartbeat> heartbeat; seastar::timer<seastar::lowres_clock> tick_timer; + const char** get_tracked_conf_keys() const final; + void handle_conf_change(const ConfigProxy& conf, + const std::set<std::string> &changed) final; + // admin-socket seastar::lw_shared_ptr<crimson::admin::AdminSocket> asok; diff --git a/src/crimson/osd/osd_connection_priv.h b/src/crimson/osd/osd_connection_priv.h index 2d2a459017b..3c7d085c06e 100644 --- a/src/crimson/osd/osd_connection_priv.h +++ b/src/crimson/osd/osd_connection_priv.h @@ -3,8 +3,7 @@ #pragma once -#include <seastar/core/smp.hh> - +#include "crimson/common/smp_helpers.h" #include "crimson/net/Connection.h" #include "crimson/osd/osd_operation.h" #include "crimson/osd/osd_operations/client_request.h" @@ -13,75 +12,9 @@ namespace crimson::osd { -/** - * crosscore_ordering_t - * - * To preserve the event order from 1 source to n target cores. 
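Aside: this crosscore_ordering_t (its removed body follows) and the crosscore_t dropped from io_handler.h above are both folded into the shared smp_crosscore_ordering_t<crosscore_type_t> template from crimson/common/smp_helpers.h. That header is not shown in this section, so the following is only a rough sketch of the N_ONE flavour that IOHandler::do_send() uses, under the assumption that it mirrors the removed classes but keeps per-source-core bookkeeping on the single receiving side: sender-side counters indexed by the submitting core, receiver-side expected sequences and waiter slots indexed by the same core. Names are illustrative, not the real API.

#include <cassert>
#include <cstdint>
#include <optional>
#include <vector>
#include <seastar/core/shared_future.hh>
#include <seastar/core/smp.hh>

// Illustrative only: the real smp_crosscore_ordering_t lives in
// crimson/common/smp_helpers.h and is not reproduced in this diff section.
class n_one_crosscore_ordering_sketch {
public:
  using seq_t = uint64_t;

  n_one_crosscore_ordering_sketch()
    : out_seqs(seastar::smp::count, 0),
      in_controls(seastar::smp::count) {}

  // Called on the submitting (source) core.
  seq_t prepare_submit() {
    return ++out_seqs[seastar::this_shard_id()];
  }

  // The calls below run on the single receiving core.

  seq_t get_in_seq(seastar::shard_id source_core) const {
    return in_controls[source_core].seq;
  }

  bool proceed_or_wait(seq_t seq, seastar::shard_id source_core) {
    auto &ctl = in_controls[source_core];
    if (seq == ctl.seq + 1) {
      ++ctl.seq;
      if (ctl.pr_wait) {
        ctl.pr_wait->set_value();
        ctl.pr_wait.reset();
      }
      return true;
    }
    return false;
  }

  seastar::future<> wait(seq_t seq, seastar::shard_id source_core) {
    auto &ctl = in_controls[source_core];
    assert(seq != ctl.seq + 1);
    if (!ctl.pr_wait) {
      ctl.pr_wait.emplace();
    }
    return ctl.pr_wait->get_shared_future();
  }

private:
  struct in_control_t {
    seq_t seq = 0;
    std::optional<seastar::shared_promise<>> pr_wait;
  };
  // Sender side: next sequence per submitting core.
  std::vector<seq_t> out_seqs;
  // Receiver side: expected sequence and waiter slot per source core.
  std::vector<in_control_t> in_controls;
};

reset_wait(), called when the connection migrates to a new shard, would then simply run the wake-up loop sketched earlier over in_controls.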
- */ -class crosscore_ordering_t { -public: - using seq_t = uint64_t; - - crosscore_ordering_t() - : out_seqs(seastar::smp::count, 0), - in_controls(seastar::smp::count) {} - - ~crosscore_ordering_t() = default; - - // Called by the original core to get the ordering sequence - seq_t prepare_submit(core_id_t target_core) { - auto &out_seq = out_seqs[target_core]; - ++out_seq; - return out_seq; - } - - /* - * Called by the target core to preserve the ordering - */ - - seq_t get_in_seq() const { - auto core = seastar::this_shard_id(); - return in_controls[core].seq; - } - - bool proceed_or_wait(seq_t seq) { - auto core = seastar::this_shard_id(); - auto &in_control = in_controls[core]; - if (seq == in_control.seq + 1) { - ++in_control.seq; - if (unlikely(in_control.pr_wait.has_value())) { - in_control.pr_wait->set_value(); - in_control.pr_wait = std::nullopt; - } - return true; - } else { - return false; - } - } - - seastar::future<> wait(seq_t seq) { - auto core = seastar::this_shard_id(); - auto &in_control = in_controls[core]; - assert(seq != in_control.seq + 1); - if (!in_control.pr_wait.has_value()) { - in_control.pr_wait = seastar::shared_promise<>(); - } - return in_control.pr_wait->get_shared_future(); - } - -private: - struct in_control_t { - seq_t seq = 0; - std::optional<seastar::shared_promise<>> pr_wait; - }; - - // source-side - std::vector<seq_t> out_seqs; - // target-side - std::vector<in_control_t> in_controls; -}; - struct OSDConnectionPriv : public crimson::net::Connection::user_private_t { + using crosscore_ordering_t = smp_crosscore_ordering_t<crosscore_type_t::ONE_N>; + ConnectionPipeline client_request_conn_pipeline; ConnectionPipeline peering_request_conn_pipeline; ConnectionPipeline replicated_request_conn_pipeline; diff --git a/src/crimson/osd/osd_meta.cc b/src/crimson/osd/osd_meta.cc index e40b2b2464b..06d6932bc54 100644 --- a/src/crimson/osd/osd_meta.cc +++ b/src/crimson/osd/osd_meta.cc @@ -9,6 +9,7 @@ #include "crimson/os/futurized_collection.h" #include "crimson/os/futurized_store.h" #include "os/Transaction.h" +#include "osd/OSDMap.h" using std::string; using read_errorator = crimson::os::FuturizedStore::Shard::read_errorator; @@ -24,6 +25,11 @@ void OSDMeta::store_map(ceph::os::Transaction& t, t.write(coll->get_cid(), osdmap_oid(e), 0, m.length(), m); } +void OSDMeta::remove_map(ceph::os::Transaction& t, epoch_t e) +{ + t.remove(coll->get_cid(), osdmap_oid(e)); +} + seastar::future<bufferlist> OSDMeta::load_map(epoch_t e) { return store.read(coll, @@ -80,6 +86,36 @@ OSDMeta::load_final_pool_info(int64_t pool) { })); } +void OSDMeta::store_final_pool_info( + ceph::os::Transaction &t, + OSDMap* lastmap, + std::map<epoch_t, OSDMap*> &added_map) +{ + for (auto [e, map] : added_map) { + if (!lastmap) { + lastmap = map; + continue; + } + for (auto &[pool_id, pool] : lastmap->get_pools()) { + if (!map->have_pg_pool(pool_id)) { + ghobject_t obj = final_pool_info_oid(pool_id); + bufferlist bl; + encode(pool, bl, CEPH_FEATURES_ALL); + string name = lastmap->get_pool_name(pool_id); + encode(name, bl); + std::map<string, string> profile; + if (pool.is_erasure()) { + profile = lastmap->get_erasure_code_profile( + pool.erasure_code_profile); + } + encode(profile, bl); + t.write(coll->get_cid(), obj, 0, bl.length(), bl); + } + } + lastmap = map; + } +} + ghobject_t OSDMeta::osdmap_oid(epoch_t epoch) { string name = fmt::format("osdmap.{}", epoch); diff --git a/src/crimson/osd/osd_meta.h b/src/crimson/osd/osd_meta.h index 652266d9e20..2363671e330 100644 --- 
a/src/crimson/osd/osd_meta.h +++ b/src/crimson/osd/osd_meta.h @@ -40,6 +40,7 @@ public: void store_map(ceph::os::Transaction& t, epoch_t e, const bufferlist& m); + void remove_map(ceph::os::Transaction& t, epoch_t e); seastar::future<bufferlist> load_map(epoch_t e); void store_superblock(ceph::os::Transaction& t, @@ -53,6 +54,10 @@ public: seastar::future<std::tuple<pg_pool_t, std::string, ec_profile_t>> load_final_pool_info(int64_t pool); + void store_final_pool_info( + ceph::os::Transaction&, + OSDMap* lastmap, + std::map<epoch_t, OSDMap*>&); private: static ghobject_t osdmap_oid(epoch_t epoch); static ghobject_t final_pool_info_oid(int64_t pool); diff --git a/src/crimson/osd/osd_operations/client_request.cc b/src/crimson/osd/osd_operations/client_request.cc index 2160e4823f7..3fba73a0c23 100644 --- a/src/crimson/osd/osd_operations/client_request.cc +++ b/src/crimson/osd/osd_operations/client_request.cc @@ -204,7 +204,8 @@ ClientRequest::process_pg_op( return pg->do_pg_ops( m ).then_interruptible([this, pg=std::move(pg)](MURef<MOSDOpReply> reply) { - return conn->send(std::move(reply)); + // TODO: gate the crosscore sending + return conn->send_with_throttling(std::move(reply)); }); } @@ -218,25 +219,17 @@ auto ClientRequest::reply_op_error(const Ref<PG>& pg, int err) !m->has_flag(CEPH_OSD_FLAG_RETURNVEC)); reply->set_reply_versions(eversion_t(), 0); reply->set_op_returns(std::vector<pg_log_op_return_item_t>{}); - return conn->send(std::move(reply)); + // TODO: gate the crosscore sending + return conn->send_with_throttling(std::move(reply)); } ClientRequest::interruptible_future<> ClientRequest::process_op(instance_handle_t &ihref, Ref<PG> &pg) { return ihref.enter_stage<interruptor>( - client_pp(*pg).recover_missing, - *this - ).then_interruptible( - [this, pg]() mutable { - LOG_PREFIX(ClientRequest::process_op); - if (pg->is_primary()) { - return do_recover_missing(pg, m->get_hobj()); - } else { - DEBUGI("process_op: Skipping do_recover_missing" - "on non primary pg"); - return interruptor::now(); - } + client_pp(*pg).recover_missing, *this + ).then_interruptible([pg, this]() mutable { + return recover_missings(pg, m->get_hobj(), snaps_need_to_recover()); }).then_interruptible([this, pg, &ihref]() mutable { return pg->already_complete(m->get_reqid()).then_interruptible( [this, pg, &ihref](auto completed) mutable @@ -246,7 +239,8 @@ ClientRequest::process_op(instance_handle_t &ihref, Ref<PG> &pg) m.get(), completed->err, pg->get_osdmap_epoch(), CEPH_OSD_FLAG_ACK | CEPH_OSD_FLAG_ONDISK, false); reply->set_reply_versions(completed->version, completed->user_version); - return conn->send(std::move(reply)); + // TODO: gate the crosscore sending + return conn->send_with_throttling(std::move(reply)); } else { return ihref.enter_stage<interruptor>(client_pp(*pg).get_obc, *this ).then_interruptible( @@ -256,7 +250,7 @@ ClientRequest::process_op(instance_handle_t &ihref, Ref<PG> &pg) op_info.set_from_op(&*m, *pg->get_osdmap()); return pg->with_locked_obc( m->get_hobj(), op_info, - [this, pg, &ihref](auto obc) mutable { + [this, pg, &ihref](auto head, auto obc) mutable { LOG_PREFIX(ClientRequest::process_op); DEBUGI("{}: got obc {}", *this, obc->obs); return ihref.enter_stage<interruptor>( @@ -319,13 +313,13 @@ ClientRequest::do_process( SnapContext snapc = get_snapc(pg,obc); - if ((m->has_flag(CEPH_OSD_FLAG_ORDERSNAP)) && - snapc.seq < obc->ssc->snapset.seq) { - DEBUGI("{} ORDERSNAP flag set and snapc seq {}", - " < snapset seq {} on {}", - __func__, snapc.seq, obc->ssc->snapset.seq, - 
obc->obs.oi.soid); - return reply_op_error(pg, -EOLDSNAPC); + if (m->has_flag(CEPH_OSD_FLAG_ORDERSNAP) && + snapc.seq < obc->ssc->snapset.seq) { + DEBUGI("{} ORDERSNAP flag set and snapc seq {}", + " < snapset seq {} on {}", + __func__, snapc.seq, obc->ssc->snapset.seq, + obc->obs.oi.soid); + return reply_op_error(pg, -EOLDSNAPC); } if (!pg->is_primary()) { @@ -360,8 +354,10 @@ ClientRequest::do_process( [this, reply=std::move(reply)]() mutable { LOG_PREFIX(ClientRequest::do_process); DEBUGI("{}: sending response", *this); - return conn->send(std::move(reply)); - }); + // TODO: gate the crosscore sending + return conn->send_with_throttling(std::move(reply)); + } + ); }, crimson::ct_error::eagain::handle([this, pg, &ihref]() mutable { return process_op(ihref, pg); })); diff --git a/src/crimson/osd/osd_operations/client_request.h b/src/crimson/osd/osd_operations/client_request.h index 3c8c146a4f2..43535e156c5 100644 --- a/src/crimson/osd/osd_operations/client_request.h +++ b/src/crimson/osd/osd_operations/client_request.h @@ -160,6 +160,16 @@ public: } auto get_instance_handle() { return instance_handle; } + std::vector<snapid_t> snaps_need_to_recover() { + std::vector<snapid_t> ret; + for (auto &op : m->ops) { + if (op.op.op == CEPH_OSD_OP_ROLLBACK) { + ret.emplace_back((snapid_t)op.op.snap.snapid); + } + } + return ret; + } + using ordering_hook_t = boost::intrusive::list_member_hook<>; ordering_hook_t ordering_hook; class Orderer { diff --git a/src/crimson/osd/osd_operations/client_request_common.cc b/src/crimson/osd/osd_operations/client_request_common.cc index cfd22c774e0..903da59a3dc 100644 --- a/src/crimson/osd/osd_operations/client_request_common.cc +++ b/src/crimson/osd/osd_operations/client_request_common.cc @@ -11,8 +11,54 @@ namespace { } } +SET_SUBSYS(osd); + namespace crimson::osd { +InterruptibleOperation::template interruptible_future<> +CommonClientRequest::recover_missings( + Ref<PG> &pg, + const hobject_t& soid, + std::vector<snapid_t> &&snaps) +{ + using interruptor = InterruptibleOperation::interruptor; + LOG_PREFIX(CommonClientRequest::recover_missings); + auto fut = interruptor::now(); + if (!pg->is_primary()) { + DEBUGI("process_op: Skipping do_recover_missing on non primary pg"); + return fut; + } + if (!soid.is_head()) { + fut = do_recover_missing(pg, soid.get_head()); + } + return seastar::do_with( + std::move(snaps), + [pg, soid, fut=std::move(fut)](auto &snaps) mutable { + return fut.then_interruptible([&snaps, pg, soid]() mutable { + return pg->obc_loader.with_obc<RWState::RWREAD>( + soid.get_head(), + [&snaps, pg, soid](auto head, auto) mutable { + auto oid = resolve_oid(head->get_head_ss(), soid); + assert(oid); + return do_recover_missing(pg, *oid + ).then_interruptible([&snaps, pg, soid, head]() mutable { + return InterruptibleOperation::interruptor::do_for_each( + snaps, + [pg, soid, head](auto &snap) mutable { + auto coid = head->obs.oi.soid; + coid.snap = snap; + auto oid = resolve_oid(head->get_head_ss(), coid); + assert(oid); + return do_recover_missing(pg, *oid); + }); + }); + }); + }).handle_error_interruptible( + crimson::ct_error::assert_all("unexpected error") + ); + }); +} + typename InterruptibleOperation::template interruptible_future<> CommonClientRequest::do_recover_missing( Ref<PG>& pg, const hobject_t& soid) diff --git a/src/crimson/osd/osd_operations/client_request_common.h b/src/crimson/osd/osd_operations/client_request_common.h index 6a8a789668c..46aa038e343 100644 --- a/src/crimson/osd/osd_operations/client_request_common.h +++ 
b/src/crimson/osd/osd_operations/client_request_common.h @@ -10,6 +10,13 @@ namespace crimson::osd { struct CommonClientRequest { + + static InterruptibleOperation::template interruptible_future<> + recover_missings( + Ref<PG> &pg, + const hobject_t& soid, + std::vector<snapid_t> &&snaps); + static InterruptibleOperation::template interruptible_future<> do_recover_missing(Ref<PG>& pg, const hobject_t& soid); diff --git a/src/crimson/osd/osd_operations/common/pg_pipeline.h b/src/crimson/osd/osd_operations/common/pg_pipeline.h index 58fa07b8b4d..1e5d0e511f6 100644 --- a/src/crimson/osd/osd_operations/common/pg_pipeline.h +++ b/src/crimson/osd/osd_operations/common/pg_pipeline.h @@ -19,7 +19,7 @@ protected: } wait_for_active; struct RecoverMissing : OrderedExclusivePhaseT<RecoverMissing> { static constexpr auto type_name = "CommonPGPipeline::recover_missing"; - } recover_missing; + } recover_missing, recover_missing2; struct GetOBC : OrderedExclusivePhaseT<GetOBC> { static constexpr auto type_name = "CommonPGPipeline::get_obc"; } get_obc; diff --git a/src/crimson/osd/osd_operations/internal_client_request.cc b/src/crimson/osd/osd_operations/internal_client_request.cc index ea45e92c148..527d982f1e9 100644 --- a/src/crimson/osd/osd_operations/internal_client_request.cc +++ b/src/crimson/osd/osd_operations/internal_client_request.cc @@ -85,7 +85,7 @@ seastar::future<> InternalClientRequest::start() std::as_const(osd_ops), pg->get_pgid().pgid, *pg->get_osdmap()); assert(ret == 0); return pg->with_locked_obc(get_target_oid(), op_info, - [&osd_ops, this](auto obc) { + [&osd_ops, this](auto, auto obc) { return enter_stage<interruptor>(client_pp().process ).then_interruptible( [obc=std::move(obc), &osd_ops, this] { diff --git a/src/crimson/osd/osd_operations/snaptrim_event.cc b/src/crimson/osd/osd_operations/snaptrim_event.cc index 8ae36a5483d..20f7439f52f 100644 --- a/src/crimson/osd/osd_operations/snaptrim_event.cc +++ b/src/crimson/osd/osd_operations/snaptrim_event.cc @@ -220,8 +220,7 @@ SnapTrimObjSubEvent::remove_or_update_iertr::future<> SnapTrimObjSubEvent::remove_clone( ObjectContextRef obc, ObjectContextRef head_obc, - ceph::os::Transaction& txn, - std::vector<pg_log_entry_t>& log_entries + ceph::os::Transaction& txn ) { const auto p = std::find( head_obc->ssc->snapset.clones.begin(), @@ -268,17 +267,14 @@ SnapTrimObjSubEvent::remove_clone( head_obc->ssc->snapset.clone_size.erase(last); head_obc->ssc->snapset.clone_snaps.erase(last); - log_entries.emplace_back( - pg_log_entry_t{ - pg_log_entry_t::DELETE, - coid, - osd_op_p.at_version, - obc->obs.oi.version, - 0, - osd_reqid_t(), - obc->obs.oi.mtime, // will be replaced in `apply_to()` - 0} - ); + add_log_entry( + pg_log_entry_t::DELETE, + coid, + obc->obs.oi.version, + 0, + osd_reqid_t(), + obc->obs.oi.mtime, // will be replaced in `apply_to()` + 0); txn.remove( pg->get_collection_ref()->get_cid(), ghobject_t{coid, ghobject_t::NO_GEN, shard_id_t::NO_SHARD}); @@ -289,8 +285,7 @@ SnapTrimObjSubEvent::remove_clone( void SnapTrimObjSubEvent::remove_head_whiteout( ObjectContextRef obc, ObjectContextRef head_obc, - ceph::os::Transaction& txn, - std::vector<pg_log_entry_t>& log_entries + ceph::os::Transaction& txn ) { // NOTE: this arguably constitutes minor interference with the // tiering agent if this is a cache tier since a snap trim event @@ -299,17 +294,14 @@ void SnapTrimObjSubEvent::remove_head_whiteout( const auto head_oid = coid.get_head(); logger().info("{}: {} removing {}", *this, coid, head_oid); - log_entries.emplace_back( - 
pg_log_entry_t{ - pg_log_entry_t::DELETE, - head_oid, - osd_op_p.at_version, - head_obc->obs.oi.version, - 0, - osd_reqid_t(), - obc->obs.oi.mtime, // will be replaced in `apply_to()` - 0} - ); + add_log_entry( + pg_log_entry_t::DELETE, + head_oid, + head_obc->obs.oi.version, + 0, + osd_reqid_t(), + obc->obs.oi.mtime, // will be replaced in `apply_to()` + 0); logger().info("{}: remove snap head", *this); object_info_t& oi = head_obc->obs.oi; delta_stats.num_objects--; @@ -335,8 +327,7 @@ SnapTrimObjSubEvent::adjust_snaps( ObjectContextRef obc, ObjectContextRef head_obc, const std::set<snapid_t>& new_snaps, - ceph::os::Transaction& txn, - std::vector<pg_log_entry_t>& log_entries + ceph::os::Transaction& txn ) { head_obc->ssc->snapset.clone_snaps[coid.snap] = std::vector<snapid_t>(new_snaps.rbegin(), new_snaps.rend()); @@ -354,17 +345,14 @@ SnapTrimObjSubEvent::adjust_snaps( ghobject_t{coid, ghobject_t::NO_GEN, shard_id_t::NO_SHARD}, OI_ATTR, bl); - log_entries.emplace_back( - pg_log_entry_t{ - pg_log_entry_t::MODIFY, - coid, - obc->obs.oi.version, - obc->obs.oi.prior_version, - 0, - osd_reqid_t(), - obc->obs.oi.mtime, - 0} - ); + add_log_entry( + pg_log_entry_t::MODIFY, + coid, + obc->obs.oi.prior_version, + 0, + osd_reqid_t(), + obc->obs.oi.mtime, + 0); return OpsExecuter::snap_map_modify( coid, new_snaps, pg->snap_mapper, pg->osdriver, txn); } @@ -372,23 +360,19 @@ SnapTrimObjSubEvent::adjust_snaps( void SnapTrimObjSubEvent::update_head( ObjectContextRef obc, ObjectContextRef head_obc, - ceph::os::Transaction& txn, - std::vector<pg_log_entry_t>& log_entries + ceph::os::Transaction& txn ) { const auto head_oid = coid.get_head(); logger().info("{}: writing updated snapset on {}, snapset is {}", *this, head_oid, head_obc->ssc->snapset); - log_entries.emplace_back( - pg_log_entry_t{ - pg_log_entry_t::MODIFY, - head_oid, - osd_op_p.at_version, - head_obc->obs.oi.version, - 0, - osd_reqid_t(), - obc->obs.oi.mtime, - 0} - ); + add_log_entry( + pg_log_entry_t::MODIFY, + head_oid, + head_obc->obs.oi.version, + 0, + osd_reqid_t(), + obc->obs.oi.mtime, + 0); head_obc->obs.oi.prior_version = head_obc->obs.oi.version; head_obc->obs.oi.version = osd_op_p.at_version; @@ -408,8 +392,7 @@ void SnapTrimObjSubEvent::update_head( attrs); } -SnapTrimObjSubEvent::remove_or_update_iertr::future< - SnapTrimObjSubEvent::remove_or_update_ret_t> +SnapTrimObjSubEvent::remove_or_update_iertr::future<ceph::os::Transaction> SnapTrimObjSubEvent::remove_or_update( ObjectContextRef obc, ObjectContextRef head_obc) @@ -442,45 +425,41 @@ SnapTrimObjSubEvent::remove_or_update( } return seastar::do_with(ceph::os::Transaction{}, [=, this](auto &txn) { - std::vector<pg_log_entry_t> log_entries{}; - - int64_t num_objects_before_trim = delta_stats.num_objects; - osd_op_p.at_version = pg->next_version(); - auto ret = remove_or_update_iertr::now(); - if (new_snaps.empty()) { - // remove clone from snapset - logger().info("{}: {} snaps {} -> {} ... 
deleting", - *this, coid, old_snaps, new_snaps); - ret = remove_clone(obc, head_obc, txn, log_entries); - } else { - // save adjusted snaps for this object - logger().info("{}: {} snaps {} -> {}", - *this, coid, old_snaps, new_snaps); - ret = adjust_snaps(obc, head_obc, new_snaps, txn, log_entries); - } - return std::move(ret).si_then( - [&txn, obc, num_objects_before_trim, log_entries=std::move(log_entries), head_obc=std::move(head_obc), this]() mutable { - osd_op_p.at_version = pg->next_version(); - - // save head snapset - logger().debug("{}: {} new snapset {} on {}", - *this, coid, head_obc->ssc->snapset, head_obc->obs.oi); - if (head_obc->ssc->snapset.clones.empty() && head_obc->obs.oi.is_whiteout()) { - remove_head_whiteout(obc, head_obc, txn, log_entries); + int64_t num_objects_before_trim = delta_stats.num_objects; + osd_op_p.at_version = pg->get_next_version(); + auto ret = remove_or_update_iertr::now(); + if (new_snaps.empty()) { + // remove clone from snapset + logger().info("{}: {} snaps {} -> {} ... deleting", + *this, coid, old_snaps, new_snaps); + ret = remove_clone(obc, head_obc, txn); } else { - update_head(obc, head_obc, txn, log_entries); - } - // Stats reporting - Set number of objects trimmed - if (num_objects_before_trim > delta_stats.num_objects) { - //int64_t num_objects_trimmed = - // num_objects_before_trim - delta_stats.num_objects; - //add_objects_trimmed_count(num_objects_trimmed); + // save adjusted snaps for this object + logger().info("{}: {} snaps {} -> {}", + *this, coid, old_snaps, new_snaps); + ret = adjust_snaps(obc, head_obc, new_snaps, txn); } - }).si_then( - [&txn, log_entries=std::move(log_entries)] () mutable { - return remove_or_update_iertr::make_ready_future<remove_or_update_ret_t>( - std::make_pair(std::move(txn), std::move(log_entries))); - }); + return std::move(ret).si_then( + [&txn, obc, num_objects_before_trim, + head_obc=std::move(head_obc), this]() mutable { + // save head snapset + logger().debug("{}: {} new snapset {} on {}", + *this, coid, head_obc->ssc->snapset, head_obc->obs.oi); + if (head_obc->ssc->snapset.clones.empty() && head_obc->obs.oi.is_whiteout()) { + remove_head_whiteout(obc, head_obc, txn); + } else { + update_head(obc, head_obc, txn); + } + // Stats reporting - Set number of objects trimmed + if (num_objects_before_trim > delta_stats.num_objects) { + //int64_t num_objects_trimmed = + // num_objects_before_trim - delta_stats.num_objects; + //add_objects_trimmed_count(num_objects_trimmed); + } + }).si_then( + [&txn] () mutable { + return std::move(txn); + }); }); } @@ -518,8 +497,7 @@ SnapTrimObjSubEvent::start() logger().debug("{}: processing clone_obc={}", *this, clone_obc->get_oid()); return remove_or_update( clone_obc, head_obc - ).safe_then_unpack_interruptible([clone_obc, this] - (auto&& txn, auto&& log_entries) mutable { + ).safe_then_interruptible([clone_obc, this](auto&& txn) mutable { auto [submitted, all_completed] = pg->submit_transaction( std::move(clone_obc), std::move(txn), diff --git a/src/crimson/osd/osd_operations/snaptrim_event.h b/src/crimson/osd/osd_operations/snaptrim_event.h index f7c512881de..5afba7d7a47 100644 --- a/src/crimson/osd/osd_operations/snaptrim_event.h +++ b/src/crimson/osd/osd_operations/snaptrim_event.h @@ -147,28 +147,22 @@ private: remove_or_update_iertr::future<> remove_clone( ObjectContextRef obc, ObjectContextRef head_obc, - ceph::os::Transaction& txn, - std::vector<pg_log_entry_t>& log_entries); + ceph::os::Transaction& txn); void remove_head_whiteout( ObjectContextRef obc, 
ObjectContextRef head_obc, - ceph::os::Transaction& txn, - std::vector<pg_log_entry_t>& log_entries); + ceph::os::Transaction& txn); interruptible_future<> adjust_snaps( ObjectContextRef obc, ObjectContextRef head_obc, const std::set<snapid_t>& new_snaps, - ceph::os::Transaction& txn, - std::vector<pg_log_entry_t>& log_entries); + ceph::os::Transaction& txn); void update_head( ObjectContextRef obc, ObjectContextRef head_obc, - ceph::os::Transaction& txn, - std::vector<pg_log_entry_t>& log_entries); + ceph::os::Transaction& txn); - using remove_or_update_ret_t = - std::pair<ceph::os::Transaction, std::vector<pg_log_entry_t>>; - remove_or_update_iertr::future<remove_or_update_ret_t> + remove_or_update_iertr::future<ceph::os::Transaction> remove_or_update(ObjectContextRef obc, ObjectContextRef head_obc); // we don't need to synchronize with other instances started by @@ -177,11 +171,32 @@ private: static constexpr auto type_name = "SnapTrimObjSubEvent::wait_repop"; } wait_repop; + void add_log_entry( + int _op, + const hobject_t& _soid, + const eversion_t& pv, + version_t uv, + const osd_reqid_t& rid, + const utime_t& mt, + int return_code) { + log_entries.emplace_back( + _op, + _soid, + osd_op_p.at_version, + pv, + uv, + rid, + mt, + return_code); + osd_op_p.at_version.version++; + } + Ref<PG> pg; PipelineHandle handle; osd_op_params_t osd_op_p; const hobject_t coid; const snapid_t snap_to_trim; + std::vector<pg_log_entry_t> log_entries; public: PipelineHandle& get_handle() { return handle; } diff --git a/src/crimson/osd/pg.cc b/src/crimson/osd/pg.cc index 91c6439f682..00592eb8347 100644 --- a/src/crimson/osd/pg.cc +++ b/src/crimson/osd/pg.cc @@ -773,6 +773,10 @@ PG::submit_transaction( peering_state.append_log_with_trim_to_updated(std::move(log_entries), osd_op_p.at_version, txn, true, false); + ceph_assert(!log_entries.empty()); + ceph_assert(log_entries.rbegin()->version >= projected_last_update); + projected_last_update = log_entries.rbegin()->version; + auto [submitted, all_completed] = backend->mutate_object( peering_state.get_acting_recovery_backfill(), std::move(obc), @@ -968,7 +972,7 @@ seastar::future<> PG::submit_error_log( mempool::osd_pglog::list<pg_log_entry_t> log_entries; log_entries.push_back(pg_log_entry_t(pg_log_entry_t::ERROR, obc->obs.oi.soid, - next_version(), + get_next_version(), eversion_t(), 0, reqid, utime_t(), -e.value())); @@ -1261,9 +1265,9 @@ PG::with_locked_obc(const hobject_t &hobj, throw crimson::common::system_shutdown_exception(); } const hobject_t oid = get_oid(hobj); - auto wrapper = [f=std::move(f), this](auto obc) { + auto wrapper = [f=std::move(f), this](auto head, auto obc) { check_blocklisted_obc_watchers(obc); - return f(obc); + return f(head, obc); }; switch (get_lock_type(op_info)) { case RWState::RWREAD: @@ -1531,14 +1535,19 @@ void PG::on_change(ceph::os::Transaction &t) { } void PG::context_registry_on_change() { - obc_registry.for_each([](ObjectContextRef obc) { - assert(obc); - for (auto j = obc->watchers.begin(); - j != obc->watchers.end(); - j = obc->watchers.erase(j)) { - j->second->discard_state(); - } + std::vector<seastar::shared_ptr<crimson::osd::Watch>> watchers; + obc_registry.for_each([&watchers](ObjectContextRef obc) { + assert(obc); + for (auto j = obc->watchers.begin(); + j != obc->watchers.end(); + j = obc->watchers.erase(j)) { + watchers.emplace_back(j->second); + } }); + + for (auto &watcher : watchers) { + watcher->discard_state(); + } } bool PG::can_discard_op(const MOSDOp& m) const { diff --git a/src/crimson/osd/pg.h 
b/src/crimson/osd/pg.h index 056d5e9e1c0..4628b09b509 100644 --- a/src/crimson/osd/pg.h +++ b/src/crimson/osd/pg.h @@ -348,8 +348,7 @@ public: void on_active_advmap(const OSDMapRef &osdmap) final; epoch_t cluster_osdmap_trim_lower_bound() final { - // TODO - return 0; + return shard_services.get_osdmap_tlb(); } void on_backfill_reserved() final { @@ -506,7 +505,7 @@ public: public: using with_obc_func_t = - std::function<load_obc_iertr::future<> (ObjectContextRef)>; + std::function<load_obc_iertr::future<> (ObjectContextRef, ObjectContextRef)>; load_obc_iertr::future<> with_locked_obc( const hobject_t &hobj, @@ -610,9 +609,9 @@ private: public: cached_map_t get_osdmap() { return peering_state.get_osdmap(); } - eversion_t next_version() { + eversion_t get_next_version() { return eversion_t(get_osdmap_epoch(), - ++projected_last_update.version); + projected_last_update.version + 1); } ShardServices& get_shard_services() final { return shard_services; @@ -643,6 +642,12 @@ private: std::optional<pg_stat_t> pg_stats; public: + OSDriver &get_osdriver() final { + return osdriver; + } + SnapMapper &get_snap_mapper() final { + return snap_mapper; + } RecoveryBackend* get_recovery_backend() final { return recovery_backend.get(); } diff --git a/src/crimson/osd/pg_backend.cc b/src/crimson/osd/pg_backend.cc index 02acb9a55d3..5e3e7a91c0f 100644 --- a/src/crimson/osd/pg_backend.cc +++ b/src/crimson/osd/pg_backend.cc @@ -787,7 +787,7 @@ PGBackend::rollback_iertr::future<> PGBackend::rollback( return obc_loader.with_clone_obc_only<RWState::RWWRITE>( head, target_coid, [this, &os, &txn, &delta_stats, &osd_op_params] - (auto resolved_obc) { + (auto, auto resolved_obc) { if (resolved_obc->obs.oi.soid.is_head()) { // no-op: The resolved oid returned the head object logger().debug("PGBackend::rollback: loaded head_obc: {}" diff --git a/src/crimson/osd/pg_recovery.cc b/src/crimson/osd/pg_recovery.cc index 09b45779ec8..13ac069c63d 100644 --- a/src/crimson/osd/pg_recovery.cc +++ b/src/crimson/osd/pg_recovery.cc @@ -140,13 +140,14 @@ size_t PGRecovery::start_primary_recovery_ops( hobject_t head = soid.get_head(); + bool head_missing = missing.is_missing(head); logger().info( "{} {} item.need {} {} {} {} {}", __func__, soid, item.need, missing.is_missing(soid) ? " (missing)":"", - missing.is_missing(head) ? " (missing head)":"", + head_missing ? " (missing head)":"", pg->get_recovery_backend()->is_recovering(soid) ? " (recovering)":"", pg->get_recovery_backend()->is_recovering(head) ? 
" (recovering head)":""); @@ -158,7 +159,15 @@ size_t PGRecovery::start_primary_recovery_ops( } else if (pg->get_recovery_backend()->is_recovering(head)) { ++skipped; } else { - out->emplace_back(recover_missing(trigger, soid, item.need)); + if (head_missing) { + auto it = missing.get_items().find(head); + assert(it != missing.get_items().end()); + auto head_need = it->second.need; + out->emplace_back(recover_missing(trigger, head, head_need)); + ++skipped; + } else { + out->emplace_back(recover_missing(trigger, soid, item.need)); + } ++started; } @@ -266,20 +275,27 @@ PGRecovery::recover_missing( RecoveryBackend::RecoveryBlockingEvent::TriggerI& trigger, const hobject_t &soid, eversion_t need) { - if (pg->get_peering_state().get_missing_loc().is_deleted(soid)) { - return pg->get_recovery_backend()->add_recovering(soid).wait_track_blocking( - trigger, - pg->get_recovery_backend()->recover_delete(soid, need)); + logger().info("{} {} v {}", __func__, soid, need); + auto [recovering, added] = pg->get_recovery_backend()->add_recovering(soid); + if (added) { + logger().info("{} {} v {}, new recovery", __func__, soid, need); + if (pg->get_peering_state().get_missing_loc().is_deleted(soid)) { + return recovering.wait_track_blocking( + trigger, + pg->get_recovery_backend()->recover_delete(soid, need)); + } else { + return recovering.wait_track_blocking( + trigger, + pg->get_recovery_backend()->recover_object(soid, need) + .handle_exception_interruptible( + [=, this, soid = std::move(soid)] (auto e) { + on_failed_recover({ pg->get_pg_whoami() }, soid, need); + return seastar::make_ready_future<>(); + }) + ); + } } else { - return pg->get_recovery_backend()->add_recovering(soid).wait_track_blocking( - trigger, - pg->get_recovery_backend()->recover_object(soid, need) - .handle_exception_interruptible( - [=, this, soid = std::move(soid)] (auto e) { - on_failed_recover({ pg->get_pg_whoami() }, soid, need); - return seastar::make_ready_future<>(); - }) - ); + return recovering.wait_for_recovered(); } } @@ -288,16 +304,23 @@ RecoveryBackend::interruptible_future<> PGRecovery::prep_object_replica_deletes( const hobject_t& soid, eversion_t need) { - return pg->get_recovery_backend()->add_recovering(soid).wait_track_blocking( - trigger, - pg->get_recovery_backend()->push_delete(soid, need).then_interruptible( - [=, this] { - object_stat_sum_t stat_diff; - stat_diff.num_objects_recovered = 1; - on_global_recover(soid, stat_diff, true); - return seastar::make_ready_future<>(); - }) - ); + logger().info("{} {} v {}", __func__, soid, need); + auto [recovering, added] = pg->get_recovery_backend()->add_recovering(soid); + if (added) { + logger().info("{} {} v {}, new recovery", __func__, soid, need); + return recovering.wait_track_blocking( + trigger, + pg->get_recovery_backend()->push_delete(soid, need).then_interruptible( + [=, this] { + object_stat_sum_t stat_diff; + stat_diff.num_objects_recovered = 1; + on_global_recover(soid, stat_diff, true); + return seastar::make_ready_future<>(); + }) + ); + } else { + return recovering.wait_for_recovered(); + } } RecoveryBackend::interruptible_future<> PGRecovery::prep_object_replica_pushes( @@ -305,18 +328,26 @@ RecoveryBackend::interruptible_future<> PGRecovery::prep_object_replica_pushes( const hobject_t& soid, eversion_t need) { - return pg->get_recovery_backend()->add_recovering(soid).wait_track_blocking( - trigger, - pg->get_recovery_backend()->recover_object(soid, need) - .handle_exception_interruptible( - [=, this, soid = std::move(soid)] (auto e) { - 
on_failed_recover({ pg->get_pg_whoami() }, soid, need); - return seastar::make_ready_future<>(); - }) - ); + logger().info("{} {} v {}", __func__, soid, need); + auto [recovering, added] = pg->get_recovery_backend()->add_recovering(soid); + if (added) { + logger().info("{} {} v {}, new recovery", __func__, soid, need); + return recovering.wait_track_blocking( + trigger, + pg->get_recovery_backend()->recover_object(soid, need) + .handle_exception_interruptible( + [=, this, soid = std::move(soid)] (auto e) { + on_failed_recover({ pg->get_pg_whoami() }, soid, need); + return seastar::make_ready_future<>(); + }) + ); + } else { + return recovering.wait_for_recovered(); + } } -void PGRecovery::on_local_recover( +RecoveryBackend::interruptible_future<> +PGRecovery::on_local_recover( const hobject_t& soid, const ObjectRecoveryInfo& recovery_info, const bool is_delete, @@ -332,20 +363,38 @@ void PGRecovery::on_local_recover( ceph_abort("mark_unfound_lost (LOST_REVERT) is not implemented yet"); } } - pg->get_peering_state().recover_got(soid, - recovery_info.version, is_delete, t); - - if (pg->is_primary()) { - if (!is_delete) { - auto& obc = pg->get_recovery_backend()->get_recovering(soid).obc; //TODO: move to pg backend? - obc->obs.exists = true; - obc->obs.oi = recovery_info.oi; + + return RecoveryBackend::interruptor::async( + [soid, &recovery_info, is_delete, &t, this] { + if (soid.is_snap()) { + OSDriver::OSTransaction _t(pg->get_osdriver().get_transaction(&t)); + int r = pg->get_snap_mapper().remove_oid(soid, &_t); + assert(r == 0 || r == -ENOENT); + + if (!is_delete) { + set<snapid_t> snaps; + auto p = recovery_info.ss.clone_snaps.find(soid.snap); + assert(p != recovery_info.ss.clone_snaps.end()); + snaps.insert(p->second.begin(), p->second.end()); + pg->get_snap_mapper().add_oid(recovery_info.soid, snaps, &_t); + } } - if (!pg->is_unreadable_object(soid)) { - pg->get_recovery_backend()->get_recovering(soid).set_readable(); + + pg->get_peering_state().recover_got(soid, + recovery_info.version, is_delete, t); + + if (pg->is_primary()) { + if (!is_delete) { + auto& obc = pg->get_recovery_backend()->get_recovering(soid).obc; //TODO: move to pg backend? 
+ obc->obs.exists = true; + obc->obs.oi = recovery_info.oi; + } + if (!pg->is_unreadable_object(soid)) { + pg->get_recovery_backend()->get_recovering(soid).set_readable(); + } + pg->publish_stats_to_osd(); } - pg->publish_stats_to_osd(); - } + }); } void PGRecovery::on_global_recover ( @@ -449,9 +498,11 @@ void PGRecovery::enqueue_push( const hobject_t& obj, const eversion_t& v) { - logger().debug("{}: obj={} v={}", + logger().info("{}: obj={} v={}", __func__, obj, v); - pg->get_recovery_backend()->add_recovering(obj); + auto [recovering, added] = pg->get_recovery_backend()->add_recovering(obj); + if (!added) + return; std::ignore = pg->get_recovery_backend()->recover_object(obj, v).\ handle_exception_interruptible([] (auto) { ceph_abort_msg("got exception on backfill's push"); diff --git a/src/crimson/osd/pg_recovery.h b/src/crimson/osd/pg_recovery.h index 719d0ad2d34..ad224073857 100644 --- a/src/crimson/osd/pg_recovery.h +++ b/src/crimson/osd/pg_recovery.h @@ -64,7 +64,7 @@ private: const hobject_t& soid, eversion_t need); - void on_local_recover( + RecoveryBackend::interruptible_future<> on_local_recover( const hobject_t& soid, const ObjectRecoveryInfo& recovery_info, bool is_delete, diff --git a/src/crimson/osd/pg_recovery_listener.h b/src/crimson/osd/pg_recovery_listener.h index c922b99561a..a53221c4a69 100644 --- a/src/crimson/osd/pg_recovery_listener.h +++ b/src/crimson/osd/pg_recovery_listener.h @@ -36,4 +36,6 @@ public: virtual const pg_info_t& get_info() const= 0; virtual seastar::future<> stop() = 0; virtual void publish_stats_to_osd() = 0; + virtual OSDriver &get_osdriver() = 0; + virtual SnapMapper &get_snap_mapper() = 0; }; diff --git a/src/crimson/osd/pg_shard_manager.cc b/src/crimson/osd/pg_shard_manager.cc index 6061c856be2..1980b36271c 100644 --- a/src/crimson/osd/pg_shard_manager.cc +++ b/src/crimson/osd/pg_shard_manager.cc @@ -105,4 +105,13 @@ seastar::future<> PGShardManager::set_up_epoch(epoch_t e) { }); } +seastar::future<> PGShardManager::set_superblock(OSDSuperblock superblock) { + ceph_assert(seastar::this_shard_id() == PRIMARY_CORE); + get_osd_singleton_state().set_singleton_superblock(superblock); + return shard_services.invoke_on_all( + [superblock = std::move(superblock)](auto &local_service) { + return local_service.local_state.update_shard_superblock(superblock); + }); +} + } diff --git a/src/crimson/osd/pg_shard_manager.h b/src/crimson/osd/pg_shard_manager.h index cf13cb52bbf..1559dde0cb1 100644 --- a/src/crimson/osd/pg_shard_manager.h +++ b/src/crimson/osd/pg_shard_manager.h @@ -129,16 +129,17 @@ public: FORWARD_TO_OSD_SINGLETON(init_meta_coll) FORWARD_TO_OSD_SINGLETON(get_meta_coll) - FORWARD_TO_OSD_SINGLETON(set_superblock) - // Core OSDMap methods FORWARD_TO_OSD_SINGLETON(get_local_map) FORWARD_TO_OSD_SINGLETON(load_map_bl) FORWARD_TO_OSD_SINGLETON(load_map_bls) FORWARD_TO_OSD_SINGLETON(store_maps) + FORWARD_TO_OSD_SINGLETON(trim_maps) seastar::future<> set_up_epoch(epoch_t e); + seastar::future<> set_superblock(OSDSuperblock superblock); + template <typename F> auto with_remote_shard_state(core_id_t core, F &&f) { return shard_services.invoke_on( @@ -151,7 +152,7 @@ public: template <typename T, typename F> auto process_ordered_op_remotely( - crosscore_ordering_t::seq_t cc_seq, + OSDConnectionPriv::crosscore_ordering_t::seq_t cc_seq, ShardServices &target_shard_services, typename T::IRef &&op, F &&f) { diff --git a/src/crimson/osd/recovery_backend.h b/src/crimson/osd/recovery_backend.h index 65e9bb01fbd..abf69589159 100644 --- 
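
// --- Editor's illustrative sketch, not part of the patch above ---
// The on_local_recover() hunk above now refreshes the snap mapper for clone
// objects: drop any stale mapping for the clone, then re-derive its snap set
// from the head's SnapSet::clone_snaps and register it again. The sketch
// below shows that lookup/re-register shape with plain containers;
// SnapSetLite and SnapMapperLite are simplified stand-ins for SnapSet and
// SnapMapper (no OSDriver transaction plumbing).
#include <cassert>
#include <cstdint>
#include <map>
#include <set>
#include <string>
#include <utility>
#include <vector>

using snapid = uint64_t;

struct SnapSetLite {                           // stands in for SnapSet
  std::map<snapid, std::vector<snapid>> clone_snaps;
};

struct SnapMapperLite {                        // stands in for SnapMapper
  std::map<std::string, std::set<snapid>> oid_to_snaps;
  int remove_oid(const std::string& oid) {
    return oid_to_snaps.erase(oid) ? 0 : -2;   // -2 stands in for -ENOENT
  }
  void add_oid(const std::string& oid, std::set<snapid> snaps) {
    oid_to_snaps[oid] = std::move(snaps);
  }
};

int main() {
  SnapSetLite head_ss;
  head_ss.clone_snaps[4] = {2, 3, 4};          // clone 4 covers snaps 2..4

  SnapMapperLite mapper;
  const std::string clone_oid = "obj:4";
  const snapid clone_snap = 4;

  int r = mapper.remove_oid(clone_oid);        // stale entry may not exist
  assert(r == 0 || r == -2);

  auto p = head_ss.clone_snaps.find(clone_snap);
  assert(p != head_ss.clone_snaps.end());
  std::set<snapid> snaps(p->second.begin(), p->second.end());
  mapper.add_oid(clone_oid, std::move(snaps)); // re-register the clone
}
// --- end of editor's sketch ---
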
a/src/crimson/osd/recovery_backend.h +++ b/src/crimson/osd/recovery_backend.h @@ -45,10 +45,10 @@ public: coll{coll}, backend{backend} {} virtual ~RecoveryBackend() {} - WaitForObjectRecovery& add_recovering(const hobject_t& soid) { + std::pair<WaitForObjectRecovery&, bool> add_recovering(const hobject_t& soid) { auto [it, added] = recovering.emplace(soid, new WaitForObjectRecovery{}); - assert(added); - return *(it->second); + assert(it->second); + return {*(it->second), added}; } WaitForObjectRecovery& get_recovering(const hobject_t& soid) { assert(is_recovering(soid)); diff --git a/src/crimson/osd/replicated_recovery_backend.cc b/src/crimson/osd/replicated_recovery_backend.cc index bd301cc2b67..803d067fd65 100644 --- a/src/crimson/osd/replicated_recovery_backend.cc +++ b/src/crimson/osd/replicated_recovery_backend.cc @@ -34,12 +34,12 @@ ReplicatedRecoveryBackend::recover_object( return maybe_pull_missing_obj(soid, need).then_interruptible([this, soid, need] { logger().debug("recover_object: loading obc: {}", soid); return pg.obc_loader.with_obc<RWState::RWREAD>(soid, - [this, soid, need](auto obc) { + [this, soid, need](auto head, auto obc) { logger().debug("recover_object: loaded obc: {}", obc->obs.oi.soid); auto& recovery_waiter = get_recovering(soid); recovery_waiter.obc = obc; recovery_waiter.obc->wait_recovery_read(); - return maybe_push_shards(soid, need); + return maybe_push_shards(head, soid, need); }).handle_error_interruptible( crimson::osd::PG::load_obc_ertr::all_same_way([soid](auto& code) { // TODO: may need eio handling? @@ -51,16 +51,18 @@ ReplicatedRecoveryBackend::recover_object( RecoveryBackend::interruptible_future<> ReplicatedRecoveryBackend::maybe_push_shards( + const crimson::osd::ObjectContextRef &head_obc, const hobject_t& soid, eversion_t need) { return seastar::do_with( get_shards_to_push(soid), - [this, need, soid](auto &shards) { + [this, need, soid, head_obc](auto &shards) { return interruptor::parallel_for_each( shards, - [this, need, soid](auto shard) { - return prep_push(soid, need, shard).then_interruptible([this, soid, shard](auto push) { + [this, need, soid, head_obc](auto shard) { + return prep_push(head_obc, soid, need, shard + ).then_interruptible([this, soid, shard](auto push) { auto msg = crimson::make_message<MOSDPGPush>(); msg->from = pg.get_pg_whoami(); msg->pgid = pg.get_pgid(); @@ -113,27 +115,31 @@ ReplicatedRecoveryBackend::maybe_pull_missing_obj( if (!local_missing.is_missing(soid)) { return seastar::make_ready_future<>(); } - PullOp pull_op; - auto& recovery_waiter = get_recovering(soid); - recovery_waiter.pull_info = - std::make_optional<RecoveryBackend::pull_info_t>(); - auto& pull_info = *recovery_waiter.pull_info; - prepare_pull(pull_op, pull_info, soid, need); - auto msg = crimson::make_message<MOSDPGPull>(); - msg->from = pg.get_pg_whoami(); - msg->set_priority(pg.get_recovery_op_priority()); - msg->pgid = pg.get_pgid(); - msg->map_epoch = pg.get_osdmap_epoch(); - msg->min_epoch = pg.get_last_peering_reset(); - msg->set_pulls({std::move(pull_op)}); - return interruptor::make_interruptible( - shard_services.send_to_osd( + return pg.obc_loader.with_obc<RWState::RWREAD>(soid.get_head(), + [this, soid, need](auto head, auto) { + PullOp pull_op; + auto& recovery_waiter = get_recovering(soid); + recovery_waiter.pull_info = + std::make_optional<RecoveryBackend::pull_info_t>(); + auto& pull_info = *recovery_waiter.pull_info; + prepare_pull(head, pull_op, pull_info, soid, need); + auto msg = crimson::make_message<MOSDPGPull>(); + msg->from = 
pg.get_pg_whoami(); + msg->set_priority(pg.get_recovery_op_priority()); + msg->pgid = pg.get_pgid(); + msg->map_epoch = pg.get_osdmap_epoch(); + msg->min_epoch = pg.get_last_peering_reset(); + msg->set_pulls({std::move(pull_op)}); + return shard_services.send_to_osd( pull_info.from.osd, std::move(msg), - pg.get_osdmap_epoch() - )).then_interruptible([&recovery_waiter] { + pg.get_osdmap_epoch()); + }).si_then([this, soid] { + auto& recovery_waiter = get_recovering(soid); return recovery_waiter.wait_for_pull(); - }); + }).handle_error_interruptible( + crimson::ct_error::assert_all("unexpected error") + ); } RecoveryBackend::interruptible_future<> @@ -201,15 +207,19 @@ ReplicatedRecoveryBackend::on_local_recover_persist( epoch_t epoch_frozen) { logger().debug("{}", __func__); - ceph::os::Transaction t; - pg.get_recovery_handler()->on_local_recover(soid, _recovery_info, is_delete, t); - logger().debug("ReplicatedRecoveryBackend::on_local_recover_persist: do_transaction..."); - return interruptor::make_interruptible( - shard_services.get_store().do_transaction(coll, std::move(t))) - .then_interruptible( - [this, epoch_frozen, last_complete = pg.get_info().last_complete] { - pg.get_recovery_handler()->_committed_pushed_object(epoch_frozen, last_complete); - return seastar::make_ready_future<>(); + return seastar::do_with( + ceph::os::Transaction(), + [this, soid, &_recovery_info, is_delete, epoch_frozen](auto &t) { + return pg.get_recovery_handler()->on_local_recover( + soid, _recovery_info, is_delete, t + ).then_interruptible([this, &t] { + logger().debug("ReplicatedRecoveryBackend::{}: do_transaction...", __func__); + return shard_services.get_store().do_transaction(coll, std::move(t)); + }).then_interruptible( + [this, epoch_frozen, last_complete = pg.get_info().last_complete] { + pg.get_recovery_handler()->_committed_pushed_object(epoch_frozen, last_complete); + return seastar::make_ready_future<>(); + }); }); } @@ -235,18 +245,24 @@ ReplicatedRecoveryBackend::local_recover_delete( } return seastar::make_ready_future<>(); }).safe_then_interruptible([this, soid, epoch_to_freeze, need] { - ObjectRecoveryInfo recovery_info; - recovery_info.soid = soid; - recovery_info.version = need; - return on_local_recover_persist(soid, recovery_info, - true, epoch_to_freeze); - }, PGBackend::load_metadata_ertr::all_same_way( - [this, soid, epoch_to_freeze, need] (auto e) { - ObjectRecoveryInfo recovery_info; + return seastar::do_with( + ObjectRecoveryInfo(), + [soid, need, this, epoch_to_freeze](auto &recovery_info) { recovery_info.soid = soid; recovery_info.version = need; return on_local_recover_persist(soid, recovery_info, - true, epoch_to_freeze); + true, epoch_to_freeze); + }); + }, PGBackend::load_metadata_ertr::all_same_way( + [this, soid, epoch_to_freeze, need] (auto e) { + return seastar::do_with( + ObjectRecoveryInfo(), + [soid, need, this, epoch_to_freeze](auto &recovery_info) { + recovery_info.soid = soid; + recovery_info.version = need; + return on_local_recover_persist(soid, recovery_info, + true, epoch_to_freeze); + }); }) ); } @@ -293,6 +309,7 @@ ReplicatedRecoveryBackend::recover_delete( RecoveryBackend::interruptible_future<PushOp> ReplicatedRecoveryBackend::prep_push( + const crimson::osd::ObjectContextRef &head_obc, const hobject_t& soid, eversion_t need, pg_shard_t pg_shard) @@ -323,6 +340,8 @@ ReplicatedRecoveryBackend::prep_push( push_info.recovery_info.copy_subset = data_subset; push_info.recovery_info.soid = soid; push_info.recovery_info.oi = obc->obs.oi; + assert(head_obc->ssc); + 
push_info.recovery_info.ss = head_obc->ssc->snapset; push_info.recovery_info.version = obc->obs.oi.version; push_info.recovery_info.object_exist = missing_iter->second.clean_regions.object_is_exist(); @@ -340,7 +359,9 @@ ReplicatedRecoveryBackend::prep_push( }); } -void ReplicatedRecoveryBackend::prepare_pull(PullOp& pull_op, +void ReplicatedRecoveryBackend::prepare_pull( + const crimson::osd::ObjectContextRef &head_obc, + PullOp& pull_op, pull_info_t& pull_info, const hobject_t& soid, eversion_t need) { @@ -355,6 +376,10 @@ void ReplicatedRecoveryBackend::prepare_pull(PullOp& pull_op, pull_op.recovery_info.copy_subset.insert(0, (uint64_t) -1); pull_op.recovery_info.copy_subset.intersection_of( missing_iter->second.clean_regions.get_dirty_regions()); + if (soid.is_snap()) { + assert(head_obc->ssc); + pull_op.recovery_info.ss = head_obc->ssc->snapset; + } pull_op.recovery_info.size = ((uint64_t) -1); pull_op.recovery_info.object_exist = missing_iter->second.clean_regions.object_is_exist(); @@ -689,7 +714,7 @@ ReplicatedRecoveryBackend::_handle_pull_response( if (pull_info.recovery_progress.first) { prepare_waiter = pg.obc_loader.with_obc<RWState::RWNONE>( pull_info.recovery_info.soid, - [&pull_info, &recovery_waiter, &push_op](auto obc) { + [&pull_info, &recovery_waiter, &push_op](auto, auto obc) { pull_info.obc = obc; recovery_waiter.obc = obc; obc->obs.oi.decode_no_oid(push_op.attrset.at(OI_ATTR), push_op.soid); @@ -724,21 +749,24 @@ ReplicatedRecoveryBackend::_handle_pull_response( push_op.attrset, std::move(push_op.omap_entries), t) .then_interruptible( [this, response, &pull_info, &push_op, complete, - t, bytes_recovered=data.length()] { + t, bytes_recovered=data.length()]() + -> RecoveryBackend::interruptible_future<bool> { pull_info.stat.num_keys_recovered += push_op.omap_entries.size(); pull_info.stat.num_bytes_recovered += bytes_recovered; if (complete) { pull_info.stat.num_objects_recovered++; - pg.get_recovery_handler()->on_local_recover( + return pg.get_recovery_handler()->on_local_recover( push_op.soid, get_recovering(push_op.soid).pull_info->recovery_info, - false, *t); - return true; + false, *t + ).then_interruptible([] { + return true; + }); } else { response->soid = push_op.soid; response->recovery_info = pull_info.recovery_info; response->recovery_progress = pull_info.recovery_progress; - return false; + return seastar::make_ready_future<bool>(false); } }); }); @@ -834,10 +862,11 @@ ReplicatedRecoveryBackend::_handle_push( .then_interruptible( [this, complete, &push_op, t] { if (complete) { - pg.get_recovery_handler()->on_local_recover( + return pg.get_recovery_handler()->on_local_recover( push_op.recovery_info.soid, push_op.recovery_info, false, *t); } + return RecoveryBackend::interruptor::now(); }); } diff --git a/src/crimson/osd/replicated_recovery_backend.h b/src/crimson/osd/replicated_recovery_backend.h index b023b7417e5..16d6369a91f 100644 --- a/src/crimson/osd/replicated_recovery_backend.h +++ b/src/crimson/osd/replicated_recovery_backend.h @@ -49,10 +49,12 @@ protected: interruptible_future<> handle_recovery_delete_reply( Ref<MOSDPGRecoveryDeleteReply> m); interruptible_future<PushOp> prep_push( + const crimson::osd::ObjectContextRef &head_obc, const hobject_t& soid, eversion_t need, pg_shard_t pg_shard); void prepare_pull( + const crimson::osd::ObjectContextRef &head_obc, PullOp& pull_op, pull_info_t& pull_info, const hobject_t& soid, @@ -124,6 +126,7 @@ private: load_obc_ertr>; interruptible_future<> maybe_push_shards( + const crimson::osd::ObjectContextRef 
&head_obc, const hobject_t& soid, eversion_t need); diff --git a/src/crimson/osd/shard_services.cc b/src/crimson/osd/shard_services.cc index c0688f95769..d73a85fb5f8 100644 --- a/src/crimson/osd/shard_services.cc +++ b/src/crimson/osd/shard_services.cc @@ -72,7 +72,7 @@ seastar::future<> PerShardState::stop_pgs() }); } -std::map<pg_t, pg_stat_t> PerShardState::get_pg_stats() const +std::map<pg_t, pg_stat_t> PerShardState::get_pg_stats() { assert_core(); std::map<pg_t, pg_stat_t> ret; @@ -119,6 +119,13 @@ HeartbeatStampsRef PerShardState::get_hb_stamps(int peer) return stamps->second; } +seastar::future<> PerShardState::update_shard_superblock(OSDSuperblock superblock) +{ + assert_core(); + per_shard_superblock = std::move(superblock); + return seastar::now(); +} + OSDSingletonState::OSDSingletonState( int whoami, crimson::net::Messenger &cluster_msgr, @@ -168,7 +175,8 @@ seastar::future<> OSDSingletonState::send_to_osd( } else { auto conn = cluster_msgr.connect( osdmap->get_cluster_addrs(peer).front(), CEPH_ENTITY_TYPE_OSD); - return conn->send(std::move(m)); + // TODO: gate the crosscore sending + return conn->send_with_throttling(std::move(m)); } } @@ -351,7 +359,6 @@ void OSDSingletonState::handle_conf_change( seastar::future<OSDSingletonState::local_cached_map_t> OSDSingletonState::get_local_map(epoch_t e) { - // TODO: use LRU cache for managing osdmap, fallback to disk if we have to if (auto found = osdmaps.find(e); found) { logger().debug("{} osdmap.{} found in cache", __func__, e); return seastar::make_ready_future<local_cached_map_t>(std::move(found)); @@ -391,6 +398,9 @@ seastar::future<std::map<epoch_t, bufferlist>> OSDSingletonState::load_map_bls( logger().debug("{} loading maps [{},{}]", __func__, first, last); ceph_assert(first <= last); + // TODO: take osd_map_max into account + //int max = cct->_conf->osd_map_message_max; + //ssize_t max_bytes = cct->_conf->osd_map_message_max_bytes; return seastar::map_reduce(boost::make_counting_iterator<epoch_t>(first), boost::make_counting_iterator<epoch_t>(last + 1), [this](epoch_t e) { @@ -422,15 +432,19 @@ seastar::future<std::unique_ptr<OSDMap>> OSDSingletonState::load_map(epoch_t e) seastar::future<> OSDSingletonState::store_maps(ceph::os::Transaction& t, epoch_t start, Ref<MOSDMap> m) { - return seastar::do_for_each( - boost::make_counting_iterator(start), - boost::make_counting_iterator(m->get_last() + 1), - [&t, m, this](epoch_t e) { + return seastar::do_with( + std::map<epoch_t, OSDMap*>(), + [&t, m, start, this](auto &added_maps) { + return seastar::do_for_each( + boost::make_counting_iterator(start), + boost::make_counting_iterator(m->get_last() + 1), + [&t, m, this, &added_maps](epoch_t e) { if (auto p = m->maps.find(e); p != m->maps.end()) { auto o = std::make_unique<OSDMap>(); o->decode(p->second); logger().info("store_maps storing osdmap.{}", e); store_map_bl(t, e, std::move(std::move(p->second))); + added_maps.emplace(e, o.get()); osdmaps.insert(e, std::move(o)); return seastar::now(); } else if (auto p = m->incremental_maps.find(e); @@ -438,7 +452,8 @@ seastar::future<> OSDSingletonState::store_maps(ceph::os::Transaction& t, logger().info("store_maps found osdmap.{} incremental map, " "loading osdmap.{}", e, e - 1); ceph_assert(std::cmp_greater(e, 0u)); - return load_map(e - 1).then([e, bl=p->second, &t, this](auto o) { + return load_map(e - 1).then( + [&added_maps, e, bl=p->second, &t, this](auto o) { OSDMap::Incremental inc; auto i = bl.cbegin(); inc.decode(i); @@ -447,6 +462,7 @@ seastar::future<> 
OSDSingletonState::store_maps(ceph::os::Transaction& t, o->encode(fbl, inc.encode_features | CEPH_FEATURE_RESERVED); logger().info("store_maps storing osdmap.{}", o->get_epoch()); store_map_bl(t, e, std::move(fbl)); + added_maps.emplace(e, o.get()); osdmaps.insert(e, std::move(o)); return seastar::now(); }); @@ -454,7 +470,41 @@ seastar::future<> OSDSingletonState::store_maps(ceph::os::Transaction& t, logger().error("MOSDMap lied about what maps it had?"); return seastar::now(); } + }).then([&t, this, &added_maps] { + auto [e, map] = *added_maps.begin(); + auto lastmap = osdmaps.find(e - 1).get(); + meta_coll->store_final_pool_info(t, lastmap, added_maps); + return seastar::now(); }); + }); +} + +// Note: store/set_superblock is called in later OSD::handle_osd_map +// so we use the OSD's superblock reference meanwhile. +void OSDSingletonState::trim_maps(ceph::os::Transaction& t, + OSDSuperblock& superblock) +{ + epoch_t min = + std::min(superblock.cluster_osdmap_trim_lower_bound, + osdmaps.cached_key_lower_bound()); + + if (min <= superblock.get_oldest_map()) { + return; + } + logger().debug("{}: min={} oldest_map={}", __func__, min, superblock.get_oldest_map()); + + // Trim from the superblock's oldest_map up to `min`. + // Break if we have exceeded the txn target size. + while (superblock.get_oldest_map() < min && + t.get_num_ops() < crimson::common::local_conf()->osd_target_transaction_size) { + logger().debug("{}: removing old osdmap epoch {}", __func__, superblock.get_oldest_map()); + meta_coll->remove_map(t, superblock.get_oldest_map()); + superblock.maps.erase(superblock.get_oldest_map()); + } + + // we should not trim past osdmaps.cached_key_lower_bound() + // as there may still be PGs with those map epochs recorded. + ceph_assert(min <= osdmaps.cached_key_lower_bound()); } seastar::future<Ref<PG>> ShardServices::make_pg( @@ -715,30 +765,36 @@ seastar::future<> OSDSingletonState::send_incremental_map( "superblock's oldest map: {}", __func__, first, superblock.get_oldest_map()); if (first >= superblock.get_oldest_map()) { + // TODO: osd_map_share_max_epochs + // See OSDService::build_incremental_map_msg + if (first < superblock.cluster_osdmap_trim_lower_bound) { + logger().info("{}: cluster osdmap lower bound: {} " + " > first {}, starting with full map", + __func__, superblock.cluster_osdmap_trim_lower_bound, first); + // we don't have the next map the target wants, + // so start with a full map. + first = superblock.cluster_osdmap_trim_lower_bound; + } return load_map_bls( first, superblock.get_newest_map() - ).then([this, &conn, first](auto&& bls) { + ).then([this, &conn](auto&& bls) { auto m = crimson::make_message<MOSDMap>( monc.get_fsid(), osdmap->get_encoding_features()); - m->cluster_osdmap_trim_lower_bound = first; + m->cluster_osdmap_trim_lower_bound = superblock.cluster_osdmap_trim_lower_bound; m->newest_map = superblock.get_newest_map(); m->maps = std::move(bls); return conn.send(std::move(m)); }); } else { + // See OSDService::send_incremental_map + // just send latest full map return load_map_bl(osdmap->get_epoch() ).then([this, &conn](auto&& bl) mutable { auto m = crimson::make_message<MOSDMap>( monc.get_fsid(), osdmap->get_encoding_features()); - /* TODO: once we support the tracking of superblock's - * cluster_osdmap_trim_lower_bound, the MOSDMap should - * be populated with this value instead of the oldest_map. - * See: OSD::handle_osd_map for how classic updates the - * cluster's trim lower bound. 
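
// --- Editor's illustrative sketch, not part of the patch above ---
// The trim_maps() hunk above trims stored osdmaps from the superblock's
// oldest epoch up to a computed lower bound, stops early once the pending
// transaction reaches a target op count, and never trims past the epoch the
// map cache still pins. The sketch below reproduces that loop shape with
// plain containers; the names and constants are made-up stand-ins for the
// superblock, meta collection, and osd_target_transaction_size.
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <map>
#include <string>

using epoch_t = uint32_t;

int main() {
  std::map<epoch_t, std::string> stored_maps;     // stands in for superblock.maps
  for (epoch_t e = 1; e <= 50; ++e)
    stored_maps[e] = "osdmap." + std::to_string(e);

  const epoch_t cluster_lower_bound = 40;         // from the monitors
  const epoch_t cache_lower_bound = 35;           // oldest epoch still cached/pinned
  const epoch_t min_epoch = std::min(cluster_lower_bound, cache_lower_bound);

  std::size_t txn_ops = 0;                        // stands in for t.get_num_ops()
  const std::size_t target_txn_size = 30;         // osd_target_transaction_size

  while (!stored_maps.empty() &&
         stored_maps.begin()->first < min_epoch &&
         txn_ops < target_txn_size) {
    stored_maps.erase(stored_maps.begin());       // meta_coll->remove_map(...)
    ++txn_ops;
  }

  // Never trim past the cache's lower bound: PGs may still reference those
  // epochs even if the monitors would allow trimming further.
  assert(stored_maps.begin()->first <= cache_lower_bound);
}
// --- end of editor's sketch ---
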
- */ - m->cluster_osdmap_trim_lower_bound = superblock.get_oldest_map(); + m->cluster_osdmap_trim_lower_bound = superblock.cluster_osdmap_trim_lower_bound; m->newest_map = superblock.get_newest_map(); m->maps.emplace(osdmap->get_epoch(), std::move(bl)); return conn.send(std::move(m)); diff --git a/src/crimson/osd/shard_services.h b/src/crimson/osd/shard_services.h index d71513a6645..37993a4f679 100644 --- a/src/crimson/osd/shard_services.h +++ b/src/crimson/osd/shard_services.h @@ -77,6 +77,10 @@ class PerShardState { PerfCounters *perf = nullptr; PerfCounters *recoverystate_perf = nullptr; + const epoch_t& get_osdmap_tlb() { + return per_shard_superblock.cluster_osdmap_trim_lower_bound; + } + // Op Management OSDOperationRegistry registry; OperationThrottler throttler; @@ -115,7 +119,7 @@ class PerShardState { PGMap pg_map; seastar::future<> stop_pgs(); - std::map<pg_t, pg_stat_t> get_pg_stats() const; + std::map<pg_t, pg_stat_t> get_pg_stats(); seastar::future<> broadcast_map_to_pgs( ShardServices &shard_services, epoch_t epoch); @@ -181,6 +185,8 @@ class PerShardState { HeartbeatStampsRef get_hb_stamps(int peer); std::map<int, HeartbeatStampsRef> heartbeat_stamps; + seastar::future<> update_shard_superblock(OSDSuperblock superblock); + // Time state const ceph::mono_time startup_time; ceph::signedspan get_mnow() const { @@ -188,6 +194,8 @@ class PerShardState { return ceph::mono_clock::now() - startup_time; } + OSDSuperblock per_shard_superblock; + public: PerShardState( int whoami, @@ -256,7 +264,7 @@ private: } OSDSuperblock superblock; - void set_superblock(OSDSuperblock _superblock) { + void set_singleton_superblock(OSDSuperblock _superblock) { superblock = std::move(_superblock); } @@ -316,6 +324,7 @@ private: epoch_t e, bufferlist&& bl); seastar::future<> store_maps(ceph::os::Transaction& t, epoch_t start, Ref<MOSDMap> m); + void trim_maps(ceph::os::Transaction& t, OSDSuperblock& superblock); }; /** @@ -508,6 +517,8 @@ public: FORWARD_TO_OSD_SINGLETON(send_pg_temp) FORWARD_TO_LOCAL_CONST(get_mnow) FORWARD_TO_LOCAL(get_hb_stamps) + FORWARD_TO_LOCAL(update_shard_superblock) + FORWARD_TO_LOCAL(get_osdmap_tlb) FORWARD(pg_created, pg_created, local_state.pg_map) diff --git a/src/include/cephfs/types.h b/src/include/cephfs/types.h index cca0a619305..2abfe29b08c 100644 --- a/src/include/cephfs/types.h +++ b/src/include/cephfs/types.h @@ -342,6 +342,8 @@ public: } void encode(ceph::buffer::list &bl) const; void decode(ceph::buffer::list::const_iterator& bl); + void dump(ceph::Formatter *f) const; + static void generate_test_instances(std::list<inline_data_t*>& ls); version_t version = 1; diff --git a/src/include/frag.h b/src/include/frag.h index ec18bddfbb1..5ea2429aee8 100644 --- a/src/include/frag.h +++ b/src/include/frag.h @@ -159,6 +159,15 @@ public: ceph::decode_raw(v, p); _enc = v; } + void dump(ceph::Formatter *f) const { + f->dump_unsigned("value", value()); + f->dump_unsigned("bits", bits()); + } + static void generate_test_instances(std::list<frag_t*>& ls) { + ls.push_back(new frag_t); + ls.push_back(new frag_t(10, 2)); + ls.push_back(new frag_t(11, 3)); + } bool operator<(const frag_t& b) const { if (value() != b.value()) @@ -525,6 +534,11 @@ public: } f->close_section(); // splits } + + static void generate_test_instances(std::list<fragtree_t*>& ls) { + ls.push_back(new fragtree_t); + ls.push_back(new fragtree_t); + } }; WRITE_CLASS_ENCODER(fragtree_t) diff --git a/src/include/fs_types.h b/src/include/fs_types.h index c1932bfcc30..606b9c6503d 100644 --- 
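
// --- Editor's illustrative sketch, not part of the patch above ---
// Many hunks in this series add the same dump()/generate_test_instances()
// pair to small types (frag_t, inodeno_t, object_t, shard_id_t, ...):
// dump() exposes the fields through a Formatter for debug/JSON output, and
// generate_test_instances() hands the encoding test harness a default object
// plus a few populated ones. The sketch below shows the convention on a
// hypothetical type; FormatterLite stands in for ceph::Formatter.
#include <cstdint>
#include <iostream>
#include <list>

struct FormatterLite {                               // stands in for ceph::Formatter
  void dump_unsigned(const char* name, uint64_t v) {
    std::cout << name << ": " << v << "\n";
  }
};

struct example_id_t {
  uint64_t val = 0;
  example_id_t() = default;
  explicit example_id_t(uint64_t v) : val(v) {}

  void dump(FormatterLite* f) const {
    f->dump_unsigned("val", val);
  }
  static void generate_test_instances(std::list<example_id_t*>& ls) {
    ls.push_back(new example_id_t);                  // default-constructed case
    ls.push_back(new example_id_t(123456789));       // populated case
  }
};

int main() {
  std::list<example_id_t*> ls;
  example_id_t::generate_test_instances(ls);
  FormatterLite f;
  for (auto* p : ls) {
    p->dump(&f);
    delete p;
  }
}
// --- end of editor's sketch ---
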
a/src/include/fs_types.h +++ b/src/include/fs_types.h @@ -75,6 +75,13 @@ struct inodeno_t { using ceph::decode; decode(val, p); } + void dump(ceph::Formatter *f) const { + f->dump_unsigned("val", val); + } + static void generate_test_instances(std::list<inodeno_t*>& ls) { + ls.push_back(new inodeno_t(1)); + ls.push_back(new inodeno_t(123456789)); + } } __attribute__ ((__may_alias__)); WRITE_CLASS_ENCODER(inodeno_t) diff --git a/src/include/mempool.h b/src/include/mempool.h index 076c62afe19..a6dca48dd6f 100644 --- a/src/include/mempool.h +++ b/src/include/mempool.h @@ -26,6 +26,10 @@ #include <boost/container/flat_set.hpp> #include <boost/container/flat_map.hpp> +#if defined(_GNU_SOURCE) && defined(WITH_SEASTAR) && !defined(WITH_ALIEN) +# include <sched.h> +#endif + #include "common/Formatter.h" #include "common/ceph_atomic.h" #include "include/ceph_assert.h" @@ -201,6 +205,24 @@ enum { num_shards = 1 << num_shard_bits }; +static size_t pick_a_shard_int() { +#if defined(_GNU_SOURCE) && defined(WITH_SEASTAR) && !defined(WITH_ALIEN) + // a thread local storage is actually just an approximation; + // what we truly want is a _cpu local storage_. + // + // on the architectures we care about sched_getcpu() is + // a syscall-handled-in-userspace (vdso!). it grabs the cpu + // id kernel exposes to a task on context switch. + return sched_getcpu() & ((1 << num_shard_bits) - 1); +#else + // Dirt cheap, see: + // https://fossies.org/dox/glibc-2.32/pthread__self_8c_source.html + size_t me = (size_t)pthread_self(); + size_t i = (me >> CEPH_PAGE_SHIFT) & ((1 << num_shard_bits) - 1); + return i; +#endif +} + // // Align shard to a cacheline. // @@ -240,7 +262,18 @@ const char *get_pool_name(pool_index_t ix); struct type_t { const char *type_name; size_t item_size; +#if defined(WITH_SEASTAR) && !defined(WITH_ALIEN) + struct type_shard_t { + ceph::atomic<ssize_t> items = {0}; // signed + char __padding[128 - sizeof(ceph::atomic<ssize_t>)]; + } __attribute__ ((aligned (128))); + static_assert(sizeof(type_shard_t) == 128, + "type_shard_t should be cacheline-sized"); + type_shard_t shards[num_shards]; +#else +// XXX: consider dropping this case for classic with perf tests ceph::atomic<ssize_t> items = {0}; // signed +#endif }; struct type_info_hash { @@ -255,6 +288,8 @@ class pool_t { mutable std::mutex lock; // only used for types list std::unordered_map<const char *, type_t> type_map; + template<pool_index_t, typename T> + friend class pool_allocator; public: // // How much this pool consumes. 
O(<num_shards>) @@ -264,19 +299,6 @@ public: void adjust_count(ssize_t items, ssize_t bytes); - static size_t pick_a_shard_int() { - // Dirt cheap, see: - // https://fossies.org/dox/glibc-2.32/pthread__self_8c_source.html - size_t me = (size_t)pthread_self(); - size_t i = (me >> CEPH_PAGE_SHIFT) & ((1 << num_shard_bits) - 1); - return i; - } - - shard_t* pick_a_shard() { - size_t i = pick_a_shard_int(); - return &shard[i]; - } - type_t *get_type(const std::type_info& ti, size_t size) { std::lock_guard<std::mutex> l(lock); auto p = type_map.find(ti.name()); @@ -339,11 +361,16 @@ public: T* allocate(size_t n, void *p = nullptr) { size_t total = sizeof(T) * n; - shard_t *shard = pool->pick_a_shard(); - shard->bytes += total; - shard->items += n; + const auto shid = pick_a_shard_int(); + auto& shard = pool->shard[shid]; + shard.bytes += total; + shard.items += n; if (type) { +#if defined(WITH_SEASTAR) && !defined(WITH_ALIEN) + type->shards[shid].items += n; +#else type->items += n; +#endif } T* r = reinterpret_cast<T*>(new char[total]); return r; @@ -351,22 +378,32 @@ public: void deallocate(T* p, size_t n) { size_t total = sizeof(T) * n; - shard_t *shard = pool->pick_a_shard(); - shard->bytes -= total; - shard->items -= n; + const auto shid = pick_a_shard_int(); + auto& shard = pool->shard[shid]; + shard.bytes -= total; + shard.items -= n; if (type) { +#if defined(WITH_SEASTAR) && !defined(WITH_ALIEN) + type->shards[shid].items -= n; +#else type->items -= n; +#endif } delete[] reinterpret_cast<char*>(p); } T* allocate_aligned(size_t n, size_t align, void *p = nullptr) { size_t total = sizeof(T) * n; - shard_t *shard = pool->pick_a_shard(); - shard->bytes += total; - shard->items += n; + const auto shid = pick_a_shard_int(); + auto& shard = pool->shard[shid]; + shard.bytes += total; + shard.items += n; if (type) { +#if defined(WITH_SEASTAR) && !defined(WITH_ALIEN) + type->shards[shid].items += n; +#else type->items += n; +#endif } char *ptr; int rc = ::posix_memalign((void**)(void*)&ptr, align, total); @@ -378,11 +415,16 @@ public: void deallocate_aligned(T* p, size_t n) { size_t total = sizeof(T) * n; - shard_t *shard = pool->pick_a_shard(); - shard->bytes -= total; - shard->items -= n; + const auto shid = pick_a_shard_int(); + auto& shard = pool->shard[shid]; + shard.bytes -= total; + shard.items -= n; if (type) { +#if defined(WITH_SEASTAR) && !defined(WITH_ALIEN) + type->shards[shid].items -= n; +#else type->items -= n; +#endif } aligned_free(p); } diff --git a/src/include/object.h b/src/include/object.h index 4564af86e57..2e5fb471c13 100644 --- a/src/include/object.h +++ b/src/include/object.h @@ -25,6 +25,7 @@ #include "include/rados.h" #include "include/unordered_map.h" +#include "common/Formatter.h" #include "hash.h" #include "encoding.h" @@ -58,6 +59,15 @@ struct object_t { using ceph::decode; decode(name, bl); } + + void dump(ceph::Formatter *f) const { + f->dump_string("name", name); + } + + static void generate_test_instances(std::list<object_t*>& o) { + o.push_back(new object_t); + o.push_back(new object_t("myobject")); + } }; WRITE_CLASS_ENCODER(object_t) @@ -170,6 +180,14 @@ struct sobject_t { decode(oid, bl); decode(snap, bl); } + void dump(ceph::Formatter *f) const { + f->dump_stream("oid") << oid; + f->dump_stream("snap") << snap; + } + static void generate_test_instances(std::list<sobject_t*>& o) { + o.push_back(new sobject_t); + o.push_back(new sobject_t(object_t("myobject"), 123)); + } }; WRITE_CLASS_ENCODER(sobject_t) diff --git a/src/include/types.h b/src/include/types.h 
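
// --- Editor's illustrative sketch, not part of the patch above ---
// The mempool hunks above change how a shard is picked (sched_getcpu() on
// crimson builds, a cheap hash of pthread_self() otherwise) and give each
// type one cache-line-sized counter per shard so concurrent allocations do
// not bounce a single atomic between CPUs. The sketch below reproduces that
// shape standalone; the constants and type names are stand-ins.
#include <atomic>
#include <cstddef>
#include <iostream>
#include <pthread.h>
#if defined(_GNU_SOURCE)
#include <sched.h>
#endif

constexpr std::size_t num_shard_bits = 5;
constexpr std::size_t num_shards = 1 << num_shard_bits;
constexpr std::size_t page_shift = 12;                // CEPH_PAGE_SHIFT stand-in

static std::size_t pick_a_shard_int() {
#if defined(_GNU_SOURCE)
  // Per-CPU sharding: ask the kernel which CPU this task is running on
  // (a vDSO call on Linux, so effectively a userspace read).
  return static_cast<std::size_t>(sched_getcpu()) & (num_shards - 1);
#else
  // Per-thread approximation: hash the pthread handle.
  std::size_t me = (std::size_t)pthread_self();
  return (me >> page_shift) & (num_shards - 1);
#endif
}

struct alignas(128) type_shard_t {                    // one cache line per shard
  std::atomic<long> items{0};
};

int main() {
  static type_shard_t shards[num_shards];
  shards[pick_a_shard_int()].items += 1;              // account one allocation
  long total = 0;
  for (auto& s : shards) total += s.items.load();
  std::cout << "items accounted: " << total << "\n";
}
// --- end of editor's sketch ---
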
index bdd09a53df1..a50a506eb0b 100644 --- a/src/include/types.h +++ b/src/include/types.h @@ -371,6 +371,14 @@ struct client_t { using ceph::decode; decode(v, bl); } + void dump(ceph::Formatter *f) const { + f->dump_int("id", v); + } + static void generate_test_instances(std::list<client_t*>& ls) { + ls.push_back(new client_t); + ls.push_back(new client_t(1)); + ls.push_back(new client_t(123)); + } }; WRITE_CLASS_ENCODER(client_t) @@ -517,7 +525,13 @@ struct shard_id_t { using ceph::decode; decode(id, bl); } - + void dump(ceph::Formatter *f) const { + f->dump_int("id", id); + } + static void generate_test_instances(std::list<shard_id_t*>& ls) { + ls.push_back(new shard_id_t(1)); + ls.push_back(new shard_id_t(2)); + } bool operator==(const shard_id_t&) const = default; auto operator<=>(const shard_id_t&) const = default; }; @@ -561,6 +575,13 @@ struct errorcode32_t { decode(code, bl); code = ceph_to_hostos_errno(code); } + void dump(ceph::Formatter *f) const { + f->dump_int("code", code); + } + static void generate_test_instances(std::list<errorcode32_t*>& ls) { + ls.push_back(new errorcode32_t(1)); + ls.push_back(new errorcode32_t(2)); + } }; WRITE_CLASS_ENCODER(errorcode32_t) @@ -602,6 +623,16 @@ struct sha_digest_t { decode(tmparr, bl); memcpy(v, tmparr.data(), SIZE); } + void dump(ceph::Formatter *f) const { + f->dump_string("sha1", to_str()); + } + static void generate_test_instances(std::list<sha_digest_t*>& ls) { + ls.push_back(new sha_digest_t); + ls.push_back(new sha_digest_t); + ls.back()->v[0] = 1; + ls.push_back(new sha_digest_t); + ls.back()->v[0] = 2; + } }; template<uint8_t S> diff --git a/src/librados/snap_set_diff.cc b/src/librados/snap_set_diff.cc index 06f76b02345..f80105b44ae 100644 --- a/src/librados/snap_set_diff.cc +++ b/src/librados/snap_set_diff.cc @@ -76,7 +76,6 @@ void calc_snap_set_diff(CephContext *cct, const librados::snap_set_t& snap_set, saw_start = true; } - *end_size = r->size; if (end < a) { ldout(cct, 20) << " past end " << end << ", end object does not exist" << dendl; *end_exists = false; @@ -88,6 +87,7 @@ void calc_snap_set_diff(CephContext *cct, const librados::snap_set_t& snap_set, } if (end <= b) { ldout(cct, 20) << " end" << dendl; + *end_size = r->size; *end_exists = true; *clone_end_snap_id = b; break; diff --git a/src/librbd/Journal.cc b/src/librbd/Journal.cc index 8ddce2e8f7d..1b37a30c17c 100644 --- a/src/librbd/Journal.cc +++ b/src/librbd/Journal.cc @@ -39,6 +39,7 @@ using util::create_async_context_callback; using util::create_context_callback; using journal::util::C_DecodeTag; using journal::util::C_DecodeTags; +using io::Extents; namespace { @@ -760,36 +761,87 @@ void Journal<I>::user_flushed() { } template <typename I> -uint64_t Journal<I>::append_write_event(uint64_t offset, size_t length, - const bufferlist &bl, - bool flush_entry) { +void Journal<I>::add_write_event_entries(uint64_t offset, size_t length, + const bufferlist &bl, + uint64_t buffer_offset, + Bufferlists *bufferlists) { ceph_assert(m_max_append_size > journal::AioWriteEvent::get_fixed_size()); - uint64_t max_write_data_size = + const uint64_t max_write_data_size = m_max_append_size - journal::AioWriteEvent::get_fixed_size(); // ensure that the write event fits within the journal entry - Bufferlists bufferlists; uint64_t bytes_remaining = length; uint64_t event_offset = 0; do { uint64_t event_length = std::min(bytes_remaining, max_write_data_size); bufferlist event_bl; - event_bl.substr_of(bl, event_offset, event_length); + event_bl.substr_of(bl, buffer_offset + 
event_offset, event_length); journal::EventEntry event_entry(journal::AioWriteEvent(offset + event_offset, event_length, event_bl), ceph_clock_now()); - bufferlists.emplace_back(); - encode(event_entry, bufferlists.back()); + bufferlists->emplace_back(); + encode(event_entry, bufferlists->back()); event_offset += event_length; bytes_remaining -= event_length; } while (bytes_remaining > 0); +} - return append_io_events(journal::EVENT_TYPE_AIO_WRITE, bufferlists, offset, - length, flush_entry, 0); +template <typename I> +uint64_t Journal<I>::append_write_event(const Extents &image_extents, + const bufferlist &bl, + bool flush_entry) { + Bufferlists bufferlists; + uint64_t buffer_offset = 0; + for (auto &extent : image_extents) { + add_write_event_entries(extent.first, extent.second, bl, buffer_offset, + &bufferlists); + + buffer_offset += extent.second; + } + + return append_io_events(journal::EVENT_TYPE_AIO_WRITE, bufferlists, + image_extents, flush_entry, 0); +} + +template <typename I> +uint64_t Journal<I>::append_write_same_event(const Extents &image_extents, + const bufferlist &bl, + bool flush_entry) { + Bufferlists bufferlists; + for (auto &extent : image_extents) { + journal::EventEntry event_entry( + journal::AioWriteSameEvent(extent.first, extent.second, bl), + ceph_clock_now()); + + bufferlists.emplace_back(); + encode(event_entry, bufferlists.back()); + } + + return append_io_events(journal::EVENT_TYPE_AIO_WRITESAME, bufferlists, + image_extents, flush_entry, 0); +} + +template <typename I> +uint64_t Journal<I>::append_discard_event(const Extents &image_extents, + uint32_t discard_granularity_bytes, + bool flush_entry) { + Bufferlists bufferlists; + for (auto &extent : image_extents) { + journal::EventEntry event_entry( + journal::AioDiscardEvent(extent.first, extent.second, + discard_granularity_bytes), + ceph_clock_now()); + + bufferlists.emplace_back(); + encode(event_entry, bufferlists.back()); + } + + return append_io_events(journal::EVENT_TYPE_AIO_DISCARD, bufferlists, + image_extents, flush_entry, 0); } template <typename I> @@ -832,7 +884,8 @@ uint64_t Journal<I>::append_compare_and_write_event(uint64_t offset, } while (bytes_remaining > 0); return append_io_events(journal::EVENT_TYPE_AIO_COMPARE_AND_WRITE, - bufferlists, offset, length, flush_entry, -EILSEQ); + bufferlists, {{offset, length}}, flush_entry, + -EILSEQ); } template <typename I> @@ -842,14 +895,14 @@ uint64_t Journal<I>::append_io_event(journal::EventEntry &&event_entry, bufferlist bl; event_entry.timestamp = ceph_clock_now(); encode(event_entry, bl); - return append_io_events(event_entry.get_event_type(), {bl}, offset, length, - flush_entry, filter_ret_val); + return append_io_events(event_entry.get_event_type(), {bl}, + {{offset, length}}, flush_entry, filter_ret_val); } template <typename I> uint64_t Journal<I>::append_io_events(journal::EventType event_type, const Bufferlists &bufferlists, - uint64_t offset, size_t length, + const Extents &image_extents, bool flush_entry, int filter_ret_val) { ceph_assert(!bufferlists.empty()); @@ -870,14 +923,13 @@ uint64_t Journal<I>::append_io_events(journal::EventType event_type, { std::lock_guard event_locker{m_event_lock}; - m_events[tid] = Event(futures, offset, length, filter_ret_val); + m_events[tid] = Event(futures, image_extents, filter_ret_val); } CephContext *cct = m_image_ctx.cct; ldout(cct, 20) << this << " " << __func__ << ": " << "event=" << event_type << ", " - << "offset=" << offset << ", " - << "length=" << length << ", " + << "image_extents=" << 
image_extents << ", " << "flush=" << flush_entry << ", tid=" << tid << dendl; Context *on_safe = create_async_context_callback( diff --git a/src/librbd/Journal.h b/src/librbd/Journal.h index 1ef9ffa8830..5327adac719 100644 --- a/src/librbd/Journal.h +++ b/src/librbd/Journal.h @@ -18,6 +18,7 @@ #include "journal/ReplayHandler.h" #include "librbd/Utils.h" #include "librbd/asio/ContextWQ.h" +#include "librbd/io/Types.h" #include "librbd/journal/Types.h" #include "librbd/journal/TypeTraits.h" @@ -133,14 +134,20 @@ public: void user_flushed(); - uint64_t append_write_event(uint64_t offset, size_t length, + uint64_t append_write_event(const io::Extents &image_extents, const bufferlist &bl, bool flush_entry); + uint64_t append_write_same_event(const io::Extents &image_extents, + const bufferlist &bl, + bool flush_entry); uint64_t append_compare_and_write_event(uint64_t offset, size_t length, const bufferlist &cmp_bl, const bufferlist &write_bl, bool flush_entry); + uint64_t append_discard_event(const io::Extents &image_extents, + uint32_t discard_granularity_bytes, + bool flush_entry); uint64_t append_io_event(journal::EventEntry &&event_entry, uint64_t offset, size_t length, bool flush_entry, int filter_ret_val); @@ -200,11 +207,13 @@ private: Event() { } - Event(const Futures &_futures, uint64_t offset, size_t length, + Event(const Futures &_futures, const io::Extents &image_extents, int filter_ret_val) : futures(_futures), filter_ret_val(filter_ret_val) { - if (length > 0) { - pending_extents.insert(offset, length); + for (auto &extent : image_extents) { + if (extent.second > 0) { + pending_extents.insert(extent.first, extent.second); + } } } }; @@ -322,9 +331,13 @@ private: bool is_journal_replaying(const ceph::mutex &) const; bool is_tag_owner(const ceph::mutex &) const; + void add_write_event_entries(uint64_t offset, size_t length, + const bufferlist &bl, + uint64_t buffer_offset, + Bufferlists *bufferlists); uint64_t append_io_events(journal::EventType event_type, const Bufferlists &bufferlists, - uint64_t offset, size_t length, bool flush_entry, + const io::Extents &extents, bool flush_entry, int filter_ret_val); Future wait_event(ceph::mutex &lock, uint64_t tid, Context *on_safe); diff --git a/src/librbd/io/ImageRequest.cc b/src/librbd/io/ImageRequest.cc index e4c41c22976..fb9f8944ed8 100644 --- a/src/librbd/io/ImageRequest.cc +++ b/src/librbd/io/ImageRequest.cc @@ -473,7 +473,7 @@ void AbstractImageWriteRequest<I>::send_request() { if (journaling) { // in-flight ops are flushed prior to closing the journal ceph_assert(image_ctx.journal != NULL); - journal_tid = append_journal_event(m_synchronous); + journal_tid = append_journal_event(); } // it's very important that IOContext is captured here instead of @@ -518,22 +518,12 @@ void ImageWriteRequest<I>::assemble_extent( } template <typename I> -uint64_t ImageWriteRequest<I>::append_journal_event(bool synchronous) { +uint64_t ImageWriteRequest<I>::append_journal_event() { I &image_ctx = this->m_image_ctx; - uint64_t tid = 0; - uint64_t buffer_offset = 0; ceph_assert(!this->m_image_extents.empty()); - for (auto &extent : this->m_image_extents) { - bufferlist sub_bl; - sub_bl.substr_of(m_bl, buffer_offset, extent.second); - buffer_offset += extent.second; - - tid = image_ctx.journal->append_write_event(extent.first, extent.second, - sub_bl, synchronous); - } - - return tid; + return image_ctx.journal->append_write_event( + this->m_image_extents, m_bl, false); } template <typename I> @@ -566,22 +556,12 @@ void 
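
// --- Editor's illustrative sketch, not part of the patch above ---
// The Journal::append_write_event()/add_write_event_entries() hunks above
// carve one payload across multiple image extents, keeping a running offset
// into the source buffer and splitting any extent whose data exceeds the
// maximum journal entry payload. The sketch below shows that splitting logic
// standalone; std::string stands in for bufferlist and the sizes are made up.
#include <algorithm>
#include <cstdint>
#include <iostream>
#include <string>
#include <utility>
#include <vector>

using Extent = std::pair<uint64_t, uint64_t>;         // image offset, length

struct EntryLite {                                    // stands in for AioWriteEvent
  uint64_t image_offset;
  std::string data;
};

std::vector<EntryLite> split_write(const std::vector<Extent>& image_extents,
                                   const std::string& payload,
                                   uint64_t max_write_data_size) {
  std::vector<EntryLite> entries;
  uint64_t buffer_offset = 0;                         // walks through payload
  for (const auto& [offset, length] : image_extents) {
    uint64_t remaining = length;
    uint64_t event_offset = 0;
    do {
      uint64_t event_length = std::min(remaining, max_write_data_size);
      entries.push_back({offset + event_offset,
                         payload.substr(buffer_offset + event_offset,
                                        event_length)});
      event_offset += event_length;
      remaining -= event_length;
    } while (remaining > 0);
    buffer_offset += length;                          // advance into payload
  }
  return entries;
}

int main() {
  std::string payload(10, 'x');
  auto entries = split_write({{0, 6}, {100, 4}}, payload, /*max=*/4);
  for (const auto& e : entries)
    std::cout << e.image_offset << " len=" << e.data.size() << "\n";
  // Expected: entries at 0 (len 4), 4 (len 2), 100 (len 4) -- the first
  // extent is split because it exceeds the 4-byte cap, the second is not.
}
// --- end of editor's sketch ---
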
ImageWriteRequest<I>::update_stats(size_t length) { } template <typename I> -uint64_t ImageDiscardRequest<I>::append_journal_event(bool synchronous) { +uint64_t ImageDiscardRequest<I>::append_journal_event() { I &image_ctx = this->m_image_ctx; - uint64_t tid = 0; ceph_assert(!this->m_image_extents.empty()); - for (auto &extent : this->m_image_extents) { - journal::EventEntry event_entry( - journal::AioDiscardEvent(extent.first, - extent.second, - this->m_discard_granularity_bytes)); - tid = image_ctx.journal->append_io_event(std::move(event_entry), - extent.first, extent.second, - synchronous, 0); - } - - return tid; + return image_ctx.journal->append_discard_event( + this->m_image_extents, m_discard_granularity_bytes, false); } template <typename I> @@ -717,21 +697,12 @@ void ImageFlushRequest<I>::send_request() { } template <typename I> -uint64_t ImageWriteSameRequest<I>::append_journal_event(bool synchronous) { +uint64_t ImageWriteSameRequest<I>::append_journal_event() { I &image_ctx = this->m_image_ctx; - uint64_t tid = 0; ceph_assert(!this->m_image_extents.empty()); - for (auto &extent : this->m_image_extents) { - journal::EventEntry event_entry(journal::AioWriteSameEvent(extent.first, - extent.second, - m_data_bl)); - tid = image_ctx.journal->append_io_event(std::move(event_entry), - extent.first, extent.second, - synchronous, 0); - } - - return tid; + return image_ctx.journal->append_write_same_event( + this->m_image_extents, m_data_bl, false); } template <typename I> @@ -768,8 +739,7 @@ void ImageWriteSameRequest<I>::update_stats(size_t length) { } template <typename I> -uint64_t ImageCompareAndWriteRequest<I>::append_journal_event( - bool synchronous) { +uint64_t ImageCompareAndWriteRequest<I>::append_journal_event() { I &image_ctx = this->m_image_ctx; uint64_t tid = 0; @@ -779,7 +749,7 @@ uint64_t ImageCompareAndWriteRequest<I>::append_journal_event( extent.second, m_cmp_bl, m_bl, - synchronous); + false); return tid; } diff --git a/src/librbd/io/ImageRequest.h b/src/librbd/io/ImageRequest.h index 2668c1acb2c..996c90a11f2 100644 --- a/src/librbd/io/ImageRequest.h +++ b/src/librbd/io/ImageRequest.h @@ -114,11 +114,6 @@ private: template <typename ImageCtxT = ImageCtx> class AbstractImageWriteRequest : public ImageRequest<ImageCtxT> { -public: - inline void flag_synchronous() { - m_synchronous = true; - } - protected: using typename ImageRequest<ImageCtxT>::ObjectRequests; @@ -127,8 +122,7 @@ protected: const char *trace_name, const ZTracer::Trace &parent_trace) : ImageRequest<ImageCtxT>(image_ctx, aio_comp, std::move(image_extents), - area, trace_name, parent_trace), - m_synchronous(false) { + area, trace_name, parent_trace) { } void send_request() override; @@ -144,11 +138,8 @@ protected: const LightweightObjectExtent &object_extent, IOContext io_context, uint64_t journal_tid, bool single_extent, Context *on_finish) = 0; - virtual uint64_t append_journal_event(bool synchronous) = 0; + virtual uint64_t append_journal_event() = 0; virtual void update_stats(size_t length) = 0; - -private: - bool m_synchronous; }; template <typename ImageCtxT = ImageCtx> @@ -180,7 +171,7 @@ protected: const LightweightObjectExtent &object_extent, IOContext io_context, uint64_t journal_tid, bool single_extent, Context *on_finish) override; - uint64_t append_journal_event(bool synchronous) override; + uint64_t append_journal_event() override; void update_stats(size_t length) override; private: @@ -215,7 +206,7 @@ protected: const LightweightObjectExtent &object_extent, IOContext io_context, uint64_t 
journal_tid, bool single_extent, Context *on_finish) override; - uint64_t append_journal_event(bool synchronous) override; + uint64_t append_journal_event() override; void update_stats(size_t length) override; int prune_object_extents( @@ -283,7 +274,7 @@ protected: const LightweightObjectExtent &object_extent, IOContext io_context, uint64_t journal_tid, bool single_extent, Context *on_finish) override; - uint64_t append_journal_event(bool synchronous) override; + uint64_t append_journal_event() override; void update_stats(size_t length) override; private: bufferlist m_data_bl; @@ -315,7 +306,7 @@ protected: const LightweightObjectExtent &object_extent, IOContext io_context, uint64_t journal_tid, bool single_extent, Context *on_finish) override; - uint64_t append_journal_event(bool synchronous) override; + uint64_t append_journal_event() override; void update_stats(size_t length) override; aio_type_t get_aio_type() const override { diff --git a/src/librbd/io/ObjectRequest.cc b/src/librbd/io/ObjectRequest.cc index 827f551d1f7..2bf06966c36 100644 --- a/src/librbd/io/ObjectRequest.cc +++ b/src/librbd/io/ObjectRequest.cc @@ -834,16 +834,17 @@ void ObjectListSnapsRequest<I>::handle_list_snaps(int r) { end_snap_id, &diff, &end_size, &exists, &clone_end_snap_id, &read_whole_object); - if (read_whole_object || - (!diff.empty() && - ((m_list_snaps_flags & LIST_SNAPS_FLAG_WHOLE_OBJECT) != 0))) { + if (read_whole_object) { ldout(cct, 1) << "need to read full object" << dendl; - diff.clear(); diff.insert(0, image_ctx->layout.object_size); + exists = true; end_size = image_ctx->layout.object_size; clone_end_snap_id = end_snap_id; - } else if (!exists) { - end_size = 0; + } else if ((m_list_snaps_flags & LIST_SNAPS_FLAG_WHOLE_OBJECT) != 0 && + !diff.empty()) { + ldout(cct, 20) << "expanding diff from " << diff << dendl; + diff.clear(); + diff.insert(0, image_ctx->layout.object_size); } if (exists) { @@ -884,7 +885,7 @@ void ObjectListSnapsRequest<I>::handle_list_snaps(int r) { << "end_size=" << end_size << ", " << "prev_end_size=" << prev_end_size << ", " << "exists=" << exists << ", " - << "whole_object=" << read_whole_object << dendl; + << "read_whole_object=" << read_whole_object << dendl; // check if object exists prior to start of incremental snap delta so that // we don't DNE the object if no additional deltas exist diff --git a/src/librbd/librbd.cc b/src/librbd/librbd.cc index 1272d95dd7e..132a0084a9f 100644 --- a/src/librbd/librbd.cc +++ b/src/librbd/librbd.cc @@ -15,6 +15,12 @@ #include <errno.h> +// these strand headers declare static variables that need to be shared between +// librbd.so and librados.so. referencing them here causes librbd.so to link +// their symbols as 'global unique'. 
see https://tracker.ceph.com/issues/63682 +#include <boost/asio/strand.hpp> +#include <boost/asio/io_context_strand.hpp> + #include "common/deleter.h" #include "common/dout.h" #include "common/errno.h" diff --git a/src/mds/CDir.cc b/src/mds/CDir.cc index 0484c38cc30..a8aaf11c051 100644 --- a/src/mds/CDir.cc +++ b/src/mds/CDir.cc @@ -3752,6 +3752,7 @@ bool CDir::scrub_local() mdcache->repair_dirfrag_stats(this); scrub_infop->header->set_repaired(); good = true; + mdcache->mds->damage_table.remove_dentry_damage_entry(this); } return good; } diff --git a/src/mds/CInode.cc b/src/mds/CInode.cc index 621d8e93e40..61a6854ea8e 100644 --- a/src/mds/CInode.cc +++ b/src/mds/CInode.cc @@ -4787,6 +4787,7 @@ next: false); // Flag that we repaired this BT so that it won't go into damagetable results->backtrace.repaired = true; + in->mdcache->mds->damage_table.remove_backtrace_damage_entry(in->ino()); if (in->mdcache->mds->logger) in->mdcache->mds->logger->inc(l_mds_scrub_backtrace_repaired); } @@ -4925,6 +4926,9 @@ next: << "freshly-calculated rstats don't match existing ones (will be fixed)"; in->mdcache->repair_inode_stats(in); results->raw_stats.repaired = true; + for (const auto &p : in->dirfrags){ + in->mdcache->mds->damage_table.remove_dirfrag_damage_entry(p.second); + } } else { results->raw_stats.error_str << "freshly-calculated rstats don't match existing ones"; diff --git a/src/mds/DamageTable.cc b/src/mds/DamageTable.cc index 22802079d85..2079d23333a 100644 --- a/src/mds/DamageTable.cc +++ b/src/mds/DamageTable.cc @@ -15,6 +15,7 @@ #include "common/debug.h" #include "mds/CDir.h" +#include "mds/CInode.h" #include "DamageTable.h" @@ -200,6 +201,33 @@ bool DamageTable::notify_remote_damaged(inodeno_t ino, std::string_view path) return false; } +void DamageTable::remove_dentry_damage_entry(CDir *dir) +{ + if (dentries.count( + DirFragIdent(dir->inode->ino(), dir->frag) + ) > 0){ + const auto frag_dentries = + dentries.at(DirFragIdent(dir->inode->ino(), dir->frag)); + for(const auto &i : frag_dentries) { + erase(i.second->id); + } + } +} + +void DamageTable::remove_dirfrag_damage_entry(CDir *dir) +{ + if (is_dirfrag_damaged(dir)){ + erase(dirfrags.find(DirFragIdent(dir->inode->ino(), dir->frag))->second->id); + } +} + +void DamageTable::remove_backtrace_damage_entry(inodeno_t ino) +{ + if (is_remote_damaged(ino)){ + erase(remotes.find(ino)->second->id); + } +} + bool DamageTable::oversized() const { return by_id.size() > (size_t)(g_conf()->mds_damage_table_max_entries); diff --git a/src/mds/DamageTable.h b/src/mds/DamageTable.h index 18a61e08b12..a1b96fe2218 100644 --- a/src/mds/DamageTable.h +++ b/src/mds/DamageTable.h @@ -22,6 +22,7 @@ #include "include/random.h" class CDir; +class CInode; typedef uint64_t damage_entry_id_t; @@ -155,6 +156,12 @@ class DamageTable */ bool notify_remote_damaged(inodeno_t ino, std::string_view path); + void remove_dentry_damage_entry(CDir *dir); + + void remove_dirfrag_damage_entry(CDir *dir); + + void remove_backtrace_damage_entry(inodeno_t ino); + bool is_dentry_damaged( const CDir *dir_frag, std::string_view dname, diff --git a/src/mds/MDSMap.cc b/src/mds/MDSMap.cc index 47c823bf763..013aefcf048 100644 --- a/src/mds/MDSMap.cc +++ b/src/mds/MDSMap.cc @@ -770,7 +770,7 @@ void MDSMap::encode(bufferlist& bl, uint64_t features) const encode(data_pools, bl); encode(cas_pool, bl); - __u16 ev = 17; + __u16 ev = 18; encode(ev, bl); encode(compat, bl); encode(metadata_pool, bl); @@ -947,6 +947,9 @@ void MDSMap::decode(bufferlist::const_iterator& p) if (ev >= 17) { 
decode(max_xattr_size, p); + } + + if (ev >= 18) { decode(bal_rank_mask, p); } diff --git a/src/mds/locks.c b/src/mds/locks.c index dbe3ab8eb00..f6ff8b9824a 100644 --- a/src/mds/locks.c +++ b/src/mds/locks.c @@ -117,7 +117,7 @@ const struct sm_state_t filelock[LOCK_MAX] = { [LOCK_XSYN_EXCL] = { LOCK_EXCL, true, LOCK_LOCK, AUTH, 0, XCL, 0, 0, 0, 0, 0,CEPH_CAP_GCACHE|CEPH_CAP_GBUFFER,0,0 }, [LOCK_XSYN] = { 0, true, LOCK_LOCK, AUTH, AUTH,AUTH,XCL, 0, 0, 0, 0,CEPH_CAP_GCACHE|CEPH_CAP_GBUFFER,0,0 }, - [LOCK_EXCL_XSYN] = { LOCK_XSYN, false, LOCK_LOCK, 0, 0, XCL, 0, 0, 0, 0, 0,CEPH_CAP_GCACHE|CEPH_CAP_GBUFFER,0,0 }, + [LOCK_EXCL_XSYN] = { LOCK_XSYN, true, LOCK_LOCK, 0, 0, XCL, 0, 0, 0, 0, 0,CEPH_CAP_GCACHE|CEPH_CAP_GBUFFER,0,0 }, [LOCK_PRE_SCAN] = { LOCK_SCAN, false, LOCK_LOCK, 0, 0, 0, 0, 0, 0, 0, 0,0,0,0 }, [LOCK_SCAN] = { LOCK_LOCK, false, LOCK_LOCK, 0, 0, 0, 0, 0, 0, 0, 0,0,0,0 }, diff --git a/src/mds/mdstypes.cc b/src/mds/mdstypes.cc index 044c3345960..21e17ca0e5b 100644 --- a/src/mds/mdstypes.cc +++ b/src/mds/mdstypes.cc @@ -284,6 +284,21 @@ void inline_data_t::decode(bufferlist::const_iterator &p) free_data(); } +void inline_data_t::dump(Formatter *f) const +{ + f->dump_unsigned("version", version); + f->dump_unsigned("length", length()); +} + +void inline_data_t::generate_test_instances(std::list<inline_data_t*>& ls) +{ + ls.push_back(new inline_data_t); + ls.push_back(new inline_data_t); + bufferlist bl; + bl.append("inline data"); + ls.back()->set_data(bl); +} + /* * fnode_t diff --git a/src/messages/MClientReply.h b/src/messages/MClientReply.h index be33fad4949..028c4200c14 100644 --- a/src/messages/MClientReply.h +++ b/src/messages/MClientReply.h @@ -291,7 +291,7 @@ struct InodeStat { }; struct openc_response_t { - _inodeno_t created_ino; + _inodeno_t created_ino{0}; interval_set<inodeno_t> delegated_inos; public: @@ -309,6 +309,16 @@ public: decode(delegated_inos, p); DECODE_FINISH(p); } + void dump(ceph::Formatter *f) const { + f->dump_unsigned("created_ino", created_ino); + f->dump_stream("delegated_inos") << delegated_inos; + } + static void generate_test_instances(std::list<openc_response_t*>& ls) { + ls.push_back(new openc_response_t); + ls.push_back(new openc_response_t); + ls.back()->created_ino = 1; + ls.back()->delegated_inos.insert(1, 10); + } } __attribute__ ((__may_alias__)); WRITE_CLASS_ENCODER(openc_response_t) diff --git a/src/messages/MClientRequest.h b/src/messages/MClientRequest.h index f63657d2e25..cb9a888b0be 100644 --- a/src/messages/MClientRequest.h +++ b/src/messages/MClientRequest.h @@ -60,6 +60,17 @@ struct SnapPayload { decode(metadata, iter); DECODE_FINISH(iter); } + void dump(ceph::Formatter *f) const { + for (const auto &i : metadata) { + f->dump_string(i.first.c_str(), i.second); + } + } + static void generate_test_instances(std::list<SnapPayload *> &o) { + o.push_back(new SnapPayload); + o.push_back(new SnapPayload); + o.back()->metadata["key1"] = "val1"; + o.back()->metadata["key2"] = "val2"; + } }; WRITE_CLASS_ENCODER(SnapPayload) @@ -95,6 +106,26 @@ public: decode(item, bl); ceph::decode_nohead(item.dname_len, dname, bl); } + + void dump(ceph::Formatter *f) const { + f->dump_string("dname", dname); + f->dump_unsigned("ino", item.ino); + f->dump_unsigned("cap_id", item.cap_id); + f->dump_unsigned("caps", item.caps); + f->dump_unsigned("wanted", item.wanted); + f->dump_unsigned("seq", item.seq); + f->dump_unsigned("issue_seq", item.issue_seq); + f->dump_unsigned("mseq", item.mseq); + f->dump_unsigned("dname_seq", item.dname_seq); + 
f->dump_unsigned("dname_len", item.dname_len); + } + + static void generate_test_instances(std::list<Release*>& ls) { + ls.push_back(new Release); + ls.push_back(new Release); + ls.back()->item.dname_len = 4; + ls.back()->dname = "test"; + } }; mutable std::vector<Release> releases; /* XXX HACK! */ diff --git a/src/messages/MMDSCacheRejoin.h b/src/messages/MMDSCacheRejoin.h index a9211d53538..9237c79b6e7 100644 --- a/src/messages/MMDSCacheRejoin.h +++ b/src/messages/MMDSCacheRejoin.h @@ -63,6 +63,17 @@ public: decode(nestlock, bl); decode(dftlock, bl); } + void dump(ceph::Formatter *f) const { + f->dump_int("nonce", nonce); + f->dump_int("caps_wanted", caps_wanted); + f->dump_int("filelock", filelock); + f->dump_int("nestlock", nestlock); + f->dump_int("dftlock", dftlock); + } + static void generate_test_instances(std::list<inode_strong*>& ls) { + ls.push_back(new inode_strong); + ls.push_back(new inode_strong(1, 2, 3, 4, 5)); + } }; WRITE_CLASS_ENCODER(inode_strong) @@ -81,6 +92,14 @@ public: decode(nonce, bl); decode(dir_rep, bl); } + static void generate_test_instances(std::list<dirfrag_strong*>& ls) { + ls.push_back(new dirfrag_strong); + ls.push_back(new dirfrag_strong(1, 2)); + } + void dump(ceph::Formatter *f) const { + f->dump_unsigned("nonce", nonce); + f->dump_unsigned("dir_rep", dir_rep); + } }; WRITE_CLASS_ENCODER(dirfrag_strong) @@ -118,6 +137,19 @@ public: decode(lock, bl); decode(alternate_name, bl); } + static void generate_test_instances(std::list<dn_strong*>& ls) { + ls.push_back(new dn_strong); + ls.push_back(new dn_strong(1, "alternate_name", 2, 3, 4, 5, 6)); + } + void dump(ceph::Formatter *f) const { + f->dump_unsigned("first", first); + f->dump_string("alternate_name", alternate_name); + f->dump_unsigned("ino", ino); + f->dump_unsigned("remote_ino", remote_ino); + f->dump_unsigned("remote_d_type", remote_d_type); + f->dump_unsigned("nonce", nonce); + f->dump_unsigned("lock", lock); + } }; WRITE_CLASS_ENCODER(dn_strong) diff --git a/src/messages/MMgrReport.h b/src/messages/MMgrReport.h index bd3c221a847..e81282dc1a0 100644 --- a/src/messages/MMgrReport.h +++ b/src/messages/MMgrReport.h @@ -74,6 +74,27 @@ public: } DECODE_FINISH(p); } + + void dump(ceph::Formatter *f) const + { + f->dump_string("path", path); + f->dump_string("description", description); + f->dump_string("nick", nick); + f->dump_int("type", type); + f->dump_int("priority", priority); + f->dump_int("unit", unit); + } + static void generate_test_instances(std::list<PerfCounterType*>& ls) + { + ls.push_back(new PerfCounterType); + ls.push_back(new PerfCounterType); + ls.back()->path = "mycounter"; + ls.back()->description = "mycounter description"; + ls.back()->nick = "mycounter nick"; + ls.back()->type = PERFCOUNTER_COUNTER; + ls.back()->priority = PerfCountersBuilder::PRIO_CRITICAL; + ls.back()->unit = UNIT_BYTES; + } }; WRITE_CLASS_ENCODER(PerfCounterType) diff --git a/src/messages/MOSDScrubReserve.h b/src/messages/MOSDScrubReserve.h index f1f76b3e6fe..c7ab9854117 100644 --- a/src/messages/MOSDScrubReserve.h +++ b/src/messages/MOSDScrubReserve.h @@ -24,7 +24,7 @@ private: public: spg_t pgid; epoch_t map_epoch; - enum { + enum ReserveMsgOp { REQUEST = 0, GRANT = 1, RELEASE = 2, diff --git a/src/mgr/ActivePyModules.cc b/src/mgr/ActivePyModules.cc index 4f75670591a..17bb3951142 100644 --- a/src/mgr/ActivePyModules.cc +++ b/src/mgr/ActivePyModules.cc @@ -555,42 +555,6 @@ void ActivePyModules::start_one(PyModuleRef py_module) })); } -void ActivePyModules::shutdown() -{ - std::lock_guard locker(lock); - - // 
Stop per active module finisher thread - for (auto& [name, module] : modules) { - dout(4) << "Stopping active module " << name << " finisher thread" << dendl; - module->finisher.wait_for_empty(); - module->finisher.stop(); - } - - // Signal modules to drop out of serve() and/or tear down resources - for (auto& [name, module] : modules) { - lock.unlock(); - dout(10) << "calling module " << name << " shutdown()" << dendl; - module->shutdown(); - dout(10) << "module " << name << " shutdown() returned" << dendl; - lock.lock(); - } - - // For modules implementing serve(), finish the threads where we - // were running that. - for (auto& [name, module] : modules) { - lock.unlock(); - dout(10) << "joining module " << name << dendl; - module->thread.join(); - dout(10) << "joined module " << name << dendl; - lock.lock(); - } - - cmd_finisher.wait_for_empty(); - cmd_finisher.stop(); - - modules.clear(); -} - void ActivePyModules::notify_all(const std::string ¬ify_type, const std::string ¬ify_id) { diff --git a/src/mgr/ActivePyModules.h b/src/mgr/ActivePyModules.h index 283f96a6ed9..d6ade4849f7 100644 --- a/src/mgr/ActivePyModules.h +++ b/src/mgr/ActivePyModules.h @@ -216,7 +216,6 @@ public: std::string *err); int init(); - void shutdown(); void start_one(PyModuleRef py_module); diff --git a/src/mgr/ClusterState.cc b/src/mgr/ClusterState.cc index 7f811a5e415..6b106268efc 100644 --- a/src/mgr/ClusterState.cc +++ b/src/mgr/ClusterState.cc @@ -225,14 +225,6 @@ void ClusterState::final_init() ceph_assert(r == 0); } -void ClusterState::shutdown() -{ - // unregister commands - g_ceph_context->get_admin_socket()->unregister_commands(asok_hook); - delete asok_hook; - asok_hook = NULL; -} - bool ClusterState::asok_command( std::string_view admin_command, const cmdmap_t& cmdmap, diff --git a/src/mgr/ClusterState.h b/src/mgr/ClusterState.h index 7939cd8eb8f..2beac362b47 100644 --- a/src/mgr/ClusterState.h +++ b/src/mgr/ClusterState.h @@ -152,7 +152,6 @@ public: } void final_init(); - void shutdown(); bool asok_command(std::string_view admin_command, const cmdmap_t& cmdmap, Formatter *f, diff --git a/src/mgr/DaemonServer.cc b/src/mgr/DaemonServer.cc index a4e85e2fc42..b1781316f82 100644 --- a/src/mgr/DaemonServer.cc +++ b/src/mgr/DaemonServer.cc @@ -98,7 +98,6 @@ DaemonServer::DaemonServer(MonClient *monc_, audit_clog(audit_clog_), pgmap_ready(false), timer(g_ceph_context, lock), - shutting_down(false), tick_event(nullptr), osd_perf_metric_collector_listener(this), osd_perf_metric_collector(osd_perf_metric_collector_listener), @@ -358,11 +357,6 @@ void DaemonServer::schedule_tick_locked(double delay_sec) tick_event = nullptr; } - // on shutdown start rejecting explicit requests to send reports that may - // originate from python land which may still be running. 
- if (shutting_down) - return; - tick_event = timer.add_event_after(delay_sec, new LambdaContext([this](int r) { tick(); @@ -407,19 +401,6 @@ void DaemonServer::handle_mds_perf_metric_query_updated() })); } -void DaemonServer::shutdown() -{ - dout(10) << "begin" << dendl; - msgr->shutdown(); - msgr->wait(); - cluster_state.shutdown(); - dout(10) << "done" << dendl; - - std::lock_guard l(lock); - shutting_down = true; - timer.shutdown(); -} - static DaemonKey key_from_service( const std::string& service_name, int peer_type, diff --git a/src/mgr/DaemonServer.h b/src/mgr/DaemonServer.h index a7b64561004..43125533e74 100644 --- a/src/mgr/DaemonServer.h +++ b/src/mgr/DaemonServer.h @@ -190,7 +190,6 @@ private: void maybe_ready(int32_t osd_id); SafeTimer timer; - bool shutting_down; Context *tick_event; void tick(); void schedule_tick_locked(double delay_sec); @@ -255,7 +254,6 @@ private: public: int init(uint64_t gid, entity_addrvec_t client_addrs); - void shutdown(); entity_addrvec_t get_myaddrs() const; diff --git a/src/mgr/Mgr.cc b/src/mgr/Mgr.cc index 63ad530fae2..5bd2ffb246c 100644 --- a/src/mgr/Mgr.cc +++ b/src/mgr/Mgr.cc @@ -214,12 +214,6 @@ std::map<std::string, std::string> Mgr::load_store() return loaded; } -void Mgr::handle_signal(int signum) -{ - ceph_assert(signum == SIGINT || signum == SIGTERM); - shutdown(); -} - static void handle_mgr_signal(int signum) { derr << " *** Got signal " << sig_str(signum) << " ***" << dendl; @@ -490,27 +484,6 @@ void Mgr::load_all_metadata() } } - -void Mgr::shutdown() -{ - dout(10) << "mgr shutdown init" << dendl; - finisher.queue(new LambdaContext([&](int) { - { - std::lock_guard l(lock); - // First stop the server so that we're not taking any more incoming - // requests - server.shutdown(); - } - // after the messenger is stopped, signal modules to shutdown via finisher - py_module_registry->active_shutdown(); - })); - - // Then stop the finisher to ensure its enqueued contexts aren't going - // to touch references to the things we're about to tear down - finisher.wait_for_empty(); - finisher.stop(); -} - void Mgr::handle_osd_map() { ceph_assert(ceph_mutex_is_locked_by_me(lock)); diff --git a/src/mgr/Mgr.h b/src/mgr/Mgr.h index 22ebdb68041..65931c331f3 100644 --- a/src/mgr/Mgr.h +++ b/src/mgr/Mgr.h @@ -94,9 +94,6 @@ public: bool ms_dispatch2(const ceph::ref_t<Message>& m); void background_init(Context *completion); - void shutdown(); - - void handle_signal(int signum); std::map<std::string, std::string> get_services() const; diff --git a/src/mgr/MgrStandby.cc b/src/mgr/MgrStandby.cc index 545624eb79b..052e6868177 100644 --- a/src/mgr/MgrStandby.cc +++ b/src/mgr/MgrStandby.cc @@ -295,41 +295,6 @@ void MgrStandby::tick() )); } -void MgrStandby::shutdown() -{ - finisher.queue(new LambdaContext([&](int) { - std::lock_guard l(lock); - - dout(4) << "Shutting down" << dendl; - - py_module_registry.shutdown(); - // stop sending beacon first, I use monc to talk with monitors - timer.shutdown(); - // client uses monc and objecter - client.shutdown(); - mgrc.shutdown(); - // Stop asio threads, so leftover events won't call into shut down - // monclient/objecter. 
- poolctx.finish(); - // stop monc, so mon won't be able to instruct me to shutdown/activate after - // the active_mgr is stopped - monc.shutdown(); - if (active_mgr) { - active_mgr->shutdown(); - } - // objecter is used by monc and active_mgr - objecter.shutdown(); - // client_messenger is used by all of them, so stop it in the end - client_messenger->shutdown(); - })); - - // Then stop the finisher to ensure its enqueued contexts aren't going - // to touch references to the things we're about to tear down - finisher.wait_for_empty(); - finisher.stop(); - mgr_perf_stop(g_ceph_context); -} - void MgrStandby::respawn() { // --- WARNING TO FUTURE COPY/PASTERS --- diff --git a/src/mgr/MgrStandby.h b/src/mgr/MgrStandby.h index 0f06e3074a0..5d238c85577 100644 --- a/src/mgr/MgrStandby.h +++ b/src/mgr/MgrStandby.h @@ -79,7 +79,6 @@ public: bool ms_handle_refused(Connection *con) override; int init(); - void shutdown(); void respawn(); int main(std::vector<const char *> args); void tick(); diff --git a/src/mgr/PyModuleRegistry.cc b/src/mgr/PyModuleRegistry.cc index f5f5008023f..eb2d2babe75 100644 --- a/src/mgr/PyModuleRegistry.cc +++ b/src/mgr/PyModuleRegistry.cc @@ -217,53 +217,6 @@ void PyModuleRegistry::active_start( } } -void PyModuleRegistry::active_shutdown() -{ - std::lock_guard locker(lock); - - if (active_modules != nullptr) { - active_modules->shutdown(); - active_modules.reset(); - } -} - -void PyModuleRegistry::shutdown() -{ - std::lock_guard locker(lock); - - if (standby_modules != nullptr) { - standby_modules->shutdown(); - standby_modules.reset(); - } - - // Ideally, now, we'd be able to do this for all modules: - // - // Py_EndInterpreter(pMyThreadState); - // PyThreadState_Swap(pMainThreadState); - // - // Unfortunately, if the module has any other *python* threads active - // at this point, Py_EndInterpreter() will abort with: - // - // Fatal Python error: Py_EndInterpreter: not the last thread - // - // This can happen when using CherryPy in a module, becuase CherryPy - // runs an extra thread as a timeout monitor, which spends most of its - // life inside a time.sleep(60). Unless you are very, very lucky with - // the timing calling this destructor, that thread will still be stuck - // in a sleep, and Py_EndInterpreter() will abort. - // - // This could of course also happen with a poorly written module which - // made no attempt to clean up any additional threads it created. - // - // The safest thing to do is just not call Py_EndInterpreter(), and - // let Py_Finalize() kill everything after all modules are shut down. 
- - modules.clear(); - - PyEval_RestoreThread(pMainThreadState); - Py_Finalize(); -} - std::vector<std::string> PyModuleRegistry::probe_modules(const std::string &path) const { const auto opt = g_conf().get_val<std::string>("mgr_disabled_modules"); diff --git a/src/mgr/PyModuleRegistry.h b/src/mgr/PyModuleRegistry.h index 9af9abb5762..9d6d9c2cdd0 100644 --- a/src/mgr/PyModuleRegistry.h +++ b/src/mgr/PyModuleRegistry.h @@ -122,9 +122,6 @@ public: return standby_modules != nullptr; } - void active_shutdown(); - void shutdown(); - std::vector<MonCommand> get_commands() const; std::vector<ModuleCommand> get_py_commands() const; diff --git a/src/os/bluestore/AvlAllocator.cc b/src/os/bluestore/AvlAllocator.cc index 1a170d13fdb..8de249e978c 100644 --- a/src/os/bluestore/AvlAllocator.cc +++ b/src/os/bluestore/AvlAllocator.cc @@ -39,7 +39,7 @@ uint64_t AvlAllocator::_pick_block_after(uint64_t *cursor, uint64_t search_bytes = 0; auto rs_start = range_tree.lower_bound(range_t{*cursor, size}, compare); for (auto rs = rs_start; rs != range_tree.end(); ++rs) { - uint64_t offset = p2roundup(rs->start, align); + uint64_t offset = rs->start; *cursor = offset + size; if (offset + size <= rs->end) { return offset; @@ -59,7 +59,7 @@ uint64_t AvlAllocator::_pick_block_after(uint64_t *cursor, } // If we reached end, start from beginning till cursor. for (auto rs = range_tree.begin(); rs != rs_start; ++rs) { - uint64_t offset = p2roundup(rs->start, align); + uint64_t offset = rs->start; *cursor = offset + size; if (offset + size <= rs->end) { return offset; @@ -82,7 +82,7 @@ uint64_t AvlAllocator::_pick_block_fits(uint64_t size, const auto compare = range_size_tree.key_comp(); auto rs_start = range_size_tree.lower_bound(range_t{0, size}, compare); for (auto rs = rs_start; rs != range_size_tree.end(); ++rs) { - uint64_t offset = p2roundup(rs->start, align); + uint64_t offset = rs->start; if (offset + size <= rs->end) { return offset; } diff --git a/src/os/bluestore/BlueFS.cc b/src/os/bluestore/BlueFS.cc index 52857a16081..54d0bbc38e5 100644 --- a/src/os/bluestore/BlueFS.cc +++ b/src/os/bluestore/BlueFS.cc @@ -695,16 +695,24 @@ void BlueFS::_init_alloc() } logger->set(l_bluefs_wal_alloc_unit, wal_alloc_size); + + uint64_t shared_alloc_size = cct->_conf->bluefs_shared_alloc_size; + if (shared_alloc && shared_alloc->a) { + uint64_t unit = shared_alloc->a->get_block_size(); + shared_alloc_size = std::max( + unit, + shared_alloc_size); + ceph_assert(0 == p2phase(shared_alloc_size, unit)); + } if (bdev[BDEV_SLOW]) { alloc_size[BDEV_DB] = cct->_conf->bluefs_alloc_size; - alloc_size[BDEV_SLOW] = cct->_conf->bluefs_shared_alloc_size; - logger->set(l_bluefs_db_alloc_unit, cct->_conf->bluefs_alloc_size); - logger->set(l_bluefs_main_alloc_unit, cct->_conf->bluefs_shared_alloc_size); + alloc_size[BDEV_SLOW] = shared_alloc_size; } else { - alloc_size[BDEV_DB] = cct->_conf->bluefs_shared_alloc_size; - logger->set(l_bluefs_main_alloc_unit, 0); - logger->set(l_bluefs_db_alloc_unit, cct->_conf->bluefs_shared_alloc_size); + alloc_size[BDEV_DB] = shared_alloc_size; + alloc_size[BDEV_SLOW] = 0; } + logger->set(l_bluefs_db_alloc_unit, alloc_size[BDEV_DB]); + logger->set(l_bluefs_main_alloc_unit, alloc_size[BDEV_SLOW]); // new wal and db devices are never shared if (bdev[BDEV_NEWWAL]) { alloc_size[BDEV_NEWWAL] = cct->_conf->bluefs_alloc_size; @@ -718,13 +726,13 @@ void BlueFS::_init_alloc() continue; } ceph_assert(bdev[id]->get_size()); - ceph_assert(alloc_size[id]); if (is_shared_alloc(id)) { dout(1) << __func__ << " shared, id " << 
id << std::hex << ", capacity 0x" << bdev[id]->get_size() << ", block size 0x" << alloc_size[id] << std::dec << dendl; } else { + ceph_assert(alloc_size[id]); std::string name = "bluefs-"; const char* devnames[] = { "wal","db","slow" }; if (id <= BDEV_SLOW) @@ -1579,11 +1587,9 @@ int BlueFS::_replay(bool noop, bool to_stdout) } if (fnode.ino != 1) { vselector->sub_usage(f->vselector_hint, f->fnode); - } + vselector->add_usage(f->vselector_hint, fnode); + } f->fnode = fnode; - if (fnode.ino != 1) { - vselector->add_usage(f->vselector_hint, f->fnode); - } if (fnode.ino > ino_last) { ino_last = fnode.ino; @@ -1825,7 +1831,7 @@ int BlueFS::device_migrate_to_existing( // write entire file auto l = _allocate(dev_target, bl.length(), 0, - &file_ref->fnode, 0, false); + &file_ref->fnode, nullptr, 0, false); if (l < 0) { derr << __func__ << " unable to allocate len 0x" << std::hex << bl.length() << std::dec << " from " << (int)dev_target @@ -1965,7 +1971,7 @@ int BlueFS::device_migrate_to_new( // write entire file auto l = _allocate(dev_target, bl.length(), 0, - &file_ref->fnode, 0, false); + &file_ref->fnode, nullptr, 0, false); if (l < 0) { derr << __func__ << " unable to allocate len 0x" << std::hex << bl.length() << std::dec << " from " << (int)dev_target @@ -2559,7 +2565,6 @@ void BlueFS::_rewrite_log_and_layout_sync_LNF_LD(bool permit_dev_fallback, auto t0 = mono_clock::now(); File *log_file = log.writer->file.get(); - bluefs_fnode_t fnode_tail; // log.t.seq is always set to current live seq ceph_assert(log.t.seq == log.seq_live); // Capturing entire state. Dump anything that has been stored there. @@ -2614,7 +2619,8 @@ void BlueFS::_rewrite_log_and_layout_sync_LNF_LD(bool permit_dev_fallback, dout(20) << __func__ << " compacted_meta_need " << compacted_meta_need << dendl; - int r = _allocate(log_dev, compacted_meta_need, 0, &fnode_tail, 0, + bluefs_fnode_t fnode_tail; + int r = _allocate(log_dev, compacted_meta_need, 0, &fnode_tail, nullptr, 0, permit_dev_fallback); ceph_assert(r == 0); @@ -2625,7 +2631,7 @@ void BlueFS::_rewrite_log_and_layout_sync_LNF_LD(bool permit_dev_fallback, uint64_t starter_need = _make_initial_transaction(starter_seq, fnode_tail, 0, nullptr); bluefs_fnode_t fnode_starter(log_file->fnode.ino, 0, mtime); - r = _allocate(log_dev, starter_need, 0, &fnode_starter, 0, + r = _allocate(log_dev, starter_need, 0, &fnode_starter, nullptr, 0, permit_dev_fallback); ceph_assert(r == 0); @@ -3126,16 +3132,17 @@ void BlueFS::_extend_log(uint64_t amount) { } ll.release(); uint64_t allocated_before_extension = log.writer->file->fnode.get_allocated(); - vselector->sub_usage(log.writer->file->vselector_hint, log.writer->file->fnode); amount = round_up_to(amount, super.block_size); int r = _allocate( vselector->select_prefer_bdev(log.writer->file->vselector_hint), amount, 0, - &log.writer->file->fnode); + &log.writer->file->fnode, + [&](const bluefs_extent_t& e) { + vselector->add_usage(log.writer->file->vselector_hint, e); + }); ceph_assert(r == 0); dout(10) << "extended log by 0x" << std::hex << amount << " bytes " << dendl; - vselector->add_usage(log.writer->file->vselector_hint, log.writer->file->fnode); bluefs_transaction_t log_extend_transaction; log_extend_transaction.seq = log.t.seq; @@ -3405,7 +3412,9 @@ int BlueFS::_flush_range_F(FileWriter *h, uint64_t offset, uint64_t length) dout(10) << __func__ << " " << h << " pos 0x" << std::hex << h->pos << " 0x" << offset << "~" << length << std::dec - << " to " << h->file->fnode << dendl; + << " to " << h->file->fnode + << " hint " << 
h->file->vselector_hint + << dendl; if (h->file->deleted) { dout(10) << __func__ << " deleted, no-op" << dendl; return 0; @@ -3426,7 +3435,6 @@ int BlueFS::_flush_range_F(FileWriter *h, uint64_t offset, uint64_t length) ceph_assert(offset <= h->file->fnode.size); uint64_t allocated = h->file->fnode.get_allocated(); - vselector->sub_usage(h->file->vselector_hint, h->file->fnode); // do not bother to dirty the file if we are overwriting // previously allocated extents. if (allocated < offset + length) { @@ -3435,25 +3443,26 @@ int BlueFS::_flush_range_F(FileWriter *h, uint64_t offset, uint64_t length) int r = _allocate(vselector->select_prefer_bdev(h->file->vselector_hint), offset + length - allocated, 0, - &h->file->fnode); + &h->file->fnode, + [&](const bluefs_extent_t& e) { + vselector->add_usage(h->file->vselector_hint, e); + }); if (r < 0) { derr << __func__ << " allocated: 0x" << std::hex << allocated << " offset: 0x" << offset << " length: 0x" << length << std::dec << dendl; - vselector->add_usage(h->file->vselector_hint, h->file->fnode); // undo ceph_abort_msg("bluefs enospc"); return r; } h->file->is_dirty = true; } if (h->file->fnode.size < offset + length) { + vselector->add_usage(h->file->vselector_hint, offset + length - h->file->fnode.size); h->file->fnode.size = offset + length; h->file->is_dirty = true; } - dout(20) << __func__ << " file now, unflushed " << h->file->fnode << dendl; int res = _flush_data(h, offset, length, buffered); - vselector->add_usage(h->file->vselector_hint, h->file->fnode); logger->tinc(l_bluefs_flush_lat, mono_clock::now() - t0); return res; } @@ -3710,10 +3719,9 @@ int BlueFS::truncate(FileWriter *h, uint64_t offset)/*_WF_L*/ _flush_bdev(h); std::lock_guard ll(log.lock); - vselector->sub_usage(h->file->vselector_hint, h->file->fnode.size); + vselector->sub_usage(h->file->vselector_hint, h->file->fnode.size - offset); h->file->fnode.size = offset; h->file->is_dirty = true; - vselector->add_usage(h->file->vselector_hint, h->file->fnode.size); log.t.op_file_update_inc(h->file->fnode); logger->tinc(l_bluefs_truncate_lat, mono_clock::now() - t0); return 0; @@ -3809,6 +3817,7 @@ const char* BlueFS::get_device_name(unsigned id) int BlueFS::_allocate(uint8_t id, uint64_t len, uint64_t alloc_unit, bluefs_fnode_t* node, + update_fn_t cb, size_t alloc_attempts, bool permit_dev_fallback) { @@ -3892,6 +3901,7 @@ int BlueFS::_allocate(uint8_t id, uint64_t len, len, alloc_unit, node, + cb, alloc_attempts, permit_dev_fallback); } else if (permit_dev_fallback && id != BDEV_SLOW && alloc[id + 1]) { @@ -3905,6 +3915,7 @@ int BlueFS::_allocate(uint8_t id, uint64_t len, len, 0, // back to default alloc unit node, + cb, alloc_attempts, permit_dev_fallback); } else { @@ -3924,9 +3935,12 @@ int BlueFS::_allocate(uint8_t id, uint64_t len, } for (auto& p : extents) { - node->append_extent(bluefs_extent_t(id, p.offset, p.length)); + bluefs_extent_t e(id, p.offset, p.length); + node->append_extent(e); + if (cb) { + cb(e); + } } - return 0; } @@ -3945,12 +3959,13 @@ int BlueFS::preallocate(FileRef f, uint64_t off, uint64_t len)/*_LF*/ if (off + len > allocated) { uint64_t want = off + len - allocated; - vselector->sub_usage(f->vselector_hint, f->fnode); int r = _allocate(vselector->select_prefer_bdev(f->vselector_hint), want, 0, - &f->fnode); - vselector->add_usage(f->vselector_hint, f->fnode); + &f->fnode, + [&](const bluefs_extent_t& e) { + vselector->add_usage(f->vselector_hint, e); + }); if (r < 0) return r; @@ -4007,7 +4022,6 @@ int BlueFS::open_for_write( 
_maybe_check_vselector_LNF(); FileRef file; bool create = false; - bool truncate = false; mempool::bluefs::vector<bluefs_extent_t> pending_release_extents; { std::lock_guard ll(log.lock); @@ -4034,10 +4048,12 @@ int BlueFS::open_for_write( } file = ceph::make_ref<File>(); file->fnode.ino = ++ino_last; + file->vselector_hint = vselector->get_hint_by_dir(dirname); nodes.file_map[ino_last] = file; dir->file_map.emplace_hint(q, string{filename}, file); ++file->refs; create = true; + vselector->add_usage(file->vselector_hint, file->fnode.size, true); // update file count logger->set(l_bluefs_num_files, nodes.file_map.size()); } else { // overwrite existing file? @@ -4052,8 +4068,8 @@ int BlueFS::open_for_write( << " already exists, truncate + overwrite" << dendl; vselector->sub_usage(file->vselector_hint, file->fnode); file->fnode.size = 0; + vselector->add_usage(file->vselector_hint, file->fnode.size, true); // restore file count pending_release_extents.swap(file->fnode.extents); - truncate = true; file->fnode.clear_extents(); } @@ -4061,11 +4077,6 @@ int BlueFS::open_for_write( ceph_assert(file->fnode.ino > 1); file->fnode.mtime = ceph_clock_now(); - file->vselector_hint = vselector->get_hint_by_dir(dirname); - if (create || truncate) { - vselector->add_usage(file->vselector_hint, file->fnode); // update file count - } - dout(20) << __func__ << " mapping " << dirname << "/" << filename << " vsel_hint " << file->vselector_hint << dendl; diff --git a/src/os/bluestore/BlueFS.h b/src/os/bluestore/BlueFS.h index f0493a758da..759e71fdf05 100644 --- a/src/os/bluestore/BlueFS.h +++ b/src/os/bluestore/BlueFS.h @@ -88,15 +88,102 @@ public: virtual ~BlueFSVolumeSelector() { } + /** + * Method to learn a hint (aka logic level discriminator) specific for + * BlueFS log + * + */ virtual void* get_hint_for_log() const = 0; + /** + * Method to learn a hint (aka logic level discriminator) provided directory + * bound to. 
+ * + */ virtual void* get_hint_by_dir(std::string_view dirname) const = 0; - virtual void add_usage(void* file_hint, const bluefs_fnode_t& fnode) = 0; - virtual void sub_usage(void* file_hint, const bluefs_fnode_t& fnode) = 0; - virtual void add_usage(void* file_hint, uint64_t fsize) = 0; - virtual void sub_usage(void* file_hint, uint64_t fsize) = 0; + /** + * Increments stats for a given logical level using provided fnode as a delta, + * Parameters: + * hint: logical level discriminator + * fnode: fnode metadata to be used as a complex delta value: + * (+1 file count, +file size, +all the extents) + * + */ + void add_usage(void* hint, const bluefs_fnode_t& fnode) { + for (auto& e : fnode.extents) { + add_usage(hint, e); + } + add_usage(hint, fnode.size, true); + } + /** + * Decrements stats for a given logical level using provided fnode as a delta + * Parameters: + * hint: logical level discriminator + * fnode: fnode metadata to be used as a complex delta value: + * (-1 file count, -file size, -all the extents) + * + */ + void sub_usage(void* hint, const bluefs_fnode_t& fnode) { + for (auto& e : fnode.extents) { + sub_usage(hint, e); + } + sub_usage(hint, fnode.size, true); + } + /** + * Increments stats for a given logical level using provided extent as a delta, + * Parameters: + * hint: logical level discriminator + * extent: bluefs extent to be used as a complex delta value: + * (.bdev determines physical location, +length) + * + */ + virtual void add_usage(void* hint, const bluefs_extent_t& extent) = 0; + /** + * Decrements stats for a given logical level using provided extent as a delta, + * Parameters: + * hint: logical level discriminator + * extent: bluefs extent to be used as a complex delta value: + * (.bdev determines physical location, -length) + * + */ + virtual void sub_usage(void* hint, const bluefs_extent_t& extent) = 0; + /** + * Increments files count and overall files size for a given logical level + * Parameters: + * hint: logical level discriminator + * fsize: delta value for file size + * upd_files: whether or not to increment file count + * + */ + virtual void add_usage(void* hint, uint64_t fsize, bool upd_files = false) = 0; + /** + * Decrements files count and overall files size for a given logical level + * Parameters: + * hint: logical level discriminator + * fsize: delta value for file size + * upd_files: whether or not to decrement file count + * + */ + virtual void sub_usage(void* hint, uint64_t fsize, bool upd_files = false) = 0; + + /** + * Determines preferred physical device for the given logical level + * Parameters: + * hint: logical level discriminator + * + */ virtual uint8_t select_prefer_bdev(void* hint) = 0; + /** + * Builds path set for RocksDB to use + * Parameters: + * base: path's root + * + */ virtual void get_paths(const std::string& base, paths& res) const = 0; + /** + * Dumps VSelector's state + * + */ virtual void dump(std::ostream& sout) = 0; /* used for sanity checking of vselector */ @@ -439,9 +526,12 @@ private: return bdev[BDEV_SLOW] ? 
BDEV_SLOW : BDEV_DB; } const char* get_device_name(unsigned id); + + typedef std::function<void(const bluefs_extent_t)> update_fn_t; int _allocate(uint8_t bdev, uint64_t len, uint64_t alloc_unit, bluefs_fnode_t* node, + update_fn_t cb = nullptr, size_t alloc_attempts = 0, bool permit_dev_fallback = true); @@ -716,19 +806,19 @@ public: void* get_hint_for_log() const override; void* get_hint_by_dir(std::string_view dirname) const override; - void add_usage(void* hint, const bluefs_fnode_t& fnode) override { + void add_usage(void* hint, const bluefs_extent_t& extent) override { // do nothing return; } - void sub_usage(void* hint, const bluefs_fnode_t& fnode) override { + void sub_usage(void* hint, const bluefs_extent_t& extent) override { // do nothing return; } - void add_usage(void* hint, uint64_t fsize) override { + void add_usage(void*, uint64_t, bool) override { // do nothing return; } - void sub_usage(void* hint, uint64_t fsize) override { + void sub_usage(void*, uint64_t, bool) override { // do nothing return; } diff --git a/src/os/bluestore/BlueStore.cc b/src/os/bluestore/BlueStore.cc index b8dca31c057..0f79fc4f095 100644 --- a/src/os/bluestore/BlueStore.cc +++ b/src/os/bluestore/BlueStore.cc @@ -7396,7 +7396,7 @@ int BlueStore::_open_bluefs(bool create, bool read_only) bluefs->get_block_device_size(BlueFS::BDEV_WAL) * 95 / 100, bluefs->get_block_device_size(BlueFS::BDEV_DB) * 95 / 100, bluefs->get_block_device_size(BlueFS::BDEV_SLOW) * 95 / 100, - 1024 * 1024 * 1024, //FIXME: set expected l0 size here + rocks_opts.write_buffer_size * rocks_opts.max_write_buffer_number, rocks_opts.max_bytes_for_level_base, rocks_opts.max_bytes_for_level_multiplier, reserved_factor, @@ -19266,7 +19266,13 @@ void RocksDBBlueFSVolumeSelector::dump(ostream& sout) { auto max_x = per_level_per_dev_usage.get_max_x(); auto max_y = per_level_per_dev_usage.get_max_y(); - sout << "RocksDBBlueFSVolumeSelector Usage Matrix:" << std::endl; + sout << "RocksDBBlueFSVolumeSelector " << std::endl; + sout << ">>Settings<<" + << " extra=" << byte_u_t(db_avail4slow) + << ", l0_size=" << byte_u_t(level0_size) + << ", l_base=" << byte_u_t(level_base) + << ", l_multi=" << byte_u_t(level_multiplier) + << std::endl; constexpr std::array<const char*, 8> names{ { "DEV/LEV", "WAL", diff --git a/src/os/bluestore/BlueStore.h b/src/os/bluestore/BlueStore.h index 095c838608e..beac82f163a 100644 --- a/src/os/bluestore/BlueStore.h +++ b/src/os/bluestore/BlueStore.h @@ -4240,6 +4240,9 @@ class RocksDBBlueFSVolumeSelector : public BlueFSVolumeSelector uint64_t l_totals[LEVEL_MAX - LEVEL_FIRST]; uint64_t db_avail4slow = 0; + uint64_t level0_size = 0; + uint64_t level_base = 0; + uint64_t level_multiplier = 0; enum { OLD_POLICY, USE_SOME_EXTRA @@ -4265,21 +4268,24 @@ public: if (!new_pol) { return; } - // Calculating how much extra space is available at DB volume. 
// Depending on the presence of explicit reserved size specification it might be either // * DB volume size - reserved // or // * DB volume size - sum_max_level_size(0, L-1) - max_level_size(L) * reserved_factor if (!reserved) { + level0_size = _level0_size; + level_base = _level_base; + level_multiplier = _level_multiplier; uint64_t prev_levels = _level0_size; uint64_t cur_level = _level_base; - uint64_t cur_threshold = 0; + uint64_t cur_threshold = prev_levels + cur_level; do { - uint64_t next_level = cur_level * _level_multiplier; - uint64_t next_threshold = prev_levels + cur_level + next_level * reserved_factor; + uint64_t next_level = cur_level * _level_multiplier; + uint64_t next_threshold = prev_levels + cur_level + next_level; if (_db_total <= next_threshold) { - db_avail4slow = cur_threshold ? _db_total - cur_threshold : 0; + cur_threshold *= reserved_factor; + db_avail4slow = cur_threshold < _db_total ? _db_total - cur_threshold : 0; break; } else { prev_levels += cur_level; @@ -4288,7 +4294,7 @@ public: } } while (true); } else { - db_avail4slow = _db_total - reserved; + db_avail4slow = reserved < _db_total ? _db_total - reserved : 0; } } @@ -4297,63 +4303,40 @@ public: } void* get_hint_by_dir(std::string_view dirname) const override; - void add_usage(void* hint, const bluefs_fnode_t& fnode) override { + void add_usage(void* hint, const bluefs_extent_t& extent) override { if (hint == nullptr) return; size_t pos = (size_t)hint - LEVEL_FIRST; - for (auto& p : fnode.extents) { - auto& cur = per_level_per_dev_usage.at(p.bdev, pos); - auto& max = per_level_per_dev_max.at(p.bdev, pos); - uint64_t v = cur.fetch_add(p.length) + p.length; - while (v > max) { - max.exchange(v); - } - { - //update per-device totals - auto& cur = per_level_per_dev_usage.at(p.bdev, LEVEL_MAX - LEVEL_FIRST); - auto& max = per_level_per_dev_max.at(p.bdev, LEVEL_MAX - LEVEL_FIRST); - uint64_t v = cur.fetch_add(p.length) + p.length; - while (v > max) { - max.exchange(v); - } - } + auto& cur = per_level_per_dev_usage.at(extent.bdev, pos); + auto& max = per_level_per_dev_max.at(extent.bdev, pos); + uint64_t v = cur.fetch_add(extent.length) + extent.length; + while (v > max) { + max.exchange(v); } { - //update per-level actual totals - auto& cur = per_level_per_dev_usage.at(BlueFS::MAX_BDEV, pos); - auto& max = per_level_per_dev_max.at(BlueFS::MAX_BDEV, pos); - uint64_t v = cur.fetch_add(fnode.size) + fnode.size; + //update per-device totals + auto& cur = per_level_per_dev_usage.at(extent.bdev, LEVEL_MAX - LEVEL_FIRST); + auto& max = per_level_per_dev_max.at(extent.bdev, LEVEL_MAX - LEVEL_FIRST); + uint64_t v = cur.fetch_add(extent.length) + extent.length; while (v > max) { max.exchange(v); } } - ++per_level_files[pos]; - ++per_level_files[LEVEL_MAX - LEVEL_FIRST]; } - void sub_usage(void* hint, const bluefs_fnode_t& fnode) override { + void sub_usage(void* hint, const bluefs_extent_t& extent) override { if (hint == nullptr) return; size_t pos = (size_t)hint - LEVEL_FIRST; - for (auto& p : fnode.extents) { - auto& cur = per_level_per_dev_usage.at(p.bdev, pos); - ceph_assert(cur >= p.length); - cur -= p.length; - - //update per-device totals - auto& cur2 = per_level_per_dev_usage.at(p.bdev, LEVEL_MAX - LEVEL_FIRST); - ceph_assert(cur2 >= p.length); - cur2 -= p.length; - } - //update per-level actual totals - auto& cur = per_level_per_dev_usage.at(BlueFS::MAX_BDEV, pos); - ceph_assert(cur >= fnode.size); - cur -= fnode.size; - ceph_assert(per_level_files[pos] > 0); - --per_level_files[pos]; - 
ceph_assert(per_level_files[LEVEL_MAX - LEVEL_FIRST] > 0); - --per_level_files[LEVEL_MAX - LEVEL_FIRST]; + auto& cur = per_level_per_dev_usage.at(extent.bdev, pos); + ceph_assert(cur >= extent.length); + cur -= extent.length; + + //update per-device totals + auto& cur2 = per_level_per_dev_usage.at(extent.bdev, LEVEL_MAX - LEVEL_FIRST); + ceph_assert(cur2 >= extent.length); + cur2 -= extent.length; } - void add_usage(void* hint, uint64_t size_more) override { + void add_usage(void* hint, uint64_t size_more, bool upd_files) override { if (hint == nullptr) return; size_t pos = (size_t)hint - LEVEL_FIRST; @@ -4364,8 +4347,12 @@ public: while (v > max) { max.exchange(v); } + if (upd_files) { + ++per_level_files[pos]; + ++per_level_files[LEVEL_MAX - LEVEL_FIRST]; + } } - void sub_usage(void* hint, uint64_t size_less) override { + void sub_usage(void* hint, uint64_t size_less, bool upd_files) override { if (hint == nullptr) return; size_t pos = (size_t)hint - LEVEL_FIRST; @@ -4373,6 +4360,12 @@ public: auto& cur = per_level_per_dev_usage.at(BlueFS::MAX_BDEV, pos); ceph_assert(cur >= size_less); cur -= size_less; + if (upd_files) { + ceph_assert(per_level_files[pos] > 0); + --per_level_files[pos]; + ceph_assert(per_level_files[LEVEL_MAX - LEVEL_FIRST] > 0); + --per_level_files[LEVEL_MAX - LEVEL_FIRST]; + } } uint8_t select_prefer_bdev(void* h) override; diff --git a/src/os/bluestore/BtreeAllocator.cc b/src/os/bluestore/BtreeAllocator.cc index 2071603a219..2647b875992 100644 --- a/src/os/bluestore/BtreeAllocator.cc +++ b/src/os/bluestore/BtreeAllocator.cc @@ -25,7 +25,7 @@ uint64_t BtreeAllocator::_pick_block_after(uint64_t *cursor, { auto rs_start = range_tree.lower_bound(*cursor); for (auto rs = rs_start; rs != range_tree.end(); ++rs) { - uint64_t offset = p2roundup(rs->first, align); + uint64_t offset = rs->first; if (offset + size <= rs->second) { *cursor = offset + size; return offset; @@ -37,7 +37,7 @@ uint64_t BtreeAllocator::_pick_block_after(uint64_t *cursor, } // If we reached end, start from beginning till cursor. 
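  // p2roundup(off, align) rounds 'off' up to the next multiple of a power-of-two
  // 'align' (a no-op when 'off' is already aligned), e.g.
  //   p2roundup(0x1234, 0x1000) == 0x2000
  //   p2roundup(0x2000, 0x1000) == 0x2000
  // The dropped calls performed that rounding on each candidate extent; the
  // candidate start is now taken verbatim, presumably because free extents are
  // already tracked at allocation-unit granularity.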
for (auto rs = range_tree.begin(); rs != rs_start; ++rs) { - uint64_t offset = p2roundup(rs->first, align); + uint64_t offset = rs->first; if (offset + size <= rs->second) { *cursor = offset + size; return offset; @@ -53,7 +53,7 @@ uint64_t BtreeAllocator::_pick_block_fits(uint64_t size, // the needs auto rs_start = range_size_tree.lower_bound(range_value_t{0,size}); for (auto rs = rs_start; rs != range_size_tree.end(); ++rs) { - uint64_t offset = p2roundup(rs->start, align); + uint64_t offset = rs->start; if (offset + size <= rs->start + rs->size) { return offset; } diff --git a/src/os/bluestore/StupidAllocator.cc b/src/os/bluestore/StupidAllocator.cc index 550024e67e7..8f74a499ed3 100644 --- a/src/os/bluestore/StupidAllocator.cc +++ b/src/os/bluestore/StupidAllocator.cc @@ -52,20 +52,6 @@ void StupidAllocator::_insert_free(uint64_t off, uint64_t len) } } -/// return the effective length of the extent if we align to alloc_unit -uint64_t StupidAllocator::_aligned_len( - StupidAllocator::interval_set_t::iterator p, - uint64_t alloc_unit) -{ - uint64_t skew = p.get_start() % alloc_unit; - if (skew) - skew = alloc_unit - skew; - if (skew > p.get_len()) - return 0; - else - return p.get_len() - skew; -} - int64_t StupidAllocator::allocate_int( uint64_t want_size, uint64_t alloc_unit, int64_t hint, uint64_t *offset, uint32_t *length) @@ -89,7 +75,7 @@ int64_t StupidAllocator::allocate_int( for (bin = orig_bin; bin < (int)free.size(); ++bin) { p = free[bin].lower_bound(hint); while (p != free[bin].end()) { - if (_aligned_len(p, alloc_unit) >= want_size) { + if (p.get_len() >= want_size) { goto found; } ++p; @@ -102,7 +88,7 @@ int64_t StupidAllocator::allocate_int( p = free[bin].begin(); auto end = hint ? free[bin].lower_bound(hint) : free[bin].end(); while (p != end) { - if (_aligned_len(p, alloc_unit) >= want_size) { + if (p.get_len() >= want_size) { goto found; } ++p; @@ -114,7 +100,7 @@ int64_t StupidAllocator::allocate_int( for (bin = orig_bin; bin >= 0; --bin) { p = free[bin].lower_bound(hint); while (p != free[bin].end()) { - if (_aligned_len(p, alloc_unit) >= alloc_unit) { + if (p.get_len() >= alloc_unit) { goto found; } ++p; @@ -127,7 +113,7 @@ int64_t StupidAllocator::allocate_int( p = free[bin].begin(); auto end = hint ? 
free[bin].lower_bound(hint) : free[bin].end(); while (p != end) { - if (_aligned_len(p, alloc_unit) >= alloc_unit) { + if (p.get_len() >= alloc_unit) { goto found; } ++p; @@ -137,11 +123,9 @@ int64_t StupidAllocator::allocate_int( return -ENOSPC; found: - uint64_t skew = p.get_start() % alloc_unit; - if (skew) - skew = alloc_unit - skew; - *offset = p.get_start() + skew; - *length = std::min(std::max(alloc_unit, want_size), p2align((p.get_len() - skew), alloc_unit)); + *offset = p.get_start(); + *length = std::min(std::max(alloc_unit, want_size), p2align(p.get_len(), alloc_unit)); + if (cct->_conf->bluestore_debug_small_allocations) { uint64_t max = alloc_unit * (rand() % cct->_conf->bluestore_debug_small_allocations); @@ -158,7 +142,7 @@ int64_t StupidAllocator::allocate_int( free[bin].erase(*offset, *length); uint64_t off, len; - if (*offset && free[bin].contains(*offset - skew - 1, &off, &len)) { + if (*offset && free[bin].contains(*offset - 1, &off, &len)) { int newbin = _choose_bin(len); if (newbin != bin) { ldout(cct, 30) << __func__ << " demoting 0x" << std::hex << off << "~" << len diff --git a/src/os/bluestore/StupidAllocator.h b/src/os/bluestore/StupidAllocator.h index 0d50d73f42a..443b091350e 100644 --- a/src/os/bluestore/StupidAllocator.h +++ b/src/os/bluestore/StupidAllocator.h @@ -31,10 +31,6 @@ class StupidAllocator : public Allocator { unsigned _choose_bin(uint64_t len); void _insert_free(uint64_t offset, uint64_t len); - uint64_t _aligned_len( - interval_set_t::iterator p, - uint64_t alloc_unit); - public: StupidAllocator(CephContext* cct, int64_t size, diff --git a/src/os/bluestore/bluestore_types.cc b/src/os/bluestore/bluestore_types.cc index a3d0d41acb5..3c8dc84810c 100644 --- a/src/os/bluestore/bluestore_types.cc +++ b/src/os/bluestore/bluestore_types.cc @@ -1181,6 +1181,15 @@ void bluestore_onode_t::shard_info::dump(Formatter *f) const f->dump_unsigned("bytes", bytes); } +void bluestore_onode_t::shard_info::generate_test_instances( + list<shard_info*>& o) +{ + o.push_back(new shard_info); + o.push_back(new shard_info); + o.back()->offset = 123; + o.back()->bytes = 456; +} + ostream& operator<<(ostream& out, const bluestore_onode_t::shard_info& si) { return out << std::hex << "0x" << si.offset << "(0x" << si.bytes << " bytes" diff --git a/src/os/bluestore/bluestore_types.h b/src/os/bluestore/bluestore_types.h index 8a0e0b7c9f7..66d4189ab6d 100644 --- a/src/os/bluestore/bluestore_types.h +++ b/src/os/bluestore/bluestore_types.h @@ -1011,6 +1011,7 @@ struct bluestore_onode_t { denc_varint(v.bytes, p); } void dump(ceph::Formatter *f) const; + static void generate_test_instances(std::list<shard_info*>& ls); }; std::vector<shard_info> extent_map_shards; ///< extent std::map shards (if any) diff --git a/src/os/bluestore/fastbmap_allocator_impl.cc b/src/os/bluestore/fastbmap_allocator_impl.cc index cea046c3ff2..4833b9d1a7b 100644 --- a/src/os/bluestore/fastbmap_allocator_impl.cc +++ b/src/os/bluestore/fastbmap_allocator_impl.cc @@ -17,19 +17,9 @@ uint64_t AllocatorLevel::l2_allocs = 0; inline interval_t _align2units(uint64_t offset, uint64_t len, uint64_t min_length) { - interval_t res; - if (len >= min_length) { - res.offset = p2roundup(offset, min_length); - auto delta_off = res.offset - offset; - if (len > delta_off) { - res.length = len - delta_off; - res.length = p2align<uint64_t>(res.length, min_length); - if (res.length) { - return res; - } - } - } - return interval_t(); + return len >= min_length ? 
+ interval_t(offset, p2align<uint64_t>(len, min_length)) : + interval_t(); } interval_t AllocatorLevel01Loose::_get_longest_from_l0(uint64_t pos0, diff --git a/src/osd/PG.cc b/src/osd/PG.cc index d2f97a129a2..490f8fa68d5 100644 --- a/src/osd/PG.cc +++ b/src/osd/PG.cc @@ -1490,7 +1490,7 @@ std::optional<requested_scrub_t> PG::validate_initiated_scrub( upd_flags.time_for_deep = time_for_deep; upd_flags.deep_scrub_on_error = false; - upd_flags.auto_repair = false; // will only be considered for periodic scrubs + upd_flags.auto_repair = false; if (upd_flags.must_deep_scrub) { upd_flags.calculated_to_deep = true; @@ -1506,6 +1506,25 @@ std::optional<requested_scrub_t> PG::validate_initiated_scrub( } } + if (try_to_auto_repair) { + // for shallow scrubs: rescrub if errors found + // for deep: turn 'auto-repair' on + if (upd_flags.calculated_to_deep) { + dout(10) << fmt::format( + "{}: performing an auto-repair deep scrub", + __func__) + << dendl; + upd_flags.auto_repair = true; + } else { + dout(10) << fmt::format( + "{}: will perform an auto-repair deep scrub if errors " + "are found", + __func__) + << dendl; + upd_flags.deep_scrub_on_error = true; + } + } + return upd_flags; } @@ -1823,6 +1842,11 @@ void PG::on_activate(interval_set<snapid_t> snaps) m_scrubber->on_pg_activate(m_planned_scrub); } +void PG::on_replica_activate() +{ + m_scrubber->on_replica_activate(); +} + void PG::on_active_exit() { backfill_reserving = false; diff --git a/src/osd/PG.h b/src/osd/PG.h index 2e82e74ab01..8713b1c8ae8 100644 --- a/src/osd/PG.h +++ b/src/osd/PG.h @@ -624,6 +624,8 @@ public: void on_activate(interval_set<snapid_t> snaps) override; + void on_replica_activate() override; + void on_activate_committed() override; void on_active_actmap() override; @@ -1446,10 +1448,13 @@ public: */ class PGLockWrapper { public: - explicit PGLockWrapper(PGRef locked_pg) : m_pg{locked_pg} {} + template <typename A_PG_REF> + explicit PGLockWrapper(A_PG_REF&& locked_pg) + : m_pg{std::forward<A_PG_REF>(locked_pg)} + {} PGRef pg() { return m_pg; } ~PGLockWrapper(); - PGLockWrapper(PGLockWrapper&& rhs) : m_pg(std::move(rhs.m_pg)) { + PGLockWrapper(PGLockWrapper&& rhs) noexcept : m_pg(std::move(rhs.m_pg)) { rhs.m_pg = nullptr; } PGLockWrapper(const PGLockWrapper& rhs) = delete; diff --git a/src/osd/PeeringState.cc b/src/osd/PeeringState.cc index 2c41b7b7118..c81cb958a7a 100644 --- a/src/osd/PeeringState.cc +++ b/src/osd/PeeringState.cc @@ -2967,6 +2967,8 @@ void PeeringState::activate( state_set(PG_STATE_ACTIVATING); pl->on_activate(std::move(to_trim)); + } else { + pl->on_replica_activate(); } if (acting_set_writeable()) { PGLog::LogEntryHandlerRef rollbacker{pl->get_log_handler(t)}; diff --git a/src/osd/PeeringState.h b/src/osd/PeeringState.h index b9f8401b05b..89ef702e0fe 100644 --- a/src/osd/PeeringState.h +++ b/src/osd/PeeringState.h @@ -389,6 +389,7 @@ public: virtual void on_role_change() = 0; virtual void on_change(ObjectStore::Transaction &t) = 0; virtual void on_activate(interval_set<snapid_t> to_trim) = 0; + virtual void on_replica_activate() {} virtual void on_activate_complete() = 0; virtual void on_new_interval() = 0; virtual Context *on_clean() = 0; diff --git a/src/osd/scrubber/osd_scrub.cc b/src/osd/scrubber/osd_scrub.cc index e3a71e26234..99367170dba 100644 --- a/src/osd/scrubber/osd_scrub.cc +++ b/src/osd/scrubber/osd_scrub.cc @@ -441,14 +441,14 @@ void OsdScrub::dec_scrubs_local() m_resource_bookkeeper.dec_scrubs_local(); } -bool OsdScrub::inc_scrubs_remote() +bool OsdScrub::inc_scrubs_remote(pg_t pgid) { - 
return m_resource_bookkeeper.inc_scrubs_remote(); + return m_resource_bookkeeper.inc_scrubs_remote(pgid); } -void OsdScrub::dec_scrubs_remote() +void OsdScrub::dec_scrubs_remote(pg_t pgid) { - m_resource_bookkeeper.dec_scrubs_remote(); + m_resource_bookkeeper.dec_scrubs_remote(pgid); } void OsdScrub::mark_pg_scrub_blocked(spg_t blocked_pg) diff --git a/src/osd/scrubber/osd_scrub.h b/src/osd/scrubber/osd_scrub.h index 570430660ed..56167df2ee6 100644 --- a/src/osd/scrubber/osd_scrub.h +++ b/src/osd/scrubber/osd_scrub.h @@ -67,8 +67,8 @@ class OsdScrub { // updating the resource counters bool inc_scrubs_local(); void dec_scrubs_local(); - bool inc_scrubs_remote(); - void dec_scrubs_remote(); + bool inc_scrubs_remote(pg_t pgid); + void dec_scrubs_remote(pg_t pgid); // counting the number of PGs stuck while scrubbing, waiting for objects void mark_pg_scrub_blocked(spg_t blocked_pg); diff --git a/src/osd/scrubber/pg_scrubber.cc b/src/osd/scrubber/pg_scrubber.cc index 4cd861b89c8..edc6c2a2dcd 100644 --- a/src/osd/scrubber/pg_scrubber.cc +++ b/src/osd/scrubber/pg_scrubber.cc @@ -85,6 +85,13 @@ ostream& operator<<(ostream& out, const requested_scrub_t& sf) return out; } +void PgScrubber::on_replica_activate() +{ + dout(10) << __func__ << dendl; + m_fsm->process_event(ReplicaActivate{}); +} + + /* * if the incoming message is from a previous interval, it must mean * PrimaryLogPG::on_change() was called when that interval ended. We can safely @@ -197,7 +204,6 @@ bool PgScrubber::should_abort() const * * Some of the considerations above are also relevant to the replica-side * initiation - * ('StartReplica' & 'StartReplicaNoWait'). */ void PgScrubber::initiate_regular_scrub(epoch_t epoch_queued) @@ -216,11 +222,6 @@ void PgScrubber::initiate_regular_scrub(epoch_t epoch_queued) } } -void PgScrubber::dec_scrubs_remote() -{ - m_osds->get_scrub_services().dec_scrubs_remote(); -} - void PgScrubber::advance_token() { m_current_token++; @@ -274,13 +275,7 @@ void PgScrubber::send_start_replica(epoch_t epoch_queued, } if (check_interval(epoch_queued) && is_token_current(token)) { - // save us some time by not waiting for updates if there are none - // to wait for. Affects the transition from NotActive into either - // ReplicaWaitUpdates or ActiveReplica. - if (pending_active_pushes()) - m_fsm->process_event(StartReplica{}); - else - m_fsm->process_event(StartReplicaNoWait{}); + m_fsm->process_event(StartReplica{}); } dout(10) << "scrubber event --<< " << __func__ << dendl; } @@ -452,6 +447,11 @@ unsigned int PgScrubber::scrub_requeue_priority( * Responsible for resetting any scrub state and releasing any resources. * Any inflight events will be ignored via check_interval/should_drop_message * or canceled. + * Specifically: + * - if Primary and in an active session - the IntervalChanged handler takes + * care of discarding the remote reservations, and transitioning out of + * Session. That resets both the scrubber and the FSM. + * - if we are a reserved replica - we need to free ourselves; */ void PgScrubber::on_new_interval() { @@ -461,13 +461,7 @@ void PgScrubber::on_new_interval() is_scrub_active(), is_queued_or_active()) << dendl; - // If in active session - the IntervalChanged handler takes care of - // discarding the remote reservations, and transitioning out of Session. - // That resets both the scrubber and the FSM. 
m_fsm->process_event(IntervalChanged{}); - - // The 'FullReset' is only relevant if we are not an active Primary - m_fsm->process_event(FullReset{}); rm_from_osd_scrubbing(); } @@ -806,7 +800,7 @@ void PgScrubber::cancel_callback(scrubber_callback_cancel_token_t token) m_osds->sleep_timer.cancel_event(token); } -LogChannelRef &PgScrubber::get_clog() const +LogChannelRef& PgScrubber::get_clog() const { return m_osds->clog; } @@ -816,6 +810,11 @@ int PgScrubber::get_whoami() const return m_osds->whoami; } +[[nodiscard]] bool PgScrubber::is_high_priority() const +{ + return m_flags.required; +} + /* * The selected range is set directly into 'm_start' and 'm_end' * setting: @@ -1139,13 +1138,7 @@ void PgScrubber::on_init() m_pg->publish_stats_to_osd(); } -/* - * Note: as on_replica_init() is likely to be called twice (entering - * both ReplicaWaitUpdates & ActiveReplica), its operations should be - * idempotent. - * Now that it includes some state-changing operations, we need to check - * m_active against double-activation. - */ + void PgScrubber::on_replica_init() { dout(10) << __func__ << " called with 'active' " @@ -1159,6 +1152,7 @@ void PgScrubber::on_replica_init() } } + int PgScrubber::build_primary_map_chunk() { epoch_t map_building_since = m_pg->get_osdmap_epoch(); @@ -1217,23 +1211,21 @@ int PgScrubber::build_replica_map_chunk() // the local map has been created. Send it to the primary. // Note: once the message reaches the Primary, it may ask us for another - // chunk - and we better be done with the current scrub. Thus - the - // preparation of the reply message is separate, and we clear the scrub - // state before actually sending it. + // chunk - and we better be done with the current scrub. The clearing of + // state must be complete before we relinquish the PG lock. - auto reply = prep_replica_map_msg(PreemptionNoted::no_preemption); - replica_handling_done(); - dout(15) << __func__ << " chunk map sent " << dendl; - send_replica_map(reply); - } break; + send_replica_map(prep_replica_map_msg(PreemptionNoted::no_preemption)); + dout(15) << fmt::format("{}: chunk map sent", __func__) << dendl; + } + break; default: // negative retval: build_scrub_map_chunk() signalled an error // Pre-Pacific code ignored this option, treating it as a success. // \todo Add an error flag in the returning message. + // \todo: must either abort, send a reply, or return some error message dout(1) << "Error! Aborting. ActiveReplica::react(SchedReplica) Ret: " << ret << dendl; - replica_handling_done(); // only in debug mode for now: assert(false && "backend error"); break; @@ -1520,6 +1512,7 @@ void PgScrubber::replica_scrub_op(OpRequestRef op) replica_scrubmap_pos.reset(); // needed? 
RRR set_queued_or_active(); + advance_token(); m_osds->queue_for_rep_scrub(m_pg, m_replica_request_priority, m_flags.priority, @@ -1675,7 +1668,7 @@ void PgScrubber::handle_scrub_reserve_msgs(OpRequestRef op) auto m = op->get_req<MOSDScrubReserve>(); switch (m->type) { case MOSDScrubReserve::REQUEST: - handle_scrub_reserve_request(op); + m_fsm->process_event(ReplicaReserveReq{op, m->from}); break; case MOSDScrubReserve::GRANT: m_fsm->process_event(ReplicaGrant{op, m->from}); @@ -1684,65 +1677,12 @@ void PgScrubber::handle_scrub_reserve_msgs(OpRequestRef op) m_fsm->process_event(ReplicaReject{op, m->from}); break; case MOSDScrubReserve::RELEASE: - handle_scrub_reserve_release(op); + m_fsm->process_event(ReplicaRelease{op, m->from}); break; } } -void PgScrubber::handle_scrub_reserve_request(OpRequestRef op) -{ - auto request_ep = op->sent_epoch; - dout(20) << fmt::format("{}: request_ep:{} recovery:{}", - __func__, - request_ep, - m_osds->is_recovery_active()) - << dendl; - - // The primary may unilaterally restart the scrub process without notifying - // replicas. Unconditionally clear any existing state prior to handling - // the new reservation. - m_fsm->process_event(FullReset{}); - - bool granted{false}; - if (m_pg->cct->_conf->osd_scrub_during_recovery || - !m_osds->is_recovery_active()) { - - granted = m_osds->get_scrub_services().inc_scrubs_remote(); - if (granted) { - m_fsm->process_event(ReplicaGrantReservation{}); - } else { - dout(20) << __func__ << ": failed to reserve remotely" << dendl; - } - } else { - dout(10) << __func__ << ": recovery is active; not granting" << dendl; - } - - dout(10) << __func__ << " reserved? " << (granted ? "yes" : "no") << dendl; - - Message* reply = new MOSDScrubReserve( - spg_t(m_pg->info.pgid.pgid, m_pg->get_primary().shard), - request_ep, - granted ? MOSDScrubReserve::GRANT : MOSDScrubReserve::REJECT, - m_pg_whoami); - - m_osds->send_message_osd_cluster(reply, op->get_req()->get_connection()); -} - -void PgScrubber::handle_scrub_reserve_release(OpRequestRef op) -{ - dout(10) << __func__ << " " << *op->get_req() << dendl; - if (should_drop_message(op)) { - // we might have turned into a Primary in the meantime. The interval - // change should have been noticed already, and caused us to reset. - return; - } - - // this specific scrub session has terminated. All incoming events carrying - // the old tag will be discarded. 
- m_fsm->process_event(FullReset{}); -} - bool PgScrubber::set_reserving_now() { return m_osds->get_scrub_services().set_reserving_now(m_pg_id, ceph_clock_now()); @@ -2211,6 +2151,7 @@ void PgScrubber::handle_query_state(ceph::Formatter* f) PgScrubber::~PgScrubber() { + m_fsm->process_event(IntervalChanged{}); if (m_scrub_job) { // make sure the OSD won't try to scrub this one just now rm_from_osd_scrubbing(); diff --git a/src/osd/scrubber/pg_scrubber.h b/src/osd/scrubber/pg_scrubber.h index 9946f9ce5ee..3ac4d6dfdb7 100644 --- a/src/osd/scrubber/pg_scrubber.h +++ b/src/osd/scrubber/pg_scrubber.h @@ -258,14 +258,6 @@ class PgScrubber : public ScrubPgIF, */ void handle_scrub_reserve_msgs(OpRequestRef op) final; - /** - * we are a replica being asked by the Primary to reserve OSD resources for - * scrubbing - */ - void handle_scrub_reserve_request(OpRequestRef op); - - void handle_scrub_reserve_release(OpRequestRef op); - // managing scrub op registration void update_scrub_job(const requested_scrub_t& request_flags) final; @@ -334,6 +326,8 @@ class PgScrubber : public ScrubPgIF, void on_new_interval() final; + void on_replica_activate() final; + void scrub_clear_state() final; bool is_queued_or_active() const final; @@ -408,6 +402,9 @@ class PgScrubber : public ScrubPgIF, return m_pg->recovery_state.is_primary(); } + /// is this scrub more than just regular periodic scrub? + [[nodiscard]] bool is_high_priority() const final; + void set_state_name(const char* name) final { m_fsm_state_name = name; @@ -476,13 +473,9 @@ class PgScrubber : public ScrubPgIF, [[nodiscard]] bool was_epoch_changed() const final; void set_queued_or_active() final; - /// Clears `m_queued_or_active` and restarts snaptrimming + /// Clears `m_queued_or_active` and restarts snap-trimming void clear_queued_or_active() final; - void dec_scrubs_remote() final; - - void advance_token() final; - void mark_local_map_ready() final; [[nodiscard]] bool are_all_maps_available() const final; @@ -567,6 +560,9 @@ class PgScrubber : public ScrubPgIF, void requeue_waiting() const { m_pg->requeue_ops(m_pg->waiting_for_scrub); } + /// Modify the token identifying the current replica scrub operation + void advance_token(); + /** * mark down some parameters of the initiated scrub: * - the epoch when started; @@ -675,11 +671,20 @@ class PgScrubber : public ScrubPgIF, epoch_t m_epoch_start{0}; ///< the actual epoch when scrubbing started /** - * (replica) a tag identifying a specific scrub "session". Incremented - * whenever the Primary releases the replica scrub resources. When the scrub - * session is terminated (even if the interval remains unchanged, as might - * happen following an asok no-scrub command), stale scrub-resched messages - * triggered by the backend will be discarded. + * (replica) a tag identifying a specific replica operation, i.e. the + * creation of the replica scrub map for a single chunk. + * + * Background: the backend is asynchronous, and the specific + * operations are size-limited. While the scrubber handles a specific + * request, it is continuously triggered to poll the backend for the + * full results for the chunk handled. + * Once the chunk request becomes obsolete, either following an interval + * change or if a new request was received, we must not send the stale + * data to the primary. The polling of the obsolete chunk request must + * stop, and the stale backend response should be discarded. 
+ * In other words - the token should be read as saying "the primary has + * lost interest in the results of all operations identified by mismatched + * token values". */ Scrub::act_token_t m_current_token{1}; diff --git a/src/osd/scrubber/scrub_machine.cc b/src/osd/scrubber/scrub_machine.cc index 40b43b6e077..2e18ed5cf4e 100644 --- a/src/osd/scrubber/scrub_machine.cc +++ b/src/osd/scrubber/scrub_machine.cc @@ -654,62 +654,179 @@ ScrubMachine::~ScrubMachine() = default; // -------- for replicas ----------------------------------------------------- -// ----------------------- ReservedReplica -------------------------------- +// ----------------------- ReplicaActive -------------------------------- -ReservedReplica::ReservedReplica(my_context ctx) +ReplicaActive::ReplicaActive(my_context ctx) : my_base(ctx) - , NamedSimply(context<ScrubMachine>().m_scrbr, "ReservedReplica") + , NamedSimply(context<ScrubMachine>().m_scrbr, "ReplicaActive") { - dout(10) << "-- state -->> ReservedReplica" << dendl; + DECLARE_LOCALS; // 'scrbr' & 'pg_id' aliases + dout(10) << "-- state -->> ReplicaActive" << dendl; + m_pg = scrbr->get_pg(); + m_osds = m_pg->get_pg_osd(ScrubberPasskey()); } -ReservedReplica::~ReservedReplica() +ReplicaActive::~ReplicaActive() { DECLARE_LOCALS; // 'scrbr' & 'pg_id' aliases - scrbr->dec_scrubs_remote(); - scrbr->advance_token(); + if (reserved_by_my_primary) { + dout(10) << "ReplicaActive::~ReplicaActive(): clearing reservation" + << dendl; + clear_reservation_by_remote_primary(); + } } -// ----------------------- ReplicaIdle -------------------------------- -ReplicaIdle::ReplicaIdle(my_context ctx) - : my_base(ctx) - , NamedSimply(context<ScrubMachine>().m_scrbr, "ReplicaIdle") +/* + * Note: we are expected to be in the initial internal state (Idle) when + * receiving any registration request. Our other internal states, the + * active ones, have their own handler for this event, and will treat it + * as an abort request. + * + * Process: + * - if already reserved: clear existing reservation, then continue + * - ask the OSD for the "reservation resource" + * - if granted: mark it internally and notify the Primary. + * - otherwise: just notify the requesting primary. + */ +void ReplicaActive::on_reserve_req(const ReplicaReserveReq& ev) +{ + DECLARE_LOCALS; // 'scrbr' & 'pg_id' aliases + dout(10) << "ReplicaActive::on_reserve_req()" << dendl; + + if (reserved_by_my_primary) { + dout(10) << "ReplicaActive::on_reserve_req(): already reserved" << dendl; + // clear the existing reservation + clear_reservation_by_remote_primary(); // clears the flag, too + } + + // ask the OSD for the reservation + const auto ret = get_remote_reservation(); + if (ret.granted) { + reserved_by_my_primary = true; + dout(10) << fmt::format("{}: reserved? yes", __func__) << dendl; + } else { + dout(10) << fmt::format("{}: reserved? 
no ({})", __func__, ret.error_msg) + << dendl; + } + + Message* reply = new MOSDScrubReserve( + spg_t(pg_id.pgid, m_pg->get_primary().shard), ev.m_op->sent_epoch, ret.op, + m_pg->pg_whoami); + m_osds->send_message_osd_cluster(reply, ev.m_op->get_req()->get_connection()); +} + + +void ReplicaActive::on_release(const ReplicaRelease& ev) +{ + DECLARE_LOCALS; // 'scrbr' & 'pg_id' aliases + if (!reserved_by_my_primary) { + dout(5) << fmt::format( + "ReplicaActive::on_release() from {}: not reserved!", + ev.m_from) + << dendl; + return; + } + dout(10) << fmt::format("ReplicaActive::on_release() from {}", ev.m_from) + << dendl; + clear_reservation_by_remote_primary(); +} + + +ReplicaActive::ReservationAttemptRes ReplicaActive::get_remote_reservation() +{ + using ReservationAttemptRes = ReplicaActive::ReservationAttemptRes; + DECLARE_LOCALS; // 'scrbr' & 'pg_id' aliases + if (!scrbr->get_pg_cct()->_conf.get_val<bool>("osd_scrub_during_recovery") && + m_osds->is_recovery_active()) { + return ReservationAttemptRes{ + MOSDScrubReserve::REJECT, "recovery is active", false}; + } + + if (m_osds->get_scrub_services().inc_scrubs_remote(scrbr->get_spgid().pgid)) { + return ReservationAttemptRes{MOSDScrubReserve::GRANT, "", true}; + } else { + return ReservationAttemptRes{ + MOSDScrubReserve::REJECT, "failed to reserve remotely", false}; + } +} + + +void ReplicaActive::clear_reservation_by_remote_primary() { - dout(10) << "-- state -->> ReplicaIdle" << dendl; + DECLARE_LOCALS; // 'scrbr' & 'pg_id' aliases + dout(10) << "ReplicaActive::clear_reservation_by_remote_primary()" << dendl; + m_osds->get_scrub_services().dec_scrubs_remote(scrbr->get_spgid().pgid); + reserved_by_my_primary = false; } -ReplicaIdle::~ReplicaIdle() + +void ReplicaActive::check_for_updates(const StartReplica& ev) { + DECLARE_LOCALS; // 'scrbr' & 'pg_id' aliases + dout(10) << "ReplicaActive::check_for_updates()" << dendl; + post_event(ReplicaPushesUpd{}); } +// ---------------- ReplicaActive/ReplicaIdle --------------------------- -// ----------------------- ReplicaActiveOp -------------------------------- +ReplicaIdle::ReplicaIdle(my_context ctx) + : my_base(ctx) + , NamedSimply(context<ScrubMachine>().m_scrbr, "ReplicaActive/ReplicaIdle") +{ + dout(10) << "-- state -->> ReplicaActive/ReplicaIdle" << dendl; +} + + +// ------------- ReplicaActive/ReplicaActiveOp -------------------------- ReplicaActiveOp::ReplicaActiveOp(my_context ctx) : my_base(ctx) , NamedSimply(context<ScrubMachine>().m_scrbr, "ReplicaActiveOp") { - dout(10) << "-- state -->> ReplicaActiveOp" << dendl; + dout(10) << "-- state -->> ReplicaActive/ReplicaActiveOp" << dendl; + DECLARE_LOCALS; // 'scrbr' & 'pg_id' aliases + scrbr->on_replica_init(); } + ReplicaActiveOp::~ReplicaActiveOp() { DECLARE_LOCALS; // 'scrbr' & 'pg_id' aliases + dout(10) << __func__ << dendl; scrbr->replica_handling_done(); } -// ----------------------- ReplicaWaitUpdates -------------------------------- +sc::result ReplicaActiveOp::react(const StartReplica&) +{ + DECLARE_LOCALS; // 'scrbr' & 'pg_id' aliases + dout(10) << "ReplicaActiveOp::react(const StartReplica&)" << dendl; + + const auto msg = fmt::format( + "osd.{} pg[{}]: new chunk request while still handling the previous one", + scrbr->get_whoami(), scrbr->get_spgid()); + dout(1) << msg << dendl; + scrbr->get_clog()->warn() << msg; + + post_event(ReplicaPushesUpd{}); + + // exit & re-enter the state + return transit<ReplicaActiveOp>(); +} + +// ------------- ReplicaActive/ReplicaWaitUpdates ------------------------ 
ReplicaWaitUpdates::ReplicaWaitUpdates(my_context ctx) : my_base(ctx) - , NamedSimply(context<ScrubMachine>().m_scrbr, "ReplicaWaitUpdates") + , NamedSimply( + context<ScrubMachine>().m_scrbr, + "ReplicaActive/ReplicaActiveOp/ReplicaWaitUpdates") { - dout(10) << "-- state -->> ReplicaWaitUpdates" << dendl; - DECLARE_LOCALS; // 'scrbr' & 'pg_id' aliases - scrbr->on_replica_init(); + dout(10) << "-- state -->> ReplicaActive/ReplicaActiveOp/ReplicaWaitUpdates" + << dendl; } + /* * Triggered externally, by the entity that had an update re pushes */ @@ -720,7 +837,6 @@ sc::result ReplicaWaitUpdates::react(const ReplicaPushesUpd&) << scrbr->pending_active_pushes() << dendl; if (scrbr->pending_active_pushes() == 0) { - // done waiting return transit<ReplicaBuildingMap>(); } @@ -728,19 +844,21 @@ sc::result ReplicaWaitUpdates::react(const ReplicaPushesUpd&) return discard_event(); } + // ----------------------- ReplicaBuildingMap ----------------------------------- ReplicaBuildingMap::ReplicaBuildingMap(my_context ctx) : my_base(ctx) - , NamedSimply(context<ScrubMachine>().m_scrbr, "ReplicaBuildingMap") + , NamedSimply( + context<ScrubMachine>().m_scrbr, + "ReplicaActive/ReplicaActiveOp/ReplicaBuildingMap") { - DECLARE_LOCALS; // 'scrbr' & 'pg_id' aliases - dout(10) << "-- state -->> ReplicaBuildingMap" << dendl; - // and as we might have skipped ReplicaWaitUpdates: - scrbr->on_replica_init(); + dout(10) << "-- state -->> ReplicaActive/ReplicaActiveOp/ReplicaBuildingMap" + << dendl; post_event(SchedReplica{}); } + sc::result ReplicaBuildingMap::react(const SchedReplica&) { DECLARE_LOCALS; // 'scrbr' & 'pg_id' aliases @@ -751,7 +869,6 @@ sc::result ReplicaBuildingMap::react(const SchedReplica&) dout(10) << "replica scrub job preempted" << dendl; scrbr->send_preempted_replica(); - scrbr->replica_handling_done(); return transit<ReplicaIdle>(); } diff --git a/src/osd/scrubber/scrub_machine.h b/src/osd/scrubber/scrub_machine.h index cbce07fe183..7e9c887145a 100644 --- a/src/osd/scrubber/scrub_machine.h +++ b/src/osd/scrubber/scrub_machine.h @@ -48,39 +48,54 @@ namespace mpl = ::boost::mpl; void on_event_creation(std::string_view nm); void on_event_discard(std::string_view nm); -// reservation grant/reject events carry the peer's response: -/// a replica has granted our reservation request -struct ReplicaGrant : sc::event<ReplicaGrant> { - OpRequestRef m_op; - pg_shard_t m_from; - ReplicaGrant(OpRequestRef op, pg_shard_t from) : m_op{op}, m_from{from} +template <typename EV> +struct OpCarryingEvent : sc::event<EV> { + static constexpr const char* event_name = "<>"; + const OpRequestRef m_op; + const pg_shard_t m_from; + OpCarryingEvent(OpRequestRef op, pg_shard_t from) : m_op{op}, m_from{from} { - on_event_creation("ReplicaGrant"); + on_event_creation(static_cast<EV*>(this)->event_name); } + + OpCarryingEvent(const OpCarryingEvent&) = default; + OpCarryingEvent(OpCarryingEvent&&) = default; + OpCarryingEvent& operator=(const OpCarryingEvent&) = default; + OpCarryingEvent& operator=(OpCarryingEvent&&) = default; + void print(std::ostream* out) const { - *out << fmt::format("ReplicaGrant(from: {})", m_from); + *out << fmt::format("{} (from: {})", EV::event_name, m_from); } - std::string_view print() const { return "ReplicaGrant"; } - ~ReplicaGrant() { on_event_discard("ReplicaGrant"); } + std::string_view print() const { return EV::event_name; } + ~OpCarryingEvent() { on_event_discard(EV::event_name); } }; -/// a replica has denied our reservation request -struct ReplicaReject : sc::event<ReplicaReject> { 
- OpRequestRef m_op; - pg_shard_t m_from; - ReplicaReject(OpRequestRef op, pg_shard_t from) : m_op{op}, m_from{from} - { - on_event_creation("ReplicaReject"); - } - void print(std::ostream* out) const - { - *out << fmt::format("ReplicaReject(from: {})", m_from); +#define OP_EV(T) \ + struct T : OpCarryingEvent<T> { \ + static constexpr const char* event_name = #T; \ + template <typename... Args> \ + T(Args&&... args) : OpCarryingEvent(std::forward<Args>(args)...) \ + { \ + } \ } - std::string_view print() const { return "ReplicaReject"; } - ~ReplicaReject() { on_event_discard("ReplicaReject"); } -}; + + +// reservation events carry peer's request/response data: + +/// a replica has granted our reservation request +OP_EV(ReplicaGrant); + +/// a replica has denied our reservation request +OP_EV(ReplicaReject); + +/// received Primary request for scrub reservation +OP_EV(ReplicaReserveReq); + +/// explicit release request from the Primary +OP_EV(ReplicaRelease); + #define MEV(E) \ struct E : sc::event<E> { \ @@ -149,15 +164,12 @@ MEV(IntLocalMapDone) /// scrub_snapshot_metadata() MEV(DigestUpdate) -/// event emitted when the replica grants a reservation to the primary -MEV(ReplicaGrantReservation) +/// we are a replica for this PG +MEV(ReplicaActivate) /// initiating replica scrub MEV(StartReplica) -/// 'start replica' when there are no pending updates -MEV(StartReplicaNoWait) - MEV(SchedReplica) /// Update to active_pushes. 'active_pushes' represents recovery @@ -196,8 +208,11 @@ struct NotActive; ///< the quiescent state. No active scrubbing. struct Session; ///< either reserving or actively scrubbing struct ReservingReplicas; ///< securing scrub resources from replicas' OSDs struct ActiveScrubbing; ///< the active state for a Primary. A sub-machine. -struct ReplicaIdle; ///< Initial reserved replica state -struct ReplicaBuildingMap; ///< an active state for a replica. +// the active states for a replica: +struct ReplicaActive; ///< the quiescent state for a replica +struct ReplicaActiveOp; +struct ReplicaWaitUpdates; +struct ReplicaBuildingMap; class ScrubMachine : public sc::state_machine<ScrubMachine, NotActive> { @@ -355,8 +370,8 @@ public: * * - a special end-of-recovery Primary scrub event ('AfterRepairScrub'). * - * - (for a replica) 'StartReplica' or 'StartReplicaNoWait', triggered by - * an incoming MOSDRepScrub message. + * - (if already in ReplicaActive): an incoming MOSDRepScrub triggers + * 'StartReplica'. * * note (20.8.21): originally, AfterRepairScrub was triggering a scrub without * waiting for replica resources to be acquired. 
But once replicas started @@ -366,11 +381,13 @@ public: struct NotActive : sc::state<NotActive, ScrubMachine>, NamedSimply { explicit NotActive(my_context ctx); - using reactions = - mpl::list<sc::custom_reaction<StartScrub>, - // a scrubbing that was initiated at recovery completion: - sc::custom_reaction<AfterRepairScrub>, - sc::transition<ReplicaGrantReservation, ReplicaIdle>>; + using reactions = mpl::list< + sc::custom_reaction<StartScrub>, + // a scrubbing that was initiated at recovery completion: + sc::custom_reaction<AfterRepairScrub>, + // peering done, and we are a replica + sc::transition<ReplicaActivate, ReplicaActive>>; + sc::result react(const StartScrub&); sc::result react(const AfterRepairScrub&); }; @@ -596,47 +613,111 @@ struct WaitDigestUpdate : sc::state<WaitDigestUpdate, ActiveScrubbing>, // ----------------------------- the "replica active" states -/** - * ReservedReplica +/* + * The replica states: + * + * ReplicaActive - starts after being peered as a replica. Ends on interval. + * - maintain the "I am reserved by a primary" state; + * - handles reservation requests + * + * - ReplicaIdle - ready for a new scrub request + * * initial state of ReplicaActive * - * Parent state for replica states, Controls lifecycle for - * PgScrubber::m_reservations. + * - ReplicaActiveOp - handling a single map request op + * * ReplicaWaitUpdates + * * ReplicaBuildingMap */ -struct ReservedReplica : sc::state<ReservedReplica, ScrubMachine, ReplicaIdle>, + +struct ReplicaIdle; + +struct ReplicaActive : sc::state<ReplicaActive, ScrubMachine, ReplicaIdle>, NamedSimply { - explicit ReservedReplica(my_context ctx); - ~ReservedReplica(); + explicit ReplicaActive(my_context ctx); + ~ReplicaActive(); - using reactions = mpl::list<sc::transition<FullReset, NotActive>>; -}; + /// handle a reservation request from a primary + void on_reserve_req(const ReplicaReserveReq&); -struct ReplicaWaitUpdates; + /// handle a 'release' from a primary + void on_release(const ReplicaRelease&); -/** - * ReplicaIdle - * - * Replica is waiting for a map request. - */ -struct ReplicaIdle : sc::state<ReplicaIdle, ReservedReplica>, - NamedSimply { - explicit ReplicaIdle(my_context ctx); - ~ReplicaIdle(); + void check_for_updates(const StartReplica&); using reactions = mpl::list< - sc::transition<StartReplica, ReplicaWaitUpdates>, - sc::transition<StartReplicaNoWait, ReplicaBuildingMap>>; + // a reservation request from the primary + sc::in_state_reaction< + ReplicaReserveReq, + ReplicaActive, + &ReplicaActive::on_reserve_req>, + // an explicit release request from the primary + sc::in_state_reaction< + ReplicaRelease, + ReplicaActive, + &ReplicaActive::on_release>, + // when the interval ends - we may not be a replica anymore + sc::transition<IntervalChanged, NotActive>>; + + private: + bool reserved_by_my_primary{false}; + + // shortcuts: + PG* m_pg; + OSDService* m_osds; + + /// a convenience internal result structure + struct ReservationAttemptRes { + MOSDScrubReserve::ReserveMsgOp op; // GRANT or REJECT + std::string_view error_msg; + bool granted; + }; + + /// request a scrub resource from our local OSD + /// (after performing some checks) + ReservationAttemptRes get_remote_reservation(); + + void clear_reservation_by_remote_primary(); +}; + + +struct ReplicaIdle : sc::state<ReplicaIdle, ReplicaActive>, NamedSimply { + explicit ReplicaIdle(my_context ctx); + ~ReplicaIdle() = default; + + // note the execution of check_for_updates() when transitioning to + // ReplicaActiveOp/ReplicaWaitUpdates. 
That would trigger a ReplicaPushesUpd + // event, which will be handled by ReplicaWaitUpdates. + using reactions = mpl::list<sc::transition< + StartReplica, + ReplicaWaitUpdates, + ReplicaActive, + &ReplicaActive::check_for_updates>>; }; + /** - * ReservedActiveOp + * ReplicaActiveOp * - * Lifetime matches handling for a single map request op + * Lifetime matches handling for a single map request op. */ struct ReplicaActiveOp - : sc::state<ReplicaActiveOp, ReservedReplica, ReplicaWaitUpdates>, - NamedSimply { + : sc::state<ReplicaActiveOp, ReplicaActive, ReplicaWaitUpdates>, + NamedSimply { explicit ReplicaActiveOp(my_context ctx); ~ReplicaActiveOp(); + + using reactions = mpl::list<sc::custom_reaction<StartReplica>>; + + /** + * Handling the unexpected (read - caused by a bug) case of receiving a + * new chunk request while still handling the previous one. + * To note: + * - the primary is evidently no longer waiting for the results of the + * previous request. On the other hand + * - we must respond to the new request, as the primary would wait for + * it "forever", + * - and we should log this unexpected scenario clearly in the cluster log. + */ + sc::result react(const StartReplica&); }; /* @@ -646,7 +727,7 @@ struct ReplicaActiveOp * - the details of the Primary's request were internalized by PgScrubber; * - 'active' scrubbing is set */ -struct ReplicaWaitUpdates : sc::state<ReplicaWaitUpdates, ReservedReplica>, +struct ReplicaWaitUpdates : sc::state<ReplicaWaitUpdates, ReplicaActiveOp>, NamedSimply { explicit ReplicaWaitUpdates(my_context ctx); using reactions = mpl::list<sc::custom_reaction<ReplicaPushesUpd>>; @@ -655,8 +736,8 @@ struct ReplicaWaitUpdates : sc::state<ReplicaWaitUpdates, ReservedReplica>, }; -struct ReplicaBuildingMap : sc::state<ReplicaBuildingMap, ReservedReplica> - , NamedSimply { +struct ReplicaBuildingMap : sc::state<ReplicaBuildingMap, ReplicaActiveOp>, + NamedSimply { explicit ReplicaBuildingMap(my_context ctx); using reactions = mpl::list<sc::custom_reaction<SchedReplica>>; diff --git a/src/osd/scrubber/scrub_machine_lstnr.h b/src/osd/scrubber/scrub_machine_lstnr.h index 4206c789f91..34247083f4f 100644 --- a/src/osd/scrubber/scrub_machine_lstnr.h +++ b/src/osd/scrubber/scrub_machine_lstnr.h @@ -196,12 +196,6 @@ struct ScrubMachineListener { virtual void set_queued_or_active() = 0; virtual void clear_queued_or_active() = 0; - /// Release remote scrub reservation - virtual void dec_scrubs_remote() = 0; - - /// Advance replica token - virtual void advance_token() = 0; - /** * Our scrubbing is blocked, waiting for an excessive length of time for * our target chunk to be unlocked. We will set the corresponding flags, @@ -235,4 +229,7 @@ struct ScrubMachineListener { // temporary interface (to be discarded in a follow-up PR) /// set the 'resources_failure' flag in the scrub-job object virtual void flag_reservations_failure() = 0; + + /// is this scrub more than just a regular periodic scrub? 
+ [[nodiscard]] virtual bool is_high_priority() const = 0; }; diff --git a/src/osd/scrubber/scrub_reservations.cc b/src/osd/scrubber/scrub_reservations.cc index 3194c9871d5..449856e7b45 100644 --- a/src/osd/scrubber/scrub_reservations.cc +++ b/src/osd/scrubber/scrub_reservations.cc @@ -49,12 +49,17 @@ ReplicaReservations::ReplicaReservations(ScrubMachineListener& scrbr) }); m_next_to_request = m_sorted_secondaries.cbegin(); - // send out the 1'st request (unless we have no replicas) - send_next_reservation_or_complete(); - - m_slow_response_warn_timeout = - m_scrubber.get_pg_cct()->_conf.get_val<milliseconds>( - "osd_scrub_slow_reservation_response"); + if (m_scrubber.is_high_priority()) { + // for high-priority scrubs (i.e. - user-initiated), no reservations are + // needed. + dout(10) << "high-priority scrub - no reservations needed" << dendl; + } else { + // send out the 1'st request (unless we have no replicas) + send_next_reservation_or_complete(); + m_slow_response_warn_timeout = + m_scrubber.get_pg_cct()->_conf.get_val<milliseconds>( + "osd_scrub_slow_reservation_response"); + } } void ReplicaReservations::release_all() diff --git a/src/osd/scrubber/scrub_resources.cc b/src/osd/scrubber/scrub_resources.cc index 179bd5e7e0e..dd9d31a1573 100644 --- a/src/osd/scrubber/scrub_resources.cc +++ b/src/osd/scrubber/scrub_resources.cc @@ -4,10 +4,12 @@ #include "./scrub_resources.h" #include <fmt/format.h> +#include <fmt/ranges.h> #include "common/debug.h" #include "include/ceph_assert.h" +#include "osd/osd_types_fmt.h" using ScrubResources = Scrub::ScrubResources; @@ -19,28 +21,38 @@ ScrubResources::ScrubResources( , conf{config} {} +// ------------------------- scrubbing as primary on this OSD ----------------- + +// can we increase the number of concurrent scrubs performed by Primaries +// on this OSD? note that this is counted separately from the number of scrubs +// performed by replicas. bool ScrubResources::can_inc_scrubs() const { std::lock_guard lck{resource_lock}; - if (scrubs_local + scrubs_remote < conf->osd_max_scrubs) { - return true; - } - log_upwards(fmt::format( - "{}== false. {} (local) + {} (remote) >= max ({})", __func__, - scrubs_local, scrubs_remote, conf->osd_max_scrubs)); - return false; + return can_inc_local_scrubs_unlocked(); } bool ScrubResources::inc_scrubs_local() { std::lock_guard lck{resource_lock}; - if (scrubs_local + scrubs_remote < conf->osd_max_scrubs) { + if (can_inc_local_scrubs_unlocked()) { ++scrubs_local; + log_upwards(fmt::format( + "{}: {} -> {} (max {}, remote {})", __func__, (scrubs_local - 1), + scrubs_local, conf->osd_max_scrubs, granted_reservations.size())); + return true; + } + return false; +} + +bool ScrubResources::can_inc_local_scrubs_unlocked() const +{ + if (scrubs_local < conf->osd_max_scrubs) { return true; } log_upwards(fmt::format( - "{}: {} (local) + {} (remote) >= max ({})", __func__, scrubs_local, - scrubs_remote, conf->osd_max_scrubs)); + "{}: Cannot add local scrubs. 
Current counter ({}) >= max ({})", __func__, + scrubs_local, conf->osd_max_scrubs)); return false; } @@ -48,43 +60,62 @@ void ScrubResources::dec_scrubs_local() { std::lock_guard lck{resource_lock}; log_upwards(fmt::format( - "{}: {} -> {} (max {}, remote {})", __func__, scrubs_local, - (scrubs_local - 1), conf->osd_max_scrubs, scrubs_remote)); + "{}: {} -> {} (max {}, remote {})", + __func__, scrubs_local, (scrubs_local - 1), conf->osd_max_scrubs, + granted_reservations.size())); --scrubs_local; ceph_assert(scrubs_local >= 0); } -bool ScrubResources::inc_scrubs_remote() +// ------------------------- scrubbing on this OSD as replicas ---------------- + +bool ScrubResources::inc_scrubs_remote(pg_t pgid) { std::lock_guard lck{resource_lock}; - if (scrubs_local + scrubs_remote < conf->osd_max_scrubs) { + + // if this PG is already reserved - it's probably a benign bug. + // report it, but do not fail the reservation. + if (granted_reservations.contains(pgid)) { + log_upwards(fmt::format("{}: pg[{}] already reserved", __func__, pgid)); + return true; + } + + auto pre_op_cnt = granted_reservations.size(); + if (std::cmp_less(pre_op_cnt, conf->osd_max_scrubs)) { + granted_reservations.insert(pgid); log_upwards(fmt::format( - "{}: {} -> {} (max {}, local {})", __func__, scrubs_remote, - (scrubs_remote + 1), conf->osd_max_scrubs, scrubs_local)); - ++scrubs_remote; + "{}: pg[{}] reserved. Remote scrubs count changed from {} -> {} (max " + "{}, local {})", + __func__, pgid, pre_op_cnt, granted_reservations.size(), + conf->osd_max_scrubs, scrubs_local)); return true; } log_upwards(fmt::format( - "{}: {} (local) + {} (remote) >= max ({})", __func__, scrubs_local, - scrubs_remote, conf->osd_max_scrubs)); + "{}: pg[{}] failed. Too many concurrent replica scrubs ({} >= max ({}))", + __func__, pgid, pre_op_cnt, conf->osd_max_scrubs)); return false; } -void ScrubResources::dec_scrubs_remote() +void ScrubResources::dec_scrubs_remote(pg_t pgid) { std::lock_guard lck{resource_lock}; - log_upwards(fmt::format( - "{}: {} -> {} (max {}, local {})", __func__, scrubs_remote, - (scrubs_remote - 1), conf->osd_max_scrubs, scrubs_local)); - --scrubs_remote; - ceph_assert(scrubs_remote >= 0); + // we might not have this PG in the set (e.g. 
if we are concluding a + // high priority scrub, one that does not require reservations) + auto cnt = granted_reservations.erase(pgid); + if (cnt) { + log_upwards(fmt::format( + "{}: remote reservation for {} removed -> {} (max {}, local {})", + __func__, pgid, granted_reservations.size(), conf->osd_max_scrubs, + scrubs_local)); + } } void ScrubResources::dump_scrub_reservations(ceph::Formatter* f) const { std::lock_guard lck{resource_lock}; f->dump_int("scrubs_local", scrubs_local); - f->dump_int("scrubs_remote", scrubs_remote); + f->dump_int("granted_reservations", granted_reservations.size()); + f->dump_string("PGs being served", fmt::format("{}", granted_reservations)); f->dump_int("osd_max_scrubs", conf->osd_max_scrubs); } diff --git a/src/osd/scrubber/scrub_resources.h b/src/osd/scrubber/scrub_resources.h index 890ee5d0e2f..75807a10f82 100644 --- a/src/osd/scrubber/scrub_resources.h +++ b/src/osd/scrubber/scrub_resources.h @@ -8,6 +8,7 @@ #include "common/ceph_mutex.h" #include "common/config_proxy.h" #include "common/Formatter.h" +#include "osd/osd_types.h" namespace Scrub { @@ -28,8 +29,9 @@ class ScrubResources { /// the number of concurrent scrubs performed by Primaries on this OSD int scrubs_local{0}; - /// the number of active scrub reservations granted by replicas - int scrubs_remote{0}; + /// the set of PGs that have active scrub reservations as replicas + /// \todo come C++23 - consider std::flat_set<pg_t> + std::set<pg_t> granted_reservations; mutable ceph::mutex resource_lock = ceph::make_mutex("ScrubQueue::resource_lock"); @@ -38,6 +40,10 @@ class ScrubResources { const ceph::common::ConfigProxy& conf; + /// an aux used to check available local scrubs. Must be called with + /// the resource lock held. + bool can_inc_local_scrubs_unlocked() const; + public: explicit ScrubResources( log_upwards_t log_access, @@ -56,10 +62,10 @@ class ScrubResources { void dec_scrubs_local(); /// increments the number of scrubs acting as a Replica - bool inc_scrubs_remote(); + bool inc_scrubs_remote(pg_t pgid); /// decrements the number of scrubs acting as a Replica - void dec_scrubs_remote(); + void dec_scrubs_remote(pg_t pgid); void dump_scrub_reservations(ceph::Formatter* f) const; }; diff --git a/src/osd/scrubber_common.h b/src/osd/scrubber_common.h index 16810bba15c..ce269563f9a 100644 --- a/src/osd/scrubber_common.h +++ b/src/osd/scrubber_common.h @@ -18,12 +18,14 @@ struct PGPool; namespace Scrub { class ReplicaReservations; + struct ReplicaActive; } /// Facilitating scrub-related object access to private PG data class ScrubberPasskey { private: friend class Scrub::ReplicaReservations; + friend struct Scrub::ReplicaActive; friend class PrimaryLogScrub; friend class PgScrubber; friend class ScrubBackend; @@ -169,8 +171,7 @@ struct requested_scrub_t { * the value of auto_repair is determined in sched_scrub() (once per scrub. * previous value is not remembered). Set if * - allowed by configuration and backend, and - * - must_scrub is not set (i.e. 
- this is a periodic scrub), - * - time_for_deep was just set + * - for periodic scrubs: time_for_deep was just set */ bool auto_repair{false}; @@ -310,6 +311,9 @@ struct ScrubPgIF { /// the OSD scrub queue virtual void on_new_interval() = 0; + /// we are peered as a replica + virtual void on_replica_activate() = 0; + virtual void scrub_clear_state() = 0; virtual void handle_query_state(ceph::Formatter* f) = 0; diff --git a/src/pybind/mgr/dashboard/controllers/_crud.py b/src/pybind/mgr/dashboard/controllers/_crud.py index 240a2b5ab8c..d65649cadb4 100644 --- a/src/pybind/mgr/dashboard/controllers/_crud.py +++ b/src/pybind/mgr/dashboard/controllers/_crud.py @@ -104,6 +104,7 @@ class Validator(Enum): RGW_ROLE_NAME = 'rgwRoleName' RGW_ROLE_PATH = 'rgwRolePath' FILE = 'file' + RGW_ROLE_SESSION_DURATION = 'rgwRoleSessionDuration' class FormField(NamedTuple): @@ -224,6 +225,10 @@ class Container: properties[field.key]['title'] = field.name field_ui_schema['key'] = field_key field_ui_schema['readonly'] = field.readonly + if field.readonly: + field_ui_schema['templateOptions'] = { + 'disabled': True + } field_ui_schema['help'] = f'{field.help}' field_ui_schema['validators'] = [i.value for i in field.validators] items.append(field_ui_schema) @@ -307,6 +312,7 @@ class CRUDMeta(SerializableClass): self.forms = [] self.columnKey = '' self.detail_columns = [] + self.resource = '' class CRUDCollectionMethod(NamedTuple): @@ -330,6 +336,7 @@ class CRUDEndpoint: actions: Optional[List[TableAction]] = None, permissions: Optional[List[str]] = None, forms: Optional[List[Form]] = None, column_key: Optional[str] = None, + resource: Optional[str] = None, meta: CRUDMeta = CRUDMeta(), get_all: Optional[CRUDCollectionMethod] = None, create: Optional[CRUDCollectionMethod] = None, delete: Optional[CRUDCollectionMethod] = None, @@ -352,6 +359,7 @@ class CRUDEndpoint: self.detail_columns = detail_columns if detail_columns is not None else [] self.extra_endpoints = extra_endpoints if extra_endpoints is not None else [] self.selection_type = selection_type + self.resource = resource def __call__(self, cls: Any): self.create_crud_class(cls) @@ -415,6 +423,7 @@ class CRUDEndpoint: self.generate_forms(model_key) self.set_permissions() self.set_column_key() + self.set_table_resource() self.get_detail_columns() selection_type = self.__class__.outer_self.selection_type self.__class__.outer_self.meta.table.set_selection_type(selection_type) @@ -468,6 +477,10 @@ class CRUDEndpoint: if self.__class__.outer_self.column_key: self.outer_self.meta.columnKey = self.__class__.outer_self.column_key + def set_table_resource(self): + if self.__class__.outer_self.resource: + self.outer_self.meta.resource = self.__class__.outer_self.resource + class_name = self.router.path.replace('/', '') meta_class = type(f'{class_name}_CRUDClassMetadata', (RESTController,), @@ -478,6 +491,7 @@ class CRUDEndpoint: 'generate_forms': generate_forms, 'set_permissions': set_permissions, 'set_column_key': set_column_key, + 'set_table_resource': set_table_resource, 'get_detail_columns': get_detail_columns, 'outer_self': self, }) diff --git a/src/pybind/mgr/dashboard/controllers/ceph_users.py b/src/pybind/mgr/dashboard/controllers/ceph_users.py index e1bdc157091..022f8f36c42 100644 --- a/src/pybind/mgr/dashboard/controllers/ceph_users.py +++ b/src/pybind/mgr/dashboard/controllers/ceph_users.py @@ -174,7 +174,7 @@ edit_form = Form(path='/cluster/user/edit', TableAction(name='Create', permission='create', icon=Icon.ADD.value, routerLink='/cluster/user/create'), 
TableAction(name='Edit', permission='update', icon=Icon.EDIT.value, - click='edit'), + click='edit', routerLink='/cluster/user/edit'), TableAction(name='Delete', permission='delete', icon=Icon.DESTROY.value, click='delete', disable=True), TableAction(name='Import', permission='create', icon=Icon.IMPORT.value, @@ -185,6 +185,7 @@ edit_form = Form(path='/cluster/user/edit', permissions=[Scope.CONFIG_OPT], forms=[create_form, edit_form, import_user_form], column_key='entity', + resource='user', get_all=CRUDCollectionMethod( func=CephUserEndpoints.user_list, doc=EndpointDoc("Get Ceph Users") diff --git a/src/pybind/mgr/dashboard/controllers/cephfs.py b/src/pybind/mgr/dashboard/controllers/cephfs.py index ed83f91d0c9..61b31eb809f 100644 --- a/src/pybind/mgr/dashboard/controllers/cephfs.py +++ b/src/pybind/mgr/dashboard/controllers/cephfs.py @@ -335,13 +335,16 @@ class CephFS(RESTController): standby_table = self.get_standby_table(fsmap['standbys'], mds_versions) + flags = mdsmap['flags_state'] + return { "cephfs": { "id": fs_id, "name": mdsmap['fs_name'], "client_count": client_count, "ranks": rank_table, - "pools": pools_table + "pools": pools_table, + "flags": flags, }, "standbys": standby_table, "versions": mds_versions @@ -673,7 +676,7 @@ class CephFsUi(CephFS): @APIDoc('CephFS Subvolume Management API', 'CephFSSubvolume') class CephFSSubvolume(RESTController): - def get(self, vol_name: str, group_name: str = ""): + def get(self, vol_name: str, group_name: str = "", info=True): params = {'vol_name': vol_name} if group_name: params['group_name'] = group_name @@ -684,15 +687,17 @@ class CephFSSubvolume(RESTController): f'Failed to list subvolumes for volume {vol_name}: {err}' ) subvolumes = json.loads(out) - for subvolume in subvolumes: - params['sub_name'] = subvolume['name'] - error_code, out, err = mgr.remote('volumes', '_cmd_fs_subvolume_info', None, - params) - if error_code != 0: - raise DashboardException( - f'Failed to get info for subvolume {subvolume["name"]}: {err}' - ) - subvolume['info'] = json.loads(out) + + if info: + for subvolume in subvolumes: + params['sub_name'] = subvolume['name'] + error_code, out, err = mgr.remote('volumes', '_cmd_fs_subvolume_info', None, + params) + if error_code != 0: + raise DashboardException( + f'Failed to get info for subvolume {subvolume["name"]}: {err}' + ) + subvolume['info'] = json.loads(out) return subvolumes @RESTController.Resource('GET') @@ -749,12 +754,27 @@ class CephFSSubvolume(RESTController): component='cephfs') return f'Subvolume {subvol_name} removed successfully' + @RESTController.Resource('GET') + def exists(self, vol_name: str, group_name=''): + params = {'vol_name': vol_name} + if group_name: + params['group_name'] = group_name + error_code, out, err = mgr.remote( + 'volumes', '_cmd_fs_subvolume_exist', None, params) + if error_code != 0: + raise DashboardException( + f'Failed to check if subvolume exists: {err}' + ) + if out == 'no subvolume exists': + return False + return True + @APIRouter('/cephfs/subvolume/group', Scope.CEPHFS) @APIDoc("Cephfs Subvolume Group Management API", "CephfsSubvolumeGroup") class CephFSSubvolumeGroups(RESTController): - def get(self, vol_name): + def get(self, vol_name, info=True): if not vol_name: raise DashboardException( f'Error listing subvolume groups for {vol_name}') @@ -764,15 +784,17 @@ class CephFSSubvolumeGroups(RESTController): raise DashboardException( f'Error listing subvolume groups for {vol_name}') subvolume_groups = json.loads(out) - for group in subvolume_groups: - error_code, 
out, err = mgr.remote('volumes', '_cmd_fs_subvolumegroup_info', - None, {'vol_name': vol_name, - 'group_name': group['name']}) - if error_code != 0: - raise DashboardException( - f'Failed to get info for subvolume group {group["name"]}: {err}' - ) - group['info'] = json.loads(out) + + if info: + for group in subvolume_groups: + error_code, out, err = mgr.remote('volumes', '_cmd_fs_subvolumegroup_info', + None, {'vol_name': vol_name, + 'group_name': group['name']}) + if error_code != 0: + raise DashboardException( + f'Failed to get info for subvolume group {group["name"]}: {err}' + ) + group['info'] = json.loads(out) return subvolume_groups @RESTController.Resource('GET') @@ -813,3 +835,61 @@ class CephFSSubvolumeGroups(RESTController): f'Failed to delete subvolume group {group_name}: {err}' ) return f'Subvolume group {group_name} removed successfully' + + +@APIRouter('/cephfs/subvolume/snapshot', Scope.CEPHFS) +@APIDoc("Cephfs Subvolume Snapshot Management API", "CephfsSubvolumeSnapshot") +class CephFSSubvolumeSnapshots(RESTController): + def get(self, vol_name: str, subvol_name, group_name: str = '', info=True): + params = {'vol_name': vol_name, 'sub_name': subvol_name} + if group_name: + params['group_name'] = group_name + error_code, out, err = mgr.remote('volumes', '_cmd_fs_subvolume_snapshot_ls', None, + params) + if error_code != 0: + raise DashboardException( + f'Failed to list subvolume snapshots for subvolume {subvol_name}: {err}' + ) + snapshots = json.loads(out) + + if info: + for snapshot in snapshots: + params['snap_name'] = snapshot['name'] + error_code, out, err = mgr.remote('volumes', '_cmd_fs_subvolume_snapshot_info', + None, params) + if error_code != 0: + raise DashboardException( + f'Failed to get info for subvolume snapshot {snapshot["name"]}: {err}' + ) + snapshot['info'] = json.loads(out) + return snapshots + + +@APIRouter('/cephfs/snaphost/schedule', Scope.CEPHFS) +@APIDoc("Cephfs Snapshot Scheduling API", "CephFSSnapshotSchedule") +class CephFSSnapshotSchedule(RESTController): + + def list(self, fs: str, path: str = '/', recursive: bool = True): + error_code, out, err = mgr.remote('snap_schedule', 'snap_schedule_list', + path, recursive, fs, 'plain') + + if len(out) == 0: + return [] + + snapshot_schedule_list = out.split('\n') + output = [] + + for snap in snapshot_schedule_list: + current_path = snap.strip().split(' ')[0] + error_code, status_out, err = mgr.remote('snap_schedule', 'snap_schedule_get', + current_path, fs, 'plain') + output.append(json.loads(status_out)) + + output_json = json.dumps(output) + + if error_code != 0: + raise DashboardException( + f'Failed to get list of snapshot schedules for path {path}: {err}' + ) + + return json.loads(output_json) diff --git a/src/pybind/mgr/dashboard/controllers/rgw.py b/src/pybind/mgr/dashboard/controllers/rgw.py index 9bc1b489ede..661dc8afb6d 100644 --- a/src/pybind/mgr/dashboard/controllers/rgw.py +++ b/src/pybind/mgr/dashboard/controllers/rgw.py @@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- +# pylint: disable=C0302 import json import logging import re @@ -290,6 +291,10 @@ class RgwBucket(RgwRESTController): rgw_client = RgwClient.admin_instance() return rgw_client.get_bucket_policy(bucket) + def _set_tags(self, bucket_name, tags, daemon_name, owner): + rgw_client = RgwClient.instance(owner, daemon_name) + return rgw_client.set_tags(bucket_name, tags) + @staticmethod def strip_tenant_from_bucket_name(bucket_name): # type (str) -> str @@ -355,7 +360,7 @@ class RgwBucket(RgwRESTController): lock_enabled='false', 
lock_mode=None, lock_retention_period_days=None, lock_retention_period_years=None, encryption_state='false', - encryption_type=None, key_id=None, daemon_name=None): + encryption_type=None, key_id=None, tags=None, daemon_name=None): lock_enabled = str_to_bool(lock_enabled) encryption_state = str_to_bool(encryption_state) try: @@ -371,6 +376,9 @@ class RgwBucket(RgwRESTController): if encryption_state: self._set_encryption(bucket, encryption_type, key_id, daemon_name, uid) + if tags: + self._set_tags(bucket, tags, daemon_name, uid) + return result except RequestException as e: # pragma: no cover - handling is too obvious raise DashboardException(e, http_status_code=500, component='rgw') @@ -380,7 +388,7 @@ class RgwBucket(RgwRESTController): encryption_state='false', encryption_type=None, key_id=None, mfa_delete=None, mfa_token_serial=None, mfa_token_pin=None, lock_mode=None, lock_retention_period_days=None, - lock_retention_period_years=None, daemon_name=None): + lock_retention_period_years=None, tags=None, daemon_name=None): encryption_state = str_to_bool(encryption_state) # When linking a non-tenant-user owned bucket to a tenanted user, we # need to prefix bucket name with '/'. e.g. photos -> /photos @@ -420,6 +428,8 @@ class RgwBucket(RgwRESTController): self._set_encryption(bucket_name, encryption_type, key_id, daemon_name, uid) if encryption_status['Status'] == 'Enabled' and (not encryption_state): self._delete_encryption(bucket_name, daemon_name, uid) + if tags: + self._set_tags(bucket_name, tags, daemon_name, uid) return self._append_bid(result) def delete(self, bucket, purge_objects='true', daemon_name=None): @@ -717,6 +727,36 @@ class RGWRoleEndpoints: rgw_client.create_role(role_name, role_path, role_assume_policy_doc) return f'Role {role_name} created successfully' + @staticmethod + def role_update(_, role_name: str, max_session_duration: str): + assert role_name + assert max_session_duration + # convert max_session_duration which is in hours to seconds + max_session_duration = int(float(max_session_duration) * 3600) + rgw_client = RgwClient.admin_instance() + rgw_client.update_role(role_name, str(max_session_duration)) + return f'Role {role_name} updated successfully' + + @staticmethod + def role_delete(_, role_name: str): + assert role_name + rgw_client = RgwClient.admin_instance() + rgw_client.delete_role(role_name) + return f'Role {role_name} deleted successfully' + + @staticmethod + def model(role_name: str): + assert role_name + rgw_client = RgwClient.admin_instance() + role = rgw_client.get_role(role_name) + model = {'role_name': '', 'max_session_duration': ''} + model['role_name'] = role['RoleName'] + + # convert maxsessionduration which is in seconds to hours + if role['MaxSessionDuration']: + model['max_session_duration'] = role['MaxSessionDuration'] / 3600 + return model + # pylint: disable=C0301 assume_role_policy_help = ( @@ -725,6 +765,10 @@ assume_role_policy_help = ( 'target="_blank">click here.</a>' ) +max_session_duration_help = ( + 'The maximum session duration (in hours) that you want to set for the specified role. This setting can have a value from 1 hour to 12 hours.' 
# noqa: E501 +) + create_container = VerticalContainer('Create Role', 'create_role', fields=[ FormField('Role name', 'role_name', validators=[Validator.RGW_ROLE_NAME]), FormField('Path', 'role_path', validators=[Validator.RGW_ROLE_PATH]), @@ -734,29 +778,58 @@ create_container = VerticalContainer('Create Role', 'create_role', fields=[ field_type='textarea', validators=[Validator.JSON]), ]) -create_role_form = Form(path='/rgw/roles/create', + +edit_container = VerticalContainer('Edit Role', 'edit_role', fields=[ + FormField('Role name', 'role_name', readonly=True), + FormField('Max Session Duration', 'max_session_duration', + help=max_session_duration_help, + validators=[Validator.RGW_ROLE_SESSION_DURATION]) +]) + +create_role_form = Form(path='/create', root_container=create_container, task_info=FormTaskInfo("IAM RGW Role '{role_name}' created successfully", ['role_name']), method_type=MethodType.POST.value) +edit_role_form = Form(path='/edit', + root_container=edit_container, + task_info=FormTaskInfo("IAM RGW Role '{role_name}' edited successfully", + ['role_name']), + method_type=MethodType.PUT.value, + model_callback=RGWRoleEndpoints.model) + @CRUDEndpoint( router=APIRouter('/rgw/roles', Scope.RGW), doc=APIDoc("List of RGW roles", "RGW"), actions=[ TableAction(name='Create', permission='create', icon=Icon.ADD.value, - routerLink='/rgw/roles/create') + routerLink='/rgw/roles/create'), + TableAction(name='Edit', permission='update', icon=Icon.EDIT.value, + click='edit', routerLink='/rgw/roles/edit'), + TableAction(name='Delete', permission='delete', icon=Icon.DESTROY.value, + click='delete', disable=True), ], - forms=[create_role_form], - permissions=[Scope.CONFIG_OPT], + forms=[create_role_form, edit_role_form], + column_key='RoleName', + resource='Role', + permissions=[Scope.RGW], get_all=CRUDCollectionMethod( func=RGWRoleEndpoints.role_list, doc=EndpointDoc("List RGW roles") ), create=CRUDCollectionMethod( func=RGWRoleEndpoints.role_create, - doc=EndpointDoc("Create Ceph User") + doc=EndpointDoc("Create RGW role") + ), + edit=CRUDCollectionMethod( + func=RGWRoleEndpoints.role_update, + doc=EndpointDoc("Edit RGW role") + ), + delete=CRUDCollectionMethod( + func=RGWRoleEndpoints.role_delete, + doc=EndpointDoc("Delete RGW role") ), set_column={ "CreateDate": {'cellTemplate': 'date'}, diff --git a/src/pybind/mgr/dashboard/frontend/cypress/e2e/common/table-helper.feature.po.ts b/src/pybind/mgr/dashboard/frontend/cypress/e2e/common/table-helper.feature.po.ts index 82a2c7c35cd..330950acd44 100644 --- a/src/pybind/mgr/dashboard/frontend/cypress/e2e/common/table-helper.feature.po.ts +++ b/src/pybind/mgr/dashboard/frontend/cypress/e2e/common/table-helper.feature.po.ts @@ -54,6 +54,13 @@ Then('I should not see a row with {string}', (row: string) => { ); }); +Then('I should see a table in the expanded row', () => { + cy.get('.datatable-row-detail').within(() => { + cy.get('cd-table').should('exist'); + cy.get('datatable-scroller, .empty-row'); + }); +}); + Then('I should not see a row with {string} in the expanded row', (row: string) => { cy.get('.datatable-row-detail').within(() => { cy.get('cd-table .search input').first().clear().type(row); @@ -133,3 +140,9 @@ And('I should see row {string} have {string} on this tab', (row: string, options }); } }); + +Then('I should see an alert {string} in the expanded row', (alert: string) => { + cy.get('.datatable-row-detail').within(() => { + cy.get('.alert-panel-text').contains(alert); + }); +}); diff --git 
a/src/pybind/mgr/dashboard/frontend/cypress/e2e/filesystems/filesystems.e2e-spec.feature b/src/pybind/mgr/dashboard/frontend/cypress/e2e/filesystems/filesystems.e2e-spec.feature index 2c08fb56eff..54fb1a8139e 100644 --- a/src/pybind/mgr/dashboard/frontend/cypress/e2e/filesystems/filesystems.e2e-spec.feature +++ b/src/pybind/mgr/dashboard/frontend/cypress/e2e/filesystems/filesystems.e2e-spec.feature @@ -12,19 +12,20 @@ Feature: CephFS Management And I click on "Create File System" button Then I should see a row with "test_cephfs" - Scenario: Edit CephFS Volume - Given I am on the "cephfs" page - And I select a row "test_cephfs" - And I click on "Edit" button - And enter "name" "test_cephfs_edit" - And I click on "Edit File System" button - Then I should see a row with "test_cephfs_edit" + # Should be uncommented once the pre-requisite is fixed + # Scenario: Edit CephFS Volume + # Given I am on the "cephfs" page + # And I select a row "test_cephfs" + # And I click on "Edit" button + # And enter "name" "test_cephfs_edit" + # And I click on "Edit File System" button + # Then I should see a row with "test_cephfs_edit" Scenario: Remove CephFS Volume Given I am on the "cephfs" page - And I select a row "test_cephfs_edit" + And I select a row "test_cephfs" And I click on "Remove" button from the table actions Then I should see the modal And I check the tick box in modal And I click on "Remove File System" button - Then I should not see a row with "test_cephfs_edit" + Then I should not see a row with "test_cephfs" diff --git a/src/pybind/mgr/dashboard/frontend/cypress/e2e/filesystems/snapshots.e2e-spec.feature b/src/pybind/mgr/dashboard/frontend/cypress/e2e/filesystems/snapshots.e2e-spec.feature new file mode 100644 index 00000000000..002282172bb --- /dev/null +++ b/src/pybind/mgr/dashboard/frontend/cypress/e2e/filesystems/snapshots.e2e-spec.feature @@ -0,0 +1,53 @@ +Feature: CephFS Snapshot Management + + Goal: To test out the CephFS snapshot management features + + Background: Login + Given I am logged in + + Scenario: Create a CephFS Volume + Given I am on the "cephfs" page + And I click on "Create" button + And enter "name" "test_cephfs" + And I click on "Create File System" button + Then I should see a row with "test_cephfs" + + Scenario: Snapshots tab without a subvolume + Given I am on the "cephfs" page + When I expand the row "test_cephfs" + And I go to the "Snapshots" tab + Then I should see an alert "No subvolumes are present" in the expanded row + + Scenario: Create a CephFS Subvolume + Given I am on the "cephfs" page + When I expand the row "test_cephfs" + And I go to the "Subvolumes" tab + And I click on "Create" button from the expanded row + And enter "subvolumeName" "test_subvolume" in the modal + And I click on "Create Subvolume" button + Then I should see a row with "test_subvolume" in the expanded row + + Scenario: Show the CephFS Snapshots view + Given I am on the "cephfs" page + When I expand the row "test_cephfs" + And I go to the "Snapshots" tab + Then I should see a table in the expanded row + + Scenario: Remove a CephFS Subvolume + Given I am on the "cephfs" page + When I expand the row "test_cephfs" + And I go to the "Subvolumes" tab + When I select a row "test_subvolume" in the expanded row + And I click on "Remove" button from the table actions in the expanded row + And I check the tick box in modal + And I click on "Remove Subvolume" button + Then I should not see a row with "test_subvolume" in the expanded row + + Scenario: Remove CephFS Volume + Given I am on the 
"cephfs" page + And I select a row "test_cephfs" + And I click on "Remove" button from the table actions + Then I should see the modal + And I check the tick box in modal + And I click on "Remove File System" button + Then I should not see a row with "test_cephfs" diff --git a/src/pybind/mgr/dashboard/frontend/cypress/e2e/filesystems/subvolume-groups.e2e-spec.feature b/src/pybind/mgr/dashboard/frontend/cypress/e2e/filesystems/subvolume-groups.e2e-spec.feature index 66e3f726a66..e53df64771d 100644 --- a/src/pybind/mgr/dashboard/frontend/cypress/e2e/filesystems/subvolume-groups.e2e-spec.feature +++ b/src/pybind/mgr/dashboard/frontend/cypress/e2e/filesystems/subvolume-groups.e2e-spec.feature @@ -21,7 +21,7 @@ Feature: CephFS Subvolume Group management And I click on "Create Subvolume group" button Then I should see a row with "test_subvolume_group" in the expanded row - Scenario: Edit a CephFS Subvolume + Scenario: Edit a CephFS Subvolume Group Given I am on the "cephfs" page When I expand the row "test_cephfs" And I go to the "Subvolume groups" tab @@ -31,7 +31,7 @@ Feature: CephFS Subvolume Group management And I click on "Edit Subvolume group" button Then I should see row "test_subvolume_group" of the expanded row to have a usage bar - Scenario: Remove a CephFS Subvolume + Scenario: Remove a CephFS Subvolume Group Given I am on the "cephfs" page When I expand the row "test_cephfs" And I go to the "Subvolume groups" tab diff --git a/src/pybind/mgr/dashboard/frontend/cypress/e2e/rgw/roles.e2e-spec.ts b/src/pybind/mgr/dashboard/frontend/cypress/e2e/rgw/roles.e2e-spec.ts index 597f7d1be88..80a8b0ec902 100644 --- a/src/pybind/mgr/dashboard/frontend/cypress/e2e/rgw/roles.e2e-spec.ts +++ b/src/pybind/mgr/dashboard/frontend/cypress/e2e/rgw/roles.e2e-spec.ts @@ -9,11 +9,21 @@ describe('RGW roles page', () => { }); describe('Create, Edit & Delete rgw roles', () => { + const roleName = 'testRole'; + it('should create rgw roles', () => { roles.navigateTo('create'); - roles.create('testRole', '/', '{}'); + roles.create(roleName, '/', '{}'); roles.navigateTo(); - roles.checkExist('testRole', true); + roles.checkExist(roleName, true); + }); + + it('should edit rgw role', () => { + roles.edit(roleName, 3); + }); + + it('should delete rgw role', () => { + roles.delete(roleName); }); }); }); diff --git a/src/pybind/mgr/dashboard/frontend/cypress/e2e/rgw/roles.po.ts b/src/pybind/mgr/dashboard/frontend/cypress/e2e/rgw/roles.po.ts index b72ca5df9a7..717655b2f08 100644 --- a/src/pybind/mgr/dashboard/frontend/cypress/e2e/rgw/roles.po.ts +++ b/src/pybind/mgr/dashboard/frontend/cypress/e2e/rgw/roles.po.ts @@ -11,18 +11,36 @@ export class RolesPageHelper extends PageHelper { columnIndex = { roleName: 2, path: 3, - arn: 4 + arn: 4, + createDate: 5, + maxSessionDuration: 6 }; @PageHelper.restrictTo(pages.create.url) create(name: string, path: string, policyDocument: string) { - cy.get('#formly_3_string_role_name_0').type(name); - cy.get('#formly_3_textarea_role_assume_policy_doc_2').type(policyDocument); - cy.get('#formly_3_string_role_path_1').type(path); + cy.get('[id$="string_role_name_0"]').type(name); + cy.get('[id$="role_assume_policy_doc_2"]').type(policyDocument); + cy.get('[id$="role_path_1"]').type(path); cy.get("[aria-label='Create Role']").should('exist').click(); cy.get('cd-crud-table').should('exist'); } + edit(name: string, maxSessionDuration: number) { + this.navigateEdit(name); + cy.get('[id$="max_session_duration_1"]').clear().type(maxSessionDuration.toString()); + cy.get("[aria-label='Edit 
Role']").should('exist').click(); + cy.get('cd-crud-table').should('exist'); + + this.getTableCell(this.columnIndex.roleName, name) + .click() + .parent() + .find(`datatable-body-cell:nth-child(${this.columnIndex.maxSessionDuration})`) + .should(($elements) => { + const roleName = $elements.map((_, el) => el.textContent).get(); + expect(roleName).to.include(`${maxSessionDuration} hours`); + }); + } + @PageHelper.restrictTo(pages.index.url) checkExist(name: string, exist: boolean) { this.getTableCell(this.columnIndex.roleName, name).should(($elements) => { diff --git a/src/pybind/mgr/dashboard/frontend/src/app/app-routing.module.ts b/src/pybind/mgr/dashboard/frontend/src/app/app-routing.module.ts index 38ae3a4affd..2ba634fa25d 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/app-routing.module.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/app-routing.module.ts @@ -361,7 +361,7 @@ const routes: Routes = [ data: { breadcrumbs: ActionLabels.CREATE } }, { - path: `${URLVerbs.EDIT}/:name`, + path: `${URLVerbs.EDIT}/:id`, component: CephfsVolumeFormComponent, data: { breadcrumbs: ActionLabels.EDIT } } diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cephfs/cephfs-form/cephfs-form.component.html b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cephfs/cephfs-form/cephfs-form.component.html index 05235d16ccd..f8d0fa80320 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cephfs/cephfs-form/cephfs-form.component.html +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cephfs/cephfs-form/cephfs-form.component.html @@ -15,6 +15,18 @@ i18n *ngIf="!editing">Orchestrator is not configured. Deploy MDS daemons manually after creating the volume.</cd-alert-panel> </ng-container> + + <cd-alert-panel type="info" + class="m-3" + spacingClass="mt-3" + i18n + *ngIf="editing && disableRename"> + <p>The File System can only be renamed if it is shutdown and `refuse_client_session` is set to true. + Follow the steps below in the command line and refresh the page:</p> + <cd-code-block [codes]="[fsFailCmd]"></cd-code-block> + <cd-code-block [codes]="[fsSetCmd]"></cd-code-block> + </cd-alert-panel> + <div class="card-body"> <!-- Name --> <div class="form-group row"> @@ -98,6 +110,7 @@ <cd-form-button-panel (submitActionEvent)="submit()" [form]="form" [submitText]="(action | titlecase) + ' ' + (resource | upperFirst)" + [disabled]="editing ? 
disableRename: false" wrappingClass="text-right"></cd-form-button-panel> </div> </div> diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cephfs/cephfs-form/cephfs-form.component.spec.ts b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cephfs/cephfs-form/cephfs-form.component.spec.ts index 461f4bca052..520f726d555 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cephfs/cephfs-form/cephfs-form.component.spec.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cephfs/cephfs-form/cephfs-form.component.spec.ts @@ -78,5 +78,42 @@ describe('CephfsVolumeFormComponent', () => { expect(label).toBeNull(); expect(hosts).toBeNull(); }); + + it('should disable renaming and show info alert if disableRename is true', () => { + component.disableRename = true; + component.ngOnInit(); + fixture.detectChanges(); + const alertPanel = fixture.debugElement.query(By.css('cd-alert-panel')); + expect(alertPanel).not.toBeNull(); + }); + + it('should not show the alert if disableRename is false', () => { + component.disableRename = false; + component.ngOnInit(); + fixture.detectChanges(); + const alertPanel = fixture.debugElement.query(By.css('cd-alert-panel')); + expect(alertPanel).toBeNull(); + }); + + it('should disable the submit button only if disableRename is true', () => { + component.disableRename = true; + component.ngOnInit(); + fixture.detectChanges(); + const submitButton = fixture.debugElement.query(By.css('button[type=submit]')); + expect(submitButton.nativeElement.disabled).toBeTruthy(); + + // the submit button should only be disabled when the form is in edit mode + component.editing = false; + component.ngOnInit(); + fixture.detectChanges(); + expect(submitButton.nativeElement.disabled).toBeFalsy(); + + // submit button should be enabled if disableRename is false + component.editing = true; + component.disableRename = false; + component.ngOnInit(); + fixture.detectChanges(); + expect(submitButton.nativeElement.disabled).toBeFalsy(); + }); }); }); diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cephfs/cephfs-form/cephfs-form.component.ts b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cephfs/cephfs-form/cephfs-form.component.ts index 6d84e33c7b6..b0f90979c25 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cephfs/cephfs-form/cephfs-form.component.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cephfs/cephfs-form/cephfs-form.component.ts @@ -51,6 +51,11 @@ export class CephfsVolumeFormComponent extends CdForm implements OnInit { labels: string[]; hasOrchestrator: boolean; currentVolumeName: string; + fsId: number; + disableRename: boolean = true; + + fsFailCmd: string; + fsSetCmd: string; constructor( private router: Router, @@ -101,9 +106,22 @@ export class CephfsVolumeFormComponent extends CdForm implements OnInit { ngOnInit() { if (this.editing) { - this.route.params.subscribe((params: { name: string }) => { - this.currentVolumeName = params.name; + this.route.params.subscribe((params: { id: string }) => { + this.fsId = Number(params.id); + }); + + this.cephfsService.getCephfs(this.fsId).subscribe((resp: object) => { + this.currentVolumeName = resp['cephfs']['name']; this.form.get('name').setValue(this.currentVolumeName); + + this.disableRename = !( + !resp['cephfs']['flags']['joinable'] && resp['cephfs']['flags']['refuse_client_session'] + ); + if (this.disableRename) { + this.form.get('name').disable(); + this.fsFailCmd = `ceph fs fail ${this.currentVolumeName}`; + this.fsSetCmd = `ceph fs set ${this.currentVolumeName} refuse_client_session 
true`; + } }); } else { const hostContext = new CdTableFetchDataContext(() => undefined); diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cephfs/cephfs-list/cephfs-list.component.ts b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cephfs/cephfs-list/cephfs-list.component.ts index 0d55845ab59..26e79727c12 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cephfs/cephfs-list/cephfs-list.component.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cephfs/cephfs-list/cephfs-list.component.ts @@ -87,7 +87,7 @@ export class CephfsListComponent extends ListWithDetails implements OnInit { permission: 'update', icon: Icons.edit, click: () => - this.router.navigate([this.urlBuilder.getEdit(this.selection.first().mdsmap.fs_name)]) + this.router.navigate([this.urlBuilder.getEdit(String(this.selection.first().id))]) }, { permission: 'delete', diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cephfs/cephfs-snapshotschedule-list/cephfs-snapshotschedule-list.component.html b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cephfs/cephfs-snapshotschedule-list/cephfs-snapshotschedule-list.component.html new file mode 100644 index 00000000000..2e270057d57 --- /dev/null +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cephfs/cephfs-snapshotschedule-list/cephfs-snapshotschedule-list.component.html @@ -0,0 +1,58 @@ +<ng-container *ngIf="isLoading$ | async"> + <cd-loading-panel> + <span i18n>Loading snapshot schedules...</span> + </cd-loading-panel> +</ng-container> + +<ng-template #pathTpl + let-row="row"> + <span + class="fw-bold" + [ngbTooltip]="fullpathTpl" + triggers="click:blur">{{row.path | path}}</span> + + <span *ngIf="row.active; else inactiveStatusTpl"> + <i [ngClass]="[icons.success, icons.large]" + ngbTooltip="{{row.path}} is active" + class="text-success"></i> + </span> + + <ng-template #inactiveStatusTpl> + <i [ngClass]="[icons.warning, icons.large]" + class="text-warning" + ngbTooltip="{{row.path}} has been deactivated"></i> + </ng-template> + + <ng-template #fullpathTpl> + <span data-toggle="tooltip" + [title]="row.path" + class="font-monospace">{{ row.path }} + <cd-copy-2-clipboard-button *ngIf="row.path" + [source]="row.path" + [byId]="false" + [showIconOnly]="true"> + </cd-copy-2-clipboard-button> + </span> +</ng-template> + +</ng-template> + +<cd-table + [data]="snapshotSchedules$ | async" + columnMode="flex" + [columns]="columns" + selectionType="single" + [hasDetails]="false" + (fetchData)="fetchData()" + (updateSelection)="updateSelection($event)" +> + <div class="table-actions btn-toolbar"> + <cd-table-actions + [permission]="permissions.cephfs" + [selection]="selection" + class="btn-group" + [tableActions]="tableActions" + > + </cd-table-actions> + </div> +</cd-table> diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cephfs/cephfs-snapshotschedule-list/cephfs-snapshotschedule-list.component.scss b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cephfs/cephfs-snapshotschedule-list/cephfs-snapshotschedule-list.component.scss new file mode 100644 index 00000000000..e69de29bb2d --- /dev/null +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cephfs/cephfs-snapshotschedule-list/cephfs-snapshotschedule-list.component.scss diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cephfs/cephfs-snapshotschedule-list/cephfs-snapshotschedule-list.component.spec.ts b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cephfs/cephfs-snapshotschedule-list/cephfs-snapshotschedule-list.component.spec.ts new file mode 100644 index 00000000000..a20972f1cf8 
--- /dev/null +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cephfs/cephfs-snapshotschedule-list/cephfs-snapshotschedule-list.component.spec.ts @@ -0,0 +1,30 @@ +import { ComponentFixture, TestBed } from '@angular/core/testing'; + +import { CephfsSnapshotscheduleListComponent } from './cephfs-snapshotschedule-list.component'; +import { HttpClientTestingModule } from '@angular/common/http/testing'; +import { SharedModule } from '~/app/shared/shared.module'; +import { ToastrModule } from 'ngx-toastr'; +import { RouterTestingModule } from '@angular/router/testing'; +import { NgbActiveModal } from '@ng-bootstrap/ng-bootstrap'; +import { configureTestBed } from '~/testing/unit-test-helper'; + +describe('CephfsSnapshotscheduleListComponent', () => { + let component: CephfsSnapshotscheduleListComponent; + let fixture: ComponentFixture<CephfsSnapshotscheduleListComponent>; + + configureTestBed({ + declarations: [CephfsSnapshotscheduleListComponent], + imports: [HttpClientTestingModule, SharedModule, ToastrModule.forRoot(), RouterTestingModule], + providers: [NgbActiveModal] + }); + + beforeEach(() => { + fixture = TestBed.createComponent(CephfsSnapshotscheduleListComponent); + component = fixture.componentInstance; + fixture.detectChanges(); + }); + + it('should create', () => { + expect(component).toBeTruthy(); + }); +}); diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cephfs/cephfs-snapshotschedule-list/cephfs-snapshotschedule-list.component.ts b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cephfs/cephfs-snapshotschedule-list/cephfs-snapshotschedule-list.component.ts new file mode 100644 index 00000000000..d5f24e89728 --- /dev/null +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cephfs/cephfs-snapshotschedule-list/cephfs-snapshotschedule-list.component.ts @@ -0,0 +1,99 @@ +import { Component, Input, OnChanges, OnInit, SimpleChanges, ViewChild } from '@angular/core'; +import { NgbModalRef } from '@ng-bootstrap/ng-bootstrap'; +import { BehaviorSubject, Observable } from 'rxjs'; +import { finalize, shareReplay, switchMap } from 'rxjs/operators'; +import { CephfsSnapshotScheduleService } from '~/app/shared/api/cephfs-snapshot-schedule.service'; +import { CdForm } from '~/app/shared/forms/cd-form'; +import { CdTableAction } from '~/app/shared/models/cd-table-action'; +import { CdTableColumn } from '~/app/shared/models/cd-table-column'; +import { CdTableFetchDataContext } from '~/app/shared/models/cd-table-fetch-data-context'; +import { CdTableSelection } from '~/app/shared/models/cd-table-selection'; +import { Permissions } from '~/app/shared/models/permissions'; +import { SnapshotSchedule } from '~/app/shared/models/snapshot-schedule'; +import { AuthStorageService } from '~/app/shared/services/auth-storage.service'; +import { ModalService } from '~/app/shared/services/modal.service'; +import { Icons } from '~/app/shared/enum/icons.enum'; +import { CellTemplate } from '~/app/shared/enum/cell-template.enum'; + +@Component({ + selector: 'cd-cephfs-snapshotschedule-list', + templateUrl: './cephfs-snapshotschedule-list.component.html', + styleUrls: ['./cephfs-snapshotschedule-list.component.scss'] +}) +export class CephfsSnapshotscheduleListComponent extends CdForm implements OnInit, OnChanges { + @Input() fsName!: string; + + @ViewChild('pathTpl', { static: true }) + pathTpl: any; + + snapshotSchedules$!: Observable<SnapshotSchedule[]>; + subject$ = new BehaviorSubject<SnapshotSchedule[]>([]); + isLoading$ = new BehaviorSubject<boolean>(true); + columns: CdTableColumn[] = []; + 
tableActions: CdTableAction[] = []; + context!: CdTableFetchDataContext; + selection = new CdTableSelection(); + permissions!: Permissions; + modalRef!: NgbModalRef; + errorMessage: string = ''; + selectedName: string = ''; + icons = Icons; + + constructor( + private snapshotScheduleService: CephfsSnapshotScheduleService, + private authStorageService: AuthStorageService, + private modalService: ModalService + ) { + super(); + this.permissions = this.authStorageService.getPermissions(); + } + + ngOnChanges(changes: SimpleChanges): void { + if (changes.fsName) { + this.subject$.next([]); + } + } + + ngOnInit(): void { + this.snapshotSchedules$ = this.subject$.pipe( + switchMap(() => + this.snapshotScheduleService + .getSnapshotScheduleList('/', this.fsName) + .pipe(finalize(() => this.isLoading$.next(false))) + ), + shareReplay(1) + ); + + this.columns = [ + { prop: 'path', name: $localize`Path`, flexGrow: 3, cellTemplate: this.pathTpl }, + { prop: 'subvol', name: $localize`Subvolume` }, + { prop: 'schedule', name: $localize`Repeat interval` }, + { prop: 'retention', name: $localize`Retention policy` }, + { prop: 'created_count', name: $localize`Created Count` }, + { prop: 'pruned_count', name: $localize`Deleted Count` }, + { prop: 'start', name: $localize`Start time`, cellTransformation: CellTemplate.timeAgo }, + { prop: 'created', name: $localize`Created`, cellTransformation: CellTemplate.timeAgo } + ]; + + this.tableActions = []; + } + + fetchData() { + this.subject$.next([]); + } + + updateSelection(selection: CdTableSelection) { + this.selection = selection; + } + + openModal(edit = false) { + this.modalService.show( + {}, + { + fsName: 'fs1', + isEdit: edit + }, + { size: 'lg' } + ); + } +} diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cephfs/cephfs-subvolume-group/cephfs-subvolume-group.component.ts b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cephfs/cephfs-subvolume-group/cephfs-subvolume-group.component.ts index 3807ae61b67..a91daf8cb93 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cephfs/cephfs-subvolume-group/cephfs-subvolume-group.component.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cephfs/cephfs-subvolume-group/cephfs-subvolume-group.component.ts @@ -1,5 +1,5 @@ -import { Component, Input, OnChanges, OnInit, ViewChild } from '@angular/core'; -import { Observable, ReplaySubject, of } from 'rxjs'; +import { Component, Input, OnChanges, OnInit, SimpleChanges, ViewChild } from '@angular/core'; +import { BehaviorSubject, Observable, of } from 'rxjs'; import { catchError, shareReplay, switchMap } from 'rxjs/operators'; import { CephfsSubvolumeGroupService } from '~/app/shared/api/cephfs-subvolume-group.service'; @@ -9,7 +9,6 @@ import { CdTableAction } from '~/app/shared/models/cd-table-action'; import { CdTableColumn } from '~/app/shared/models/cd-table-column'; import { CdTableFetchDataContext } from '~/app/shared/models/cd-table-fetch-data-context'; import { CdTableSelection } from '~/app/shared/models/cd-table-selection'; -import { CephfsSubvolumeGroup } from '~/app/shared/models/cephfs-subvolumegroup.model'; import { CephfsSubvolumegroupFormComponent } from '../cephfs-subvolumegroup-form/cephfs-subvolumegroup-form.component'; import { ActionLabelsI18n } from '~/app/shared/constants/app.constants'; import { AuthStorageService } from '~/app/shared/services/auth-storage.service'; @@ -18,6 +17,7 @@ import { Permissions } from '~/app/shared/models/permissions'; import { CriticalConfirmationModalComponent } from 
'~/app/shared/components/critical-confirmation-modal/critical-confirmation-modal.component'; import { FinishedTask } from '~/app/shared/models/finished-task'; import { TaskWrapperService } from '~/app/shared/services/task-wrapper.service'; +import { CephfsSubvolumeGroup } from '~/app/shared/models/cephfs-subvolume-group.model'; @Component({ selector: 'cd-cephfs-subvolume-group', @@ -52,7 +52,7 @@ export class CephfsSubvolumeGroupComponent implements OnInit, OnChanges { permissions: Permissions; subvolumeGroup$: Observable<CephfsSubvolumeGroup[]>; - subject = new ReplaySubject<CephfsSubvolumeGroup[]>(); + subject = new BehaviorSubject<CephfsSubvolumeGroup[]>([]); constructor( private cephfsSubvolumeGroup: CephfsSubvolumeGroupService, @@ -138,11 +138,13 @@ export class CephfsSubvolumeGroupComponent implements OnInit, OnChanges { } fetchData() { - this.subject.next(); + this.subject.next([]); } - ngOnChanges() { - this.subject.next(); + ngOnChanges(changes: SimpleChanges) { + if (changes.fsName) { + this.subject.next([]); + } } updateSelection(selection: CdTableSelection) { diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cephfs/cephfs-subvolume-list/cephfs-subvolume-list.component.html b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cephfs/cephfs-subvolume-list/cephfs-subvolume-list.component.html index 29731bbbd1b..f840c8dab11 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cephfs/cephfs-subvolume-list/cephfs-subvolume-list.component.html +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cephfs/cephfs-subvolume-list/cephfs-subvolume-list.component.html @@ -1,21 +1,10 @@ <div class="row"> - <div class="col-sm-1"> - <h3 i18n>Groups</h3> - <ng-container *ngIf="subVolumeGroups$ | async as subVolumeGroups"> - <ul class="nav flex-column nav-pills"> - <li class="nav-item"> - <a class="nav-link" - [class.active]="!activeGroupName" - (click)="selectSubVolumeGroup()">Default</a> - </li> - <li class="nav-item" - *ngFor="let subVolumeGroup of subVolumeGroups"> - <a class="nav-link text-decoration-none text-break" - [class.active]="subVolumeGroup.name === activeGroupName" - (click)="selectSubVolumeGroup(subVolumeGroup.name)">{{subVolumeGroup.name}}</a> - </li> - </ul> - </ng-container> + <div class="col-sm-1" + *ngIf="subVolumeGroups$ | async as subVolumeGroups"> + <cd-vertical-navigation title="Groups" + [items]="subvolumeGroupList" + inputIdentifier="group-filter" + (emitActiveItem)="selectSubVolumeGroup($event)"></cd-vertical-navigation> </div> <div class="col-11 vertical-line"> <cd-table [data]="subVolumes$ | async" diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cephfs/cephfs-subvolume-list/cephfs-subvolume-list.component.ts b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cephfs/cephfs-subvolume-list/cephfs-subvolume-list.component.ts index 3f679d27b96..4f9cf27db0f 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cephfs/cephfs-subvolume-list/cephfs-subvolume-list.component.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cephfs/cephfs-subvolume-list/cephfs-subvolume-list.component.ts @@ -1,6 +1,14 @@ -import { Component, Input, OnChanges, OnInit, TemplateRef, ViewChild } from '@angular/core'; -import { Observable, ReplaySubject, of } from 'rxjs'; -import { catchError, shareReplay, switchMap } from 'rxjs/operators'; +import { + Component, + Input, + OnChanges, + OnInit, + SimpleChanges, + TemplateRef, + ViewChild +} from '@angular/core'; +import { BehaviorSubject, Observable, of } from 'rxjs'; +import { catchError, switchMap, tap } from 
'rxjs/operators'; import { CephfsSubvolumeService } from '~/app/shared/api/cephfs-subvolume.service'; import { ActionLabelsI18n } from '~/app/shared/constants/app.constants'; import { CellTemplate } from '~/app/shared/enum/cell-template.enum'; @@ -22,7 +30,7 @@ import { CdFormGroup } from '~/app/shared/forms/cd-form-group'; import { CdForm } from '~/app/shared/forms/cd-form'; import { CriticalConfirmationModalComponent } from '~/app/shared/components/critical-confirmation-modal/critical-confirmation-modal.component'; import { CephfsSubvolumeGroupService } from '~/app/shared/api/cephfs-subvolume-group.service'; -import { CephfsSubvolumeGroup } from '~/app/shared/models/cephfs-subvolumegroup.model'; +import { CephfsSubvolumeGroup } from '~/app/shared/models/cephfs-subvolume-group.model'; @Component({ selector: 'cd-cephfs-subvolume-list', @@ -64,13 +72,16 @@ export class CephfsSubvolumeListComponent extends CdForm implements OnInit, OnCh subVolumes$: Observable<CephfsSubvolume[]>; subVolumeGroups$: Observable<CephfsSubvolumeGroup[]>; - subject = new ReplaySubject<CephfsSubvolume[]>(); - groupsSubject = new ReplaySubject<CephfsSubvolume[]>(); + subject = new BehaviorSubject<CephfsSubvolume[]>([]); + groupsSubject = new BehaviorSubject<CephfsSubvolume[]>([]); + + subvolumeGroupList: string[] = []; + subVolumesList: CephfsSubvolume[] = []; activeGroupName: string = ''; constructor( - private cephfsSubVolume: CephfsSubvolumeService, + private cephfsSubVolumeService: CephfsSubvolumeService, private actionLabels: ActionLabelsI18n, private modalService: ModalService, private authStorageService: AuthStorageService, @@ -146,11 +157,13 @@ export class CephfsSubvolumeListComponent extends CdForm implements OnInit, OnCh } ]; - this.getSubVolumes(); - this.subVolumeGroups$ = this.groupsSubject.pipe( switchMap(() => - this.cephfsSubvolumeGroupService.get(this.fsName).pipe( + this.cephfsSubvolumeGroupService.get(this.fsName, false).pipe( + tap((groups) => { + this.subvolumeGroupList = groups.map((group) => group.name); + this.subvolumeGroupList.unshift(''); + }), catchError(() => { this.context.error(); return of(null); @@ -161,12 +174,14 @@ export class CephfsSubvolumeListComponent extends CdForm implements OnInit, OnCh } fetchData() { - this.subject.next(); + this.subject.next([]); } - ngOnChanges() { - this.subject.next(); - this.groupsSubject.next(); + ngOnChanges(changes: SimpleChanges) { + if (changes.fsName) { + this.subject.next([]); + this.groupsSubject.next([]); + } } updateSelection(selection: CdTableSelection) { @@ -203,7 +218,7 @@ export class CephfsSubvolumeListComponent extends CdForm implements OnInit, OnCh this.taskWrapper .wrapTaskAroundCall({ task: new FinishedTask('cephfs/subvolume/remove', { subVolumeName: this.selectedName }), - call: this.cephfsSubVolume.remove( + call: this.cephfsSubVolumeService.remove( this.fsName, this.selectedName, this.activeGroupName, @@ -222,20 +237,19 @@ export class CephfsSubvolumeListComponent extends CdForm implements OnInit, OnCh selectSubVolumeGroup(subVolumeGroupName: string) { this.activeGroupName = subVolumeGroupName; - this.getSubVolumes(subVolumeGroupName); + this.getSubVolumes(); } - getSubVolumes(subVolumeGroupName = '') { + getSubVolumes() { this.subVolumes$ = this.subject.pipe( switchMap(() => - this.cephfsSubVolume.get(this.fsName, subVolumeGroupName).pipe( + this.cephfsSubVolumeService.get(this.fsName, this.activeGroupName).pipe( catchError(() => { this.context.error(); return of(null); }) ) - ), - shareReplay(1) + ) ); } } diff --git 
a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cephfs/cephfs-subvolume-snapshots-list/cephfs-subvolume-snapshots-list.component.html b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cephfs/cephfs-subvolume-snapshots-list/cephfs-subvolume-snapshots-list.component.html new file mode 100644 index 00000000000..de311723652 --- /dev/null +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cephfs/cephfs-subvolume-snapshots-list/cephfs-subvolume-snapshots-list.component.html @@ -0,0 +1,36 @@ +<ng-container *ngIf="isLoading"> + <cd-loading-panel> + <span i18n>Loading snapshots...</span> + </cd-loading-panel> +</ng-container> + +<div class="row" + *ngIf="isSubVolumesAvailable; else noGroupsTpl"> + <div class="col-sm-2"> + <cd-vertical-navigation title="Groups" + [items]="subvolumeGroupList" + inputIdentifier="group-filter" + (emitActiveItem)="selectSubVolumeGroup($event)"></cd-vertical-navigation> + </div> + <div class="col-sm-2 vertical-line" + *ngIf="subVolumes$ | async"> + <cd-vertical-navigation title="Subvolumes" + [items]="subVolumesList" + (emitActiveItem)="selectSubVolume($event)" + inputIdentifier="subvol-filter"></cd-vertical-navigation> + </div> + <div class="col-8 vertical-line" + *ngIf="isSubVolumesAvailable"> + <cd-table [data]="snapshots$ | async" + columnMode="flex" + [columns]="columns" + selectionType="single" + [hasDetails]="false" + (fetchData)="fetchData()"></cd-table> + </div> +</div> +<ng-template #noGroupsTpl> + <cd-alert-panel type="info" + i18n + *ngIf="!isLoading">No subvolumes are present. Please create subvolumes to manage snapshots.</cd-alert-panel> +</ng-template> diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cephfs/cephfs-subvolume-snapshots-list/cephfs-subvolume-snapshots-list.component.scss b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cephfs/cephfs-subvolume-snapshots-list/cephfs-subvolume-snapshots-list.component.scss new file mode 100644 index 00000000000..e69de29bb2d --- /dev/null +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cephfs/cephfs-subvolume-snapshots-list/cephfs-subvolume-snapshots-list.component.scss diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cephfs/cephfs-subvolume-snapshots-list/cephfs-subvolume-snapshots-list.component.spec.ts b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cephfs/cephfs-subvolume-snapshots-list/cephfs-subvolume-snapshots-list.component.spec.ts new file mode 100644 index 00000000000..1d03cf2a8bc --- /dev/null +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cephfs/cephfs-subvolume-snapshots-list/cephfs-subvolume-snapshots-list.component.spec.ts @@ -0,0 +1,38 @@ +import { ComponentFixture, TestBed } from '@angular/core/testing'; + +import { CephfsSubvolumeSnapshotsListComponent } from './cephfs-subvolume-snapshots-list.component'; +import { HttpClientTestingModule } from '@angular/common/http/testing'; +import { SharedModule } from '~/app/shared/shared.module'; + +describe('CephfsSubvolumeSnapshotsListComponent', () => { + let component: CephfsSubvolumeSnapshotsListComponent; + let fixture: ComponentFixture<CephfsSubvolumeSnapshotsListComponent>; + + beforeEach(async () => { + await TestBed.configureTestingModule({ + declarations: [CephfsSubvolumeSnapshotsListComponent], + imports: [HttpClientTestingModule, SharedModule] + }).compileComponents(); + + fixture = TestBed.createComponent(CephfsSubvolumeSnapshotsListComponent); + component = fixture.componentInstance; + fixture.detectChanges(); + }); + + it('should create', () => { + expect(component).toBeTruthy(); + }); + + it('should show 
loading when the items are loading', () => { + component.isLoading = true; + fixture.detectChanges(); + expect(fixture.nativeElement.querySelector('cd-loading-panel')).toBeTruthy(); + }); + + it('should show the alert panel when there are no subvolumes', () => { + component.isLoading = false; + component.subvolumeGroupList = []; + fixture.detectChanges(); + expect(fixture.nativeElement.querySelector('cd-alert-panel')).toBeTruthy(); + }); +}); diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cephfs/cephfs-subvolume-snapshots-list/cephfs-subvolume-snapshots-list.component.ts b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cephfs/cephfs-subvolume-snapshots-list/cephfs-subvolume-snapshots-list.component.ts new file mode 100644 index 00000000000..251314c3e86 --- /dev/null +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cephfs/cephfs-subvolume-snapshots-list/cephfs-subvolume-snapshots-list.component.ts @@ -0,0 +1,148 @@ +import { Component, Input, OnChanges, OnInit, SimpleChanges } from '@angular/core'; +import { BehaviorSubject, Observable, forkJoin, of } from 'rxjs'; +import { catchError, shareReplay, switchMap, tap } from 'rxjs/operators'; +import { CephfsSubvolumeGroupService } from '~/app/shared/api/cephfs-subvolume-group.service'; +import { CephfsSubvolumeService } from '~/app/shared/api/cephfs-subvolume.service'; +import { CellTemplate } from '~/app/shared/enum/cell-template.enum'; +import { CdTableColumn } from '~/app/shared/models/cd-table-column'; +import { CdTableFetchDataContext } from '~/app/shared/models/cd-table-fetch-data-context'; +import { CephfsSubvolume, SubvolumeSnapshot } from '~/app/shared/models/cephfs-subvolume.model'; + +@Component({ + selector: 'cd-cephfs-subvolume-snapshots-list', + templateUrl: './cephfs-subvolume-snapshots-list.component.html', + styleUrls: ['./cephfs-subvolume-snapshots-list.component.scss'] +}) +export class CephfsSubvolumeSnapshotsListComponent implements OnInit, OnChanges { + @Input() fsName: string; + + context: CdTableFetchDataContext; + columns: CdTableColumn[] = []; + + subVolumes$: Observable<CephfsSubvolume[]>; + snapshots$: Observable<any[]>; + snapshotSubject = new BehaviorSubject<SubvolumeSnapshot[]>([]); + subVolumeSubject = new BehaviorSubject<CephfsSubvolume[]>([]); + + subvolumeGroupList: string[] = []; + subVolumesList: string[]; + + activeGroupName = ''; + activeSubVolumeName = ''; + + isSubVolumesAvailable = false; + isLoading = true; + + observables: any = []; + + constructor( + private cephfsSubvolumeGroupService: CephfsSubvolumeGroupService, + private cephfsSubvolumeService: CephfsSubvolumeService + ) {} + + ngOnInit(): void { + this.columns = [ + { + name: $localize`Name`, + prop: 'name', + flexGrow: 1 + }, + { + name: $localize`Created`, + prop: 'info.created_at', + flexGrow: 1, + cellTransformation: CellTemplate.timeAgo + }, + { + name: $localize`Pending Clones`, + prop: 'info.has_pending_clones', + flexGrow: 0.5, + cellTransformation: CellTemplate.badge, + customTemplateConfig: { + map: { + no: { class: 'badge-success' }, + yes: { class: 'badge-info' } + } + } + } + ]; + + this.cephfsSubvolumeGroupService + .get(this.fsName) + .pipe( + switchMap((groups) => { + // manually adding the group '_nogroup' to the list. 
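+ // an empty name stands for the default '_nogroup' group; existsInFs() below keeps only the groups that actually contain subvolumes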
+ groups.unshift({ name: '' }); + + const observables = groups.map((group) => + this.cephfsSubvolumeService.existsInFs(this.fsName, group.name).pipe( + switchMap((resp) => { + if (resp) { + this.subvolumeGroupList.push(group.name); + } + return of(resp); // Emit the response + }) + ) + ); + + return forkJoin(observables); + }) + ) + .subscribe(() => { + if (this.subvolumeGroupList.length) { + this.isSubVolumesAvailable = true; + } + this.isLoading = false; + }); + } + + ngOnChanges(changes: SimpleChanges): void { + if (changes.fsName) { + this.subVolumeSubject.next([]); + } + } + + selectSubVolumeGroup(subVolumeGroupName: string) { + this.activeGroupName = subVolumeGroupName; + this.getSubVolumes(); + } + + selectSubVolume(subVolumeName: string) { + this.activeSubVolumeName = subVolumeName; + this.getSubVolumesSnapshot(); + } + + getSubVolumes() { + this.subVolumes$ = this.subVolumeSubject.pipe( + switchMap(() => + this.cephfsSubvolumeService.get(this.fsName, this.activeGroupName, false).pipe( + tap((resp) => { + this.subVolumesList = resp.map((subVolume) => subVolume.name); + this.activeSubVolumeName = resp[0].name; + this.getSubVolumesSnapshot(); + }) + ) + ) + ); + } + + getSubVolumesSnapshot() { + this.snapshots$ = this.snapshotSubject.pipe( + switchMap(() => + this.cephfsSubvolumeService + .getSnapshots(this.fsName, this.activeSubVolumeName, this.activeGroupName) + .pipe( + catchError(() => { + this.context.error(); + return of(null); + }) + ) + ), + shareReplay(1) + ); + } + + fetchData() { + this.snapshotSubject.next([]); + } +} diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cephfs/cephfs-tabs/cephfs-tabs.component.html b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cephfs/cephfs-tabs/cephfs-tabs.component.html index 0ad69ccf50a..87268858be5 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cephfs/cephfs-tabs/cephfs-tabs.component.html +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cephfs/cephfs-tabs/cephfs-tabs.component.html @@ -1,63 +1,94 @@ <ng-container *ngIf="selection"> - <nav ngbNav - #nav="ngbNav" - (navChange)="softRefresh()" - class="nav-tabs" - cdStatefulTab="cephfs-tabs"> + <nav + ngbNav + #nav="ngbNav" + (navChange)="softRefresh()" + class="nav-tabs" + cdStatefulTab="cephfs-tabs" + > <ng-container ngbNavItem="details"> - <a ngbNavLink - i18n>Details</a> + <a + ngbNavLink + i18n>Details</a> <ng-template ngbNavContent> - <cd-cephfs-detail [data]="details"> - </cd-cephfs-detail> + <cd-cephfs-detail [data]="details"> </cd-cephfs-detail> </ng-template> </ng-container> <ng-container ngbNavItem="subvolumes"> - <a ngbNavLink - i18n>Subvolumes</a> + <a + ngbNavLink + i18n>Subvolumes</a> <ng-template ngbNavContent> - <cd-cephfs-subvolume-list [fsName]="selection.mdsmap.fs_name" - [pools]="details.pools"></cd-cephfs-subvolume-list> + <cd-cephfs-subvolume-list + [fsName]="selection.mdsmap.fs_name" + [pools]="details.pools" + ></cd-cephfs-subvolume-list> </ng-template> </ng-container> <ng-container ngbNavItem="subvolume-groups"> - <a ngbNavLink - i18n>Subvolume groups</a> + <a + ngbNavLink + i18n>Subvolume groups</a> <ng-template ngbNavContent> - <cd-cephfs-subvolume-group [fsName]="selection.mdsmap.fs_name" - [pools]="details.pools"> + <cd-cephfs-subvolume-group + [fsName]="selection.mdsmap.fs_name" + [pools]="details.pools"> </cd-cephfs-subvolume-group> </ng-template> </ng-container> + <ng-container ngbNavItem="snapshots"> + <a + ngbNavLink + i18n>Snapshots</a> + <ng-template ngbNavContent> + <cd-cephfs-subvolume-snapshots-list 
[fsName]="selection.mdsmap.fs_name"> + </cd-cephfs-subvolume-snapshots-list> + </ng-template> + </ng-container> + <ng-container ngbNavItem="snapshot-schedules"> + <a + ngbNavLink + i18n>Snapshot schedules</a> + <ng-template ngbNavContent> + <cd-cephfs-snapshotschedule-list + [fsName]="selection.mdsmap.fs_name" + ></cd-cephfs-snapshotschedule-list> + </ng-template> + </ng-container> <ng-container ngbNavItem="clients"> <a ngbNavLink> <ng-container i18n>Clients</ng-container> <span class="badge badge-pill badge-tab ms-1">{{ clients.data.length }}</span> </a> <ng-template ngbNavContent> - <cd-cephfs-clients [id]="id" - [clients]="clients" - (triggerApiUpdate)="refresh()"> + <cd-cephfs-clients + [id]="id" + [clients]="clients" + (triggerApiUpdate)="refresh()"> </cd-cephfs-clients> </ng-template> </ng-container> <ng-container ngbNavItem="directories"> - <a ngbNavLink - i18n>Directories</a> + <a + ngbNavLink + i18n>Directories</a> <ng-template ngbNavContent> <cd-cephfs-directories [id]="id"></cd-cephfs-directories> </ng-template> </ng-container> <ng-container ngbNavItem="performance-details"> - <a ngbNavLink - i18n>Performance Details</a> + <a + ngbNavLink + i18n>Performance Details</a> <ng-template ngbNavContent> - <cd-grafana i18n-title - title="CephFS MDS performance" - [grafanaPath]="'mds-performance?var-mds_servers=mds.' + grafanaId" - [type]="'metrics'" - uid="tbO9LAiZz" - grafanaStyle="one"> + <cd-grafana + i18n-title + title="CephFS MDS performance" + [grafanaPath]="'mds-performance?var-mds_servers=mds.' + grafanaId" + [type]="'metrics'" + uid="tbO9LAiZz" + grafanaStyle="one" + > </cd-grafana> </ng-template> </ng-container> diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cephfs/cephfs.module.ts b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cephfs/cephfs.module.ts index a83e0f16870..53544ccd9ed 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cephfs/cephfs.module.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cephfs/cephfs.module.ts @@ -19,6 +19,9 @@ import { CephfsSubvolumeListComponent } from './cephfs-subvolume-list/cephfs-sub import { CephfsSubvolumeFormComponent } from './cephfs-subvolume-form/cephfs-subvolume-form.component'; import { CephfsSubvolumeGroupComponent } from './cephfs-subvolume-group/cephfs-subvolume-group.component'; import { CephfsSubvolumegroupFormComponent } from './cephfs-subvolumegroup-form/cephfs-subvolumegroup-form.component'; +import { CephfsSubvolumeSnapshotsListComponent } from './cephfs-subvolume-snapshots-list/cephfs-subvolume-snapshots-list.component'; +import { CephfsSnapshotscheduleListComponent } from './cephfs-snapshotschedule-list/cephfs-snapshotschedule-list.component'; +import { DataTableModule } from '../../shared/datatable/datatable.module'; @NgModule({ imports: [ @@ -31,7 +34,8 @@ import { CephfsSubvolumegroupFormComponent } from './cephfs-subvolumegroup-form/ FormsModule, ReactiveFormsModule, NgbTypeaheadModule, - NgbTooltipModule + NgbTooltipModule, + DataTableModule ], declarations: [ CephfsDetailComponent, @@ -45,7 +49,9 @@ import { CephfsSubvolumegroupFormComponent } from './cephfs-subvolumegroup-form/ CephfsSubvolumeFormComponent, CephfsDirectoriesComponent, CephfsSubvolumeGroupComponent, - CephfsSubvolumegroupFormComponent + CephfsSubvolumegroupFormComponent, + CephfsSubvolumeSnapshotsListComponent, + CephfsSnapshotscheduleListComponent ] }) export class CephfsModule {} diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/bucket-tag-modal/bucket-tag-modal.component.html 
b/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/bucket-tag-modal/bucket-tag-modal.component.html new file mode 100644 index 00000000000..3e732e35568 --- /dev/null +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/bucket-tag-modal/bucket-tag-modal.component.html @@ -0,0 +1,59 @@ +<cd-modal [modalRef]="activeModal"> + <span class="modal-title" + i18n>{{ getMode() }} Tag</span> + + <ng-container class="modal-content"> + <form class="form" + #formDir="ngForm" + [formGroup]="form"> + <div class="modal-body"> + <!-- Key --> + <div class="form-group row"> + <label class="cd-col-form-label required" + for="key" + i18n>Key</label> + <div class="cd-col-form-input"> + <input type="text" + class="form-control" + formControlName="key" + id="key"> + <span class="invalid-feedback" + *ngIf="form.showError('key', formDir, 'required')" + i18n>This field is required.</span> + <span class="invalid-feedback" + *ngIf="form.showError('key', formDir, 'unique')" + i18n>This key must be unique.</span> + <span class="invalid-feedback" + *ngIf="form.showError('key', formDir, 'maxLength')" + i18n>Length of the key must be a maximum of 128 characters</span> + </div> + </div> + + <!-- Value --> + <div class="form-group row"> + <label class="cd-col-form-label required" + for="value" + i18n>Value</label> + <div class="cd-col-form-input"> + <input id="value" + class="form-control" + type="text" + formControlName="value"> + <span *ngIf="form.showError('value', formDir, 'required')" + class="invalid-feedback" + i18n>This field is required.</span> + <span class="invalid-feedback" + *ngIf="form.showError('value', formDir, 'maxLength')" + i18n>Length of the value must be a maximum of 128 characters</span> + </div> + </div> + </div> + + <div class="modal-footer"> + <cd-form-button-panel (submitActionEvent)="onSubmit()" + [form]="form" + [submitText]="getMode()"></cd-form-button-panel> + </div> + </form> + </ng-container> + </cd-modal> diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/bucket-tag-modal/bucket-tag-modal.component.scss b/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/bucket-tag-modal/bucket-tag-modal.component.scss new file mode 100644 index 00000000000..e69de29bb2d --- /dev/null +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/bucket-tag-modal/bucket-tag-modal.component.scss diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/bucket-tag-modal/bucket-tag-modal.component.spec.ts b/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/bucket-tag-modal/bucket-tag-modal.component.spec.ts new file mode 100644 index 00000000000..a54e7eeee08 --- /dev/null +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/bucket-tag-modal/bucket-tag-modal.component.spec.ts @@ -0,0 +1,27 @@ +import { ComponentFixture, TestBed } from '@angular/core/testing'; + +import { BucketTagModalComponent } from './bucket-tag-modal.component'; +import { HttpClientTestingModule } from '@angular/common/http/testing'; +import { ReactiveFormsModule } from '@angular/forms'; +import { NgbActiveModal } from '@ng-bootstrap/ng-bootstrap'; + +describe('BucketTagModalComponent', () => { + let component: BucketTagModalComponent; + let fixture: ComponentFixture<BucketTagModalComponent>; + + beforeEach(async () => { + await TestBed.configureTestingModule({ + declarations: [BucketTagModalComponent], + imports: [HttpClientTestingModule, ReactiveFormsModule], + providers: [NgbActiveModal] + }).compileComponents(); + + fixture = TestBed.createComponent(BucketTagModalComponent); + component = fixture.componentInstance; +
fixture.detectChanges(); + }); + + it('should create', () => { + expect(component).toBeTruthy(); + }); +}); diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/bucket-tag-modal/bucket-tag-modal.component.ts b/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/bucket-tag-modal/bucket-tag-modal.component.ts new file mode 100644 index 00000000000..5135539e5d4 --- /dev/null +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/bucket-tag-modal/bucket-tag-modal.component.ts @@ -0,0 +1,75 @@ +import { Component, EventEmitter, Output } from '@angular/core'; +import { Validators } from '@angular/forms'; +import { NgbActiveModal } from '@ng-bootstrap/ng-bootstrap'; +import _ from 'lodash'; +import { ActionLabelsI18n } from '~/app/shared/constants/app.constants'; +import { CdFormBuilder } from '~/app/shared/forms/cd-form-builder'; +import { CdFormGroup } from '~/app/shared/forms/cd-form-group'; +import { CdValidators } from '~/app/shared/forms/cd-validators'; + +@Component({ + selector: 'cd-bucket-tag-modal', + templateUrl: './bucket-tag-modal.component.html', + styleUrls: ['./bucket-tag-modal.component.scss'] +}) +export class BucketTagModalComponent { + @Output() + submitAction = new EventEmitter(); + + form: CdFormGroup; + editMode = false; + currentKeyTags: string[]; + storedKey: string; + + constructor( + private formBuilder: CdFormBuilder, + public activeModal: NgbActiveModal, + public actionLabels: ActionLabelsI18n + ) { + this.createForm(); + } + + private createForm() { + this.form = this.formBuilder.group({ + key: [ + null, + [ + Validators.required, + CdValidators.custom('unique', (value: string) => { + if (_.isEmpty(value) && !this.currentKeyTags) { + return false; + } + return this.storedKey !== value && this.currentKeyTags.includes(value); + }), + CdValidators.custom('maxLength', (value: string) => { + if (_.isEmpty(value)) return false; + return value.length > 128; + }) + ] + ], + value: [ + null, + [ + Validators.required, + CdValidators.custom('maxLength', (value: string) => { + if (_.isEmpty(value)) return false; + return value.length > 128; + }) + ] + ] + }); + } + + onSubmit() { + this.submitAction.emit(this.form.value); + this.activeModal.close(); + } + + getMode() { + return this.editMode ? 
this.actionLabels.EDIT : this.actionLabels.ADD; + } + + fillForm(tag: Record<string, string>) { + this.form.setValue(tag); + } +} diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-bucket-details/rgw-bucket-details.component.html b/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-bucket-details/rgw-bucket-details.component.html index f2447feab26..e96a89b234f 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-bucket-details/rgw-bucket-details.component.html +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-bucket-details/rgw-bucket-details.component.html @@ -100,6 +100,21 @@ </ng-container> </tbody> </table> + + <!-- Tags --> + <ng-container *ngIf="selection.tagset"> + <legend i18n>Tags</legend> + <table class="table table-striped table-bordered"> + <tbody> + <tr *ngFor="let tag of selection.tagset | keyvalue"> + <td i18n + class="bold w-25">{{tag.key}}</td> + <td class="w-75">{{ tag.value }}</td> + </tr> + </tbody> + </table> + </ng-container> + </ng-template> </ng-container> diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-bucket-form/rgw-bucket-form.component.html b/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-bucket-form/rgw-bucket-form.component.html index 761081c3744..a9704c0bdc8 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-bucket-form/rgw-bucket-form.component.html +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-bucket-form/rgw-bucket-form.component.html @@ -385,6 +385,32 @@ </div> </fieldset> + <!-- Tags --> + <legend class="cd-header" + i18n>Tags + <cd-helper>Tagging gives you a way to categorize storage</cd-helper> + </legend> + <span *ngFor="let tag of tags; let i=index;"> + <ng-container *ngTemplateOutlet="tagTpl; context:{index: i, tag: tag}"></ng-container> + </span> + + <div class="row"> + <div class="col-12"> + <strong *ngIf="tags.length > 19" + class="text-warning" + i18n>Maximum of 20 tags reached</strong> + <button type="button" + id="add-tag" + class="btn btn-light float-end my-3" + [disabled]="tags.length > 19" + (click)="showTagModal()"> + <i [ngClass]="[icons.add]"></i> + <ng-container i18n>Add tag</ng-container> + </button> + </div> + </div> + + </div> <div class="card-footer"> <cd-form-button-panel (submitActionEvent)="submit()" @@ -395,3 +421,37 @@ </div> </form> </div> + +<ng-template #tagTpl + let-tag="tag" + let-index="index"> + <div class="input-group my-2"> + <ng-container *ngFor="let config of tagConfig"> + <input type="text" + id="tag-{{config.attribute}}-{{index}}" + class="form-control" + [ngbTooltip]="config.attribute" + [value]="tag[config.attribute]" + disabled + readonly> + </ng-container> + + <!-- Tag actions --> + <button type="button" + class="btn btn-light" + id="tag-edit-{{index}}" + i18n-ngbTooltip + ngbTooltip="Edit" + (click)="showTagModal(index)"> + <i [ngClass]="[icons.edit]"></i> + </button> + <button type="button" + class="btn btn-light" + id="tag-delete-{{index}}" + i18n-ngbTooltip + ngbTooltip="Delete" + (click)="deleteTag(index)"> + <i [ngClass]="[icons.trash]"></i> + </button> + </div> +</ng-template> diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-bucket-form/rgw-bucket-form.component.ts b/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-bucket-form/rgw-bucket-form.component.ts index de8e0383ac0..6b90b45e16a 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-bucket-form/rgw-bucket-form.component.ts +++ 
b/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-bucket-form/rgw-bucket-form.component.ts @@ -21,6 +21,7 @@ import { RgwBucketEncryptionModel } from '../models/rgw-bucket-encryption'; import { RgwBucketMfaDelete } from '../models/rgw-bucket-mfa-delete'; import { RgwBucketVersioning } from '../models/rgw-bucket-versioning'; import { RgwConfigModalComponent } from '../rgw-config-modal/rgw-config-modal.component'; +import { BucketTagModalComponent } from '../bucket-tag-modal/bucket-tag-modal.component'; @Component({ selector: 'cd-rgw-bucket-form', @@ -42,6 +43,15 @@ export class RgwBucketFormComponent extends CdForm implements OnInit, AfterViewC icons = Icons; kmsVaultConfig = false; s3VaultConfig = false; + tags: Record<string, string>[] = []; + tagConfig = [ + { + attribute: 'key' + }, + { + attribute: 'value' + } + ]; get isVersioningEnabled(): boolean { return this.bucketForm.getValue('versioning'); @@ -191,6 +201,11 @@ export class RgwBucketFormComponent extends CdForm implements OnInit, AfterViewC value['versioning'] = bidResp['versioning'] === RgwBucketVersioning.ENABLED; value['mfa-delete'] = bidResp['mfa_delete'] === RgwBucketMfaDelete.ENABLED; value['encryption_enabled'] = bidResp['encryption'] === 'Enabled'; + if (bidResp['tagset']) { + for (const [key, value] of Object.entries(bidResp['tagset'])) { + this.tags.push({ key: key, value: value.toString() }); + } + } // Append default values. value = _.merge(defaults, value); // Update the form. @@ -224,6 +239,7 @@ export class RgwBucketFormComponent extends CdForm implements OnInit, AfterViewC return; } const values = this.bucketForm.value; + const xmlStrTags = this.tagsToXML(this.tags); if (this.editing) { // Edit const versioning = this.getVersioningStatus(); @@ -241,7 +257,8 @@ export class RgwBucketFormComponent extends CdForm implements OnInit, AfterViewC values['mfa-token-serial'], values['mfa-token-pin'], values['lock_mode'], - values['lock_retention_period_days'] + values['lock_retention_period_days'], + xmlStrTags ) .subscribe( () => { @@ -269,7 +286,8 @@ export class RgwBucketFormComponent extends CdForm implements OnInit, AfterViewC values['lock_retention_period_days'], values['encryption_enabled'], values['encryption_type'], - values['keyId'] + values['keyId'], + xmlStrTags ) .subscribe( () => { @@ -337,4 +355,51 @@ export class RgwBucketFormComponent extends CdForm implements OnInit, AfterViewC .get('encryptionType') .setValue(this.bucketForm.getValue('encryption_type') || 'AES256'); } + + showTagModal(index?: number) { + const modalRef = this.modalService.show(BucketTagModalComponent); + const modalComponent = modalRef.componentInstance as BucketTagModalComponent; + modalComponent.currentKeyTags = this.tags.map((item) => item.key); + + if (_.isNumber(index)) { + modalComponent.editMode = true; + modalComponent.fillForm(this.tags[index]); + modalComponent.storedKey = this.tags[index]['key']; + } + + modalComponent.submitAction.subscribe((tag: Record<string, string>) => { + this.setTag(tag, index); + }); + } + + deleteTag(index: number) { + this.tags.splice(index, 1); + } + + private setTag(tag: Record<string, string>, index?: number) { + if (_.isNumber(index)) { + this.tags[index] = tag; + } else { + this.tags.push(tag); + } + this.bucketForm.markAsDirty(); + this.bucketForm.updateValueAndValidity(); + } + + private tagsToXML(tags: Record<string, string>[]): string { + let xml = '<Tagging><TagSet>'; + for (const tag of tags) { + xml += '<Tag>'; + for (const key in tag) { + if (key === 'key') { + xml += 
`<Key>${tag[key]}</Key>`; + } else if (key === 'value') { + xml += `<Value>${tag[key]}</Value>`; + } + } + xml += '</Tag>'; + } + xml += '</TagSet></Tagging>'; + return xml; + } } diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw.module.ts b/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw.module.ts index c16c13a81bd..8fcf3c6c664 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw.module.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw.module.ts @@ -44,6 +44,7 @@ import { DashboardV3Module } from '../dashboard-v3/dashboard-v3.module'; import { RgwSyncPrimaryZoneComponent } from './rgw-sync-primary-zone/rgw-sync-primary-zone.component'; import { RgwSyncMetadataInfoComponent } from './rgw-sync-metadata-info/rgw-sync-metadata-info.component'; import { RgwSyncDataInfoComponent } from './rgw-sync-data-info/rgw-sync-data-info.component'; +import { BucketTagModalComponent } from './bucket-tag-modal/bucket-tag-modal.component'; @NgModule({ imports: [ @@ -100,7 +101,8 @@ import { RgwSyncDataInfoComponent } from './rgw-sync-data-info/rgw-sync-data-inf RgwOverviewDashboardComponent, RgwSyncPrimaryZoneComponent, RgwSyncMetadataInfoComponent, - RgwSyncDataInfoComponent + RgwSyncDataInfoComponent, + BucketTagModalComponent ] }) export class RgwModule {} @@ -156,6 +158,13 @@ const routes: Routes = [ data: { breadcrumbs: ActionLabels.CREATE } + }, + { + path: URLVerbs.EDIT, + component: CrudFormComponent, + data: { + breadcrumbs: ActionLabels.EDIT + } } ] }, diff --git a/src/pybind/mgr/dashboard/frontend/src/app/core/context/context.component.ts b/src/pybind/mgr/dashboard/frontend/src/app/core/context/context.component.ts index e036b754438..178f230c931 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/core/context/context.component.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/core/context/context.component.ts @@ -24,12 +24,14 @@ export class ContextComponent implements OnInit, OnDestroy { private subs = new Subscription(); private rgwUrlPrefix = '/rgw'; private rgwUserUrlPrefix = '/rgw/user'; + private rgwRoleUrlPrefix = '/rgw/roles'; private rgwBuckerUrlPrefix = '/rgw/bucket'; permissions: Permissions; featureToggleMap$: FeatureTogglesMap$; isRgwRoute = document.location.href.includes(this.rgwUserUrlPrefix) || - document.location.href.includes(this.rgwBuckerUrlPrefix); + document.location.href.includes(this.rgwBuckerUrlPrefix) || + document.location.href.includes(this.rgwRoleUrlPrefix); constructor( private authStorageService: AuthStorageService, @@ -48,9 +50,11 @@ export class ContextComponent implements OnInit, OnDestroy { .pipe(filter((event: Event) => event instanceof NavigationEnd)) .subscribe( () => - (this.isRgwRoute = [this.rgwBuckerUrlPrefix, this.rgwUserUrlPrefix].some((urlPrefix) => - this.router.url.startsWith(urlPrefix) - )) + (this.isRgwRoute = [ + this.rgwBuckerUrlPrefix, + this.rgwUserUrlPrefix, + this.rgwRoleUrlPrefix + ].some((urlPrefix) => this.router.url.startsWith(urlPrefix))) ) ); // Set daemon list polling only when in RGW route: diff --git a/src/pybind/mgr/dashboard/frontend/src/app/shared/api/cephfs-snapshot-schedule.service.spec.ts b/src/pybind/mgr/dashboard/frontend/src/app/shared/api/cephfs-snapshot-schedule.service.spec.ts new file mode 100644 index 00000000000..766b8f385b2 --- /dev/null +++ b/src/pybind/mgr/dashboard/frontend/src/app/shared/api/cephfs-snapshot-schedule.service.spec.ts @@ -0,0 +1,22 @@ +import { TestBed } from '@angular/core/testing'; +import { HttpClientTestingModule } from 
'@angular/common/http/testing'; + +import { configureTestBed } from '~/testing/unit-test-helper'; +import { CephfsSnapshotScheduleService } from './cephfs-snapshot-schedule.service'; + +describe('CephfsSnapshotScheduleService', () => { + let service: CephfsSnapshotScheduleService; + + configureTestBed({ + providers: [CephfsSnapshotScheduleService], + imports: [HttpClientTestingModule] + }); + + beforeEach(() => { + service = TestBed.inject(CephfsSnapshotScheduleService); + }); + + it('should be created', () => { + expect(service).toBeTruthy(); + }); +}); diff --git a/src/pybind/mgr/dashboard/frontend/src/app/shared/api/cephfs-snapshot-schedule.service.ts b/src/pybind/mgr/dashboard/frontend/src/app/shared/api/cephfs-snapshot-schedule.service.ts new file mode 100644 index 00000000000..ec9f58c0fee --- /dev/null +++ b/src/pybind/mgr/dashboard/frontend/src/app/shared/api/cephfs-snapshot-schedule.service.ts @@ -0,0 +1,39 @@ +import { HttpClient } from '@angular/common/http'; +import { Injectable } from '@angular/core'; +import { Observable } from 'rxjs/internal/Observable'; +import { SnapshotSchedule } from '../models/snapshot-schedule'; +import { map } from 'rxjs/operators'; + +@Injectable({ + providedIn: 'root' +}) +export class CephfsSnapshotScheduleService { + baseURL = 'api/cephfs'; + + constructor(private http: HttpClient) {} + + getSnapshotScheduleList( + path: string, + fs: string, + recursive = true + ): Observable<SnapshotSchedule[]> { + return this.http + .get<SnapshotSchedule[]>( + `${this.baseURL}/snaphost/schedule?path=${path}&fs=${fs}&recursive=${recursive}` + ) + .pipe( + map((snapList: SnapshotSchedule[]) => + snapList.map((snapItem: SnapshotSchedule) => ({ + ...snapItem, + status: snapItem.active ? 'Active' : 'Inactive', + subvol: snapItem?.subvol || ' - ', + retention: Object.values(snapItem.retention)?.length + ? 
Object.entries(snapItem.retention) + ?.map?.(([frequency, interval]) => `${interval}${frequency.toLocaleUpperCase()}`) + .join(' ') + : '-' + })) + ) + ); + } +} diff --git a/src/pybind/mgr/dashboard/frontend/src/app/shared/api/cephfs-subvolume-group.service.ts b/src/pybind/mgr/dashboard/frontend/src/app/shared/api/cephfs-subvolume-group.service.ts index db7fcfacd59..49d001f04f0 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/shared/api/cephfs-subvolume-group.service.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/shared/api/cephfs-subvolume-group.service.ts @@ -1,9 +1,9 @@ import { HttpClient } from '@angular/common/http'; import { Injectable } from '@angular/core'; import { Observable, of } from 'rxjs'; -import { CephfsSubvolumeGroup } from '../models/cephfs-subvolumegroup.model'; import _ from 'lodash'; import { mapTo, catchError } from 'rxjs/operators'; +import { CephfsSubvolumeGroup } from '../models/cephfs-subvolume-group.model'; @Injectable({ providedIn: 'root' @@ -13,8 +13,12 @@ export class CephfsSubvolumeGroupService { constructor(private http: HttpClient) {} - get(volName: string): Observable<CephfsSubvolumeGroup[]> { - return this.http.get<CephfsSubvolumeGroup[]>(`${this.baseURL}/${volName}`); + get(volName: string, info = true): Observable<CephfsSubvolumeGroup[]> { + return this.http.get<CephfsSubvolumeGroup[]>(`${this.baseURL}/${volName}`, { + params: { + info: info + } + }); } create( diff --git a/src/pybind/mgr/dashboard/frontend/src/app/shared/api/cephfs-subvolume.service.spec.ts b/src/pybind/mgr/dashboard/frontend/src/app/shared/api/cephfs-subvolume.service.spec.ts index e40e9a52f3f..2e8448ff1a2 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/shared/api/cephfs-subvolume.service.spec.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/shared/api/cephfs-subvolume.service.spec.ts @@ -29,7 +29,7 @@ describe('CephfsSubvolumeService', () => { it('should call get', () => { service.get('testFS').subscribe(); - const req = httpTesting.expectOne('api/cephfs/subvolume/testFS?group_name='); + const req = httpTesting.expectOne('api/cephfs/subvolume/testFS?group_name=&info=true'); expect(req.request.method).toBe('GET'); }); @@ -40,4 +40,12 @@ describe('CephfsSubvolumeService', () => { ); expect(req.request.method).toBe('DELETE'); }); + + it('should call getSnapshots', () => { + service.getSnapshots('testFS', 'testSubvol').subscribe(); + const req = httpTesting.expectOne( + 'api/cephfs/subvolume/snapshot/testFS/testSubvol?group_name=' + ); + expect(req.request.method).toBe('GET'); + }); }); diff --git a/src/pybind/mgr/dashboard/frontend/src/app/shared/api/cephfs-subvolume.service.ts b/src/pybind/mgr/dashboard/frontend/src/app/shared/api/cephfs-subvolume.service.ts index 4c167725007..d76523aafd2 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/shared/api/cephfs-subvolume.service.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/shared/api/cephfs-subvolume.service.ts @@ -1,6 +1,6 @@ import { HttpClient } from '@angular/common/http'; import { Injectable } from '@angular/core'; -import { CephfsSubvolume } from '../models/cephfs-subvolume.model'; +import { CephfsSubvolume, SubvolumeSnapshot } from '../models/cephfs-subvolume.model'; import { Observable, of } from 'rxjs'; import { catchError, mapTo } from 'rxjs/operators'; import _ from 'lodash'; @@ -13,10 +13,11 @@ export class CephfsSubvolumeService { constructor(private http: HttpClient) {} - get(fsName: string, subVolumeGroupName: string = ''): Observable<CephfsSubvolume[]> { + get(fsName: string, 
subVolumeGroupName: string = '', info = true): Observable<CephfsSubvolume[]> { return this.http.get<CephfsSubvolume[]>(`${this.baseURL}/${fsName}`, { params: { - group_name: subVolumeGroupName + group_name: subVolumeGroupName, + info: info } }); } @@ -86,6 +87,14 @@ export class CephfsSubvolumeService { ); } + existsInFs(fsName: string, groupName = ''): Observable<boolean> { + return this.http.get<boolean>(`${this.baseURL}/${fsName}/exists`, { + params: { + group_name: groupName + } + }); + } + update(fsName: string, subVolumeName: string, size: string, subVolumeGroupName: string = '') { return this.http.put(`${this.baseURL}/${fsName}`, { subvol_name: subVolumeName, @@ -93,4 +102,19 @@ export class CephfsSubvolumeService { group_name: subVolumeGroupName }); } + + getSnapshots( + fsName: string, + subVolumeName: string, + groupName = '' + ): Observable<SubvolumeSnapshot[]> { + return this.http.get<SubvolumeSnapshot[]>( + `${this.baseURL}/snapshot/${fsName}/${subVolumeName}`, + { + params: { + group_name: groupName + } + } + ); + } } diff --git a/src/pybind/mgr/dashboard/frontend/src/app/shared/api/rgw-bucket.service.spec.ts b/src/pybind/mgr/dashboard/frontend/src/app/shared/api/rgw-bucket.service.spec.ts index 2c42d8b427c..15821c3b626 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/shared/api/rgw-bucket.service.spec.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/shared/api/rgw-bucket.service.spec.ts @@ -59,11 +59,12 @@ describe('RgwBucketService', () => { '5', true, 'aws:kms', - 'qwerty1' + 'qwerty1', + null ) .subscribe(); const req = httpTesting.expectOne( - `api/rgw/bucket?bucket=foo&uid=bar&zonegroup=default&placement_target=default-placement&lock_enabled=false&lock_mode=COMPLIANCE&lock_retention_period_days=5&encryption_state=true&encryption_type=aws%253Akms&key_id=qwerty1&${RgwHelper.DAEMON_QUERY_PARAM}` + `api/rgw/bucket?bucket=foo&uid=bar&zonegroup=default&placement_target=default-placement&lock_enabled=false&lock_mode=COMPLIANCE&lock_retention_period_days=5&encryption_state=true&encryption_type=aws%253Akms&key_id=qwerty1&tags=null&${RgwHelper.DAEMON_QUERY_PARAM}` ); expect(req.request.method).toBe('POST'); }); @@ -82,11 +83,12 @@ describe('RgwBucketService', () => { '1', '223344', 'GOVERNANCE', - '10' + '10', + null ) .subscribe(); const req = httpTesting.expectOne( - `api/rgw/bucket/foo?${RgwHelper.DAEMON_QUERY_PARAM}&bucket_id=bar&uid=baz&versioning_state=Enabled&encryption_state=true&encryption_type=aws%253Akms&key_id=qwerty1&mfa_delete=Enabled&mfa_token_serial=1&mfa_token_pin=223344&lock_mode=GOVERNANCE&lock_retention_period_days=10` + `api/rgw/bucket/foo?${RgwHelper.DAEMON_QUERY_PARAM}&bucket_id=bar&uid=baz&versioning_state=Enabled&encryption_state=true&encryption_type=aws%253Akms&key_id=qwerty1&mfa_delete=Enabled&mfa_token_serial=1&mfa_token_pin=223344&lock_mode=GOVERNANCE&lock_retention_period_days=10&tags=null` ); expect(req.request.method).toBe('PUT'); }); diff --git a/src/pybind/mgr/dashboard/frontend/src/app/shared/api/rgw-bucket.service.ts b/src/pybind/mgr/dashboard/frontend/src/app/shared/api/rgw-bucket.service.ts index 7207d0b5ca7..87561d92d89 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/shared/api/rgw-bucket.service.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/shared/api/rgw-bucket.service.ts @@ -59,7 +59,8 @@ export class RgwBucketService extends ApiClient { lock_retention_period_days: string, encryption_state: boolean, encryption_type: string, - key_id: string + key_id: string, + tags: string ) { return 
this.rgwDaemonService.request((params: HttpParams) => { return this.http.post(this.url, null, { @@ -75,6 +76,7 @@ export class RgwBucketService extends ApiClient { encryption_state: String(encryption_state), encryption_type, key_id, + tags: tags, daemon_name: params.get('daemon_name') } }) @@ -94,7 +96,8 @@ export class RgwBucketService extends ApiClient { mfaTokenSerial: string, mfaTokenPin: string, lockMode: 'GOVERNANCE' | 'COMPLIANCE', - lockRetentionPeriodDays: string + lockRetentionPeriodDays: string, + tags: string ) { return this.rgwDaemonService.request((params: HttpParams) => { params = params.appendAll({ @@ -108,7 +111,8 @@ export class RgwBucketService extends ApiClient { mfa_token_serial: mfaTokenSerial, mfa_token_pin: mfaTokenPin, lock_mode: lockMode, - lock_retention_period_days: lockRetentionPeriodDays + lock_retention_period_days: lockRetentionPeriodDays, + tags: tags }); return this.http.put(`${this.url}/${bucket}`, null, { params: params }); }); diff --git a/src/pybind/mgr/dashboard/frontend/src/app/shared/components/code-block/code-block.component.html b/src/pybind/mgr/dashboard/frontend/src/app/shared/components/code-block/code-block.component.html new file mode 100644 index 00000000000..7cf78b8d1fb --- /dev/null +++ b/src/pybind/mgr/dashboard/frontend/src/app/shared/components/code-block/code-block.component.html @@ -0,0 +1,21 @@ +<ng-container *ngIf="codes.length > 1; else singleCodeBlock"> + <pre id="bigCodeBlock"> + <span *ngFor="let code of codes" + class="d-flex p-2 align-items-center justify-content-between text-dark"> + <span>{{code}}</span> + <cd-copy-2-clipboard-button + [source]="code" + [byId]="false"></cd-copy-2-clipboard-button> + </span> + </pre> +</ng-container> + +<ng-template #singleCodeBlock> + <pre class="d-flex p-2 align-items-center justify-content-between text-dark" + id="singleCodeBlock"> + <span>{{codes}}</span> + <cd-copy-2-clipboard-button + [source]="codes" + [byId]="false"></cd-copy-2-clipboard-button> + </pre> +</ng-template> diff --git a/src/pybind/mgr/dashboard/frontend/src/app/shared/components/code-block/code-block.component.scss b/src/pybind/mgr/dashboard/frontend/src/app/shared/components/code-block/code-block.component.scss new file mode 100644 index 00000000000..f601dfe6609 --- /dev/null +++ b/src/pybind/mgr/dashboard/frontend/src/app/shared/components/code-block/code-block.component.scss @@ -0,0 +1,6 @@ +@use './src/styles/vendor/variables' as vv; + +pre { + background-color: vv.$code-block-bg; + border-radius: 0.5rem; +} diff --git a/src/pybind/mgr/dashboard/frontend/src/app/shared/components/code-block/code-block.component.spec.ts b/src/pybind/mgr/dashboard/frontend/src/app/shared/components/code-block/code-block.component.spec.ts new file mode 100644 index 00000000000..bc5ad428fd8 --- /dev/null +++ b/src/pybind/mgr/dashboard/frontend/src/app/shared/components/code-block/code-block.component.spec.ts @@ -0,0 +1,38 @@ +import { ComponentFixture, TestBed } from '@angular/core/testing'; + +import { CodeBlockComponent } from './code-block.component'; +import { configureTestBed } from '~/testing/unit-test-helper'; + +describe('CodeBlockComponent', () => { + let component: CodeBlockComponent; + let fixture: ComponentFixture<CodeBlockComponent>; + + configureTestBed({ + declarations: [CodeBlockComponent] + }); + + beforeEach(() => { + fixture = TestBed.createComponent(CodeBlockComponent); + component = fixture.componentInstance; + component.codes = []; + fixture.detectChanges(); + }); + + it('should create', () => { + 
expect(component).toBeTruthy(); + }); + + it('should show single codeblock if there is only one code', () => { + component.codes = ['code']; + fixture.detectChanges(); + expect(fixture.nativeElement.querySelector('#singleCodeBlock')).not.toBeNull(); + expect(fixture.nativeElement.querySelector('#bigCodeBlock')).toBeNull(); + }); + + it('should show big codeblock if there are multiple codes', () => { + component.codes = ['code1', 'code2']; + fixture.detectChanges(); + expect(fixture.nativeElement.querySelector('#bigCodeBlock')).not.toBeNull(); + expect(fixture.nativeElement.querySelector('#singleCodeBlock')).toBeNull(); + }); +}); diff --git a/src/pybind/mgr/dashboard/frontend/src/app/shared/components/code-block/code-block.component.ts b/src/pybind/mgr/dashboard/frontend/src/app/shared/components/code-block/code-block.component.ts new file mode 100644 index 00000000000..91d2d991f37 --- /dev/null +++ b/src/pybind/mgr/dashboard/frontend/src/app/shared/components/code-block/code-block.component.ts @@ -0,0 +1,11 @@ +import { Component, Input } from '@angular/core'; + +@Component({ + selector: 'cd-code-block', + templateUrl: './code-block.component.html', + styleUrls: ['./code-block.component.scss'] +}) +export class CodeBlockComponent { + @Input() + codes: string[]; +} diff --git a/src/pybind/mgr/dashboard/frontend/src/app/shared/components/components.module.ts b/src/pybind/mgr/dashboard/frontend/src/app/shared/components/components.module.ts index 17f418d1e14..1f31374c18b 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/shared/components/components.module.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/shared/components/components.module.ts @@ -51,6 +51,8 @@ import { UsageBarComponent } from './usage-bar/usage-bar.component'; import { WizardComponent } from './wizard/wizard.component'; import { CardComponent } from './card/card.component'; import { CardRowComponent } from './card-row/card-row.component'; +import { CodeBlockComponent } from './code-block/code-block.component'; +import { VerticalNavigationComponent } from './vertical-navigation/vertical-navigation.component'; @NgModule({ imports: [ @@ -105,7 +107,9 @@ import { CardRowComponent } from './card-row/card-row.component'; CdLabelComponent, ColorClassFromTextPipe, CardComponent, - CardRowComponent + CardRowComponent, + CodeBlockComponent, + VerticalNavigationComponent ], providers: [], exports: [ @@ -137,7 +141,9 @@ import { CardRowComponent } from './card-row/card-row.component'; CustomLoginBannerComponent, CdLabelComponent, CardComponent, - CardRowComponent + CardRowComponent, + CodeBlockComponent, + VerticalNavigationComponent ] }) export class ComponentsModule {} diff --git a/src/pybind/mgr/dashboard/frontend/src/app/shared/components/vertical-navigation/vertical-navigation.component.html b/src/pybind/mgr/dashboard/frontend/src/app/shared/components/vertical-navigation/vertical-navigation.component.html new file mode 100644 index 00000000000..ae48d7fd7fc --- /dev/null +++ b/src/pybind/mgr/dashboard/frontend/src/app/shared/components/vertical-navigation/vertical-navigation.component.html @@ -0,0 +1,24 @@ +<ng-container *ngIf="items.length"> + <h3 i18n + *ngIf="title">{{title}}</h3> + <input type="text" + placeholder="Filter by name..."
+ (keyup)="updateFilter()" + [id]="inputIdentifier" + class="form-control text-center mb-2"> + <div class="overflow-auto"> + <ul class="nav flex-column nav-pills"> + <li class="nav-item" + *ngFor="let item of filteredItems; trackBy: trackByFn"> + <a class="nav-link" + [class.active]="!activeItem" + (click)="selectItem()" + *ngIf="item === ''">_nogroup</a> + <a class="nav-link text-decoration-none text-break" + [class.active]="item === activeItem" + (click)="selectItem(item)" + *ngIf="item !== ''">{{item}}</a> + </li> + </ul> + </div> +</ng-container> diff --git a/src/pybind/mgr/dashboard/frontend/src/app/shared/components/vertical-navigation/vertical-navigation.component.scss b/src/pybind/mgr/dashboard/frontend/src/app/shared/components/vertical-navigation/vertical-navigation.component.scss new file mode 100644 index 00000000000..569e2d68708 --- /dev/null +++ b/src/pybind/mgr/dashboard/frontend/src/app/shared/components/vertical-navigation/vertical-navigation.component.scss @@ -0,0 +1,3 @@ +.overflow-auto { + max-height: 50vh; +} diff --git a/src/pybind/mgr/dashboard/frontend/src/app/shared/components/vertical-navigation/vertical-navigation.component.spec.ts b/src/pybind/mgr/dashboard/frontend/src/app/shared/components/vertical-navigation/vertical-navigation.component.spec.ts new file mode 100644 index 00000000000..0d45b339a20 --- /dev/null +++ b/src/pybind/mgr/dashboard/frontend/src/app/shared/components/vertical-navigation/vertical-navigation.component.spec.ts @@ -0,0 +1,60 @@ +import { ComponentFixture, TestBed } from '@angular/core/testing'; + +import { VerticalNavigationComponent } from './vertical-navigation.component'; +import { By } from '@angular/platform-browser'; + +describe('VerticalNavigationComponent', () => { + let component: VerticalNavigationComponent; + let fixture: ComponentFixture<VerticalNavigationComponent>; + + beforeEach(async () => { + await TestBed.configureTestingModule({ + declarations: [VerticalNavigationComponent] + }).compileComponents(); + + fixture = TestBed.createComponent(VerticalNavigationComponent); + component = fixture.componentInstance; + component.items = ['item1', 'item2', 'item3']; + component.inputIdentifier = 'filter'; + fixture.detectChanges(); + }); + + it('should create', () => { + expect(component).toBeTruthy(); + }); + + it('should have a title', () => { + component.title = 'testTitle'; + fixture.detectChanges(); + const title = fixture.debugElement.query(By.css('h3')); + expect(title.nativeElement.textContent).toEqual('testTitle'); + }); + + it('should select the first item as active if no item is selected', () => { + expect(component.activeItem).toEqual('item1'); + }); + + it('should filter the items by the keyword in filter input', () => { + const event = new KeyboardEvent('keyup'); + const filterInput = fixture.debugElement.query(By.css('#filter')); + filterInput.nativeElement.value = 'item1'; + filterInput.nativeElement.dispatchEvent(event); + fixture.detectChanges(); + expect(component.filteredItems).toEqual(['item1']); + + filterInput.nativeElement.value = 'item2'; + filterInput.nativeElement.dispatchEvent(event); + fixture.detectChanges(); + expect(component.filteredItems).toEqual(['item2']); + }); + + it('should select the item when clicked', () => { + component.activeItem = ''; + + // click on the first item in the nav list + const item = fixture.debugElement.query(By.css('.nav-link')); + item.nativeElement.click(); + fixture.detectChanges(); + expect(component.activeItem).toEqual('item1'); + }); +}); diff --git 
a/src/pybind/mgr/dashboard/frontend/src/app/shared/components/vertical-navigation/vertical-navigation.component.ts b/src/pybind/mgr/dashboard/frontend/src/app/shared/components/vertical-navigation/vertical-navigation.component.ts new file mode 100644 index 00000000000..a46cc4f6c43 --- /dev/null +++ b/src/pybind/mgr/dashboard/frontend/src/app/shared/components/vertical-navigation/vertical-navigation.component.ts @@ -0,0 +1,37 @@ +import { Component, EventEmitter, Input, OnInit, Output } from '@angular/core'; + +@Component({ + selector: 'cd-vertical-navigation', + templateUrl: './vertical-navigation.component.html', + styleUrls: ['./vertical-navigation.component.scss'] +}) +export class VerticalNavigationComponent implements OnInit { + @Input() items: string[]; + @Input() title: string; + @Input() inputIdentifier: string; + + @Output() emitFilteredItems: EventEmitter<string[]> = new EventEmitter(); + @Output() emitActiveItem: EventEmitter<string> = new EventEmitter(); + + activeItem = ''; + filteredItems: string[]; + + ngOnInit(): void { + this.filteredItems = this.items; + if (!this.activeItem && this.items.length) this.selectItem(this.items[0]); + } + + updateFilter() { + const filterInput = document.getElementById(this.inputIdentifier) as HTMLInputElement; + this.filteredItems = this.items.filter((item) => item.includes(filterInput.value)); + } + + selectItem(item = '') { + this.activeItem = item; + this.emitActiveItem.emit(item); + } + + trackByFn(item: number) { + return item; + } +} diff --git a/src/pybind/mgr/dashboard/frontend/src/app/shared/datatable/crud-table/crud-table.component.ts b/src/pybind/mgr/dashboard/frontend/src/app/shared/datatable/crud-table/crud-table.component.ts index 750152161c2..6881e373b58 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/shared/datatable/crud-table/crud-table.component.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/shared/datatable/crud-table/crud-table.component.ts @@ -120,7 +120,7 @@ export class CRUDTableComponent implements OnInit { delete() { const selectedKey = this.selection.first()[this.meta.columnKey]; this.modalRef = this.modalService.show(CriticalConfirmationModalComponent, { - itemDescription: $localize`${this.meta.columnKey}`, + itemDescription: $localize`${this.meta.resource}`, itemNames: [selectedKey], submitAction: () => { this.taskWrapper @@ -153,7 +153,9 @@ export class CRUDTableComponent implements OnInit { if (this.selection.hasSelection) { key = this.selection.first()[this.meta.columnKey]; } - this.router.navigate(['/cluster/user/edit'], { queryParams: { key: key } }); + + const editAction = this.meta.actions.find((action) => action.name === 'Edit'); + this.router.navigate([editAction.routerLink], { queryParams: { key: key } }); } authExport() { diff --git a/src/pybind/mgr/dashboard/frontend/src/app/shared/datatable/datatable.module.ts b/src/pybind/mgr/dashboard/frontend/src/app/shared/datatable/datatable.module.ts index 37e94f236be..76cbbcfb3a2 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/shared/datatable/datatable.module.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/shared/datatable/datatable.module.ts @@ -61,7 +61,11 @@ import { CheckedTableFormComponent } from './checked-table-form/checked-table-fo 'Role path must start and finish with a slash "/".' 
+ ' (pattern: (\u002F)|(\u002F[\u0021-\u007E]+\u002F))' }, - { name: 'file_size', message: 'File size must not exceed 4KiB' } + { name: 'file_size', message: 'File size must not exceed 4KiB' }, + { + name: 'rgwRoleSessionDuration', + message: 'This field must be a number between 1 and 12 hours' + } ], wrappers: [{ name: 'input-wrapper', component: FormlyInputWrapperComponent }] }),
diff --git a/src/pybind/mgr/dashboard/frontend/src/app/shared/forms/crud-form/helpers.ts b/src/pybind/mgr/dashboard/frontend/src/app/shared/forms/crud-form/helpers.ts index 1ea21b71081..aca9a20af09 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/shared/forms/crud-form/helpers.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/shared/forms/crud-form/helpers.ts @@ -3,7 +3,11 @@ import { FormlyFieldConfig } from '@ngx-formly/core'; import { forEach } from 'lodash'; import { formlyAsyncFileValidator } from './validators/file-validator'; import { formlyAsyncJsonValidator } from './validators/json-validator'; -import { formlyRgwRoleNameValidator, formlyRgwRolePath } from './validators/rgw-role-validator'; +import { + formlyFormNumberValidator, + formlyRgwRoleNameValidator, + formlyRgwRolePath +} from './validators/rgw-role-validator'; export function getFieldState(field: FormlyFieldConfig, uiSchema: any[] = undefined) { const formState: any[] = uiSchema || field.options?.formState; @@ -34,6 +38,10 @@ export function setupValidators(field: FormlyFieldConfig, uiSchema: any[]) { validators.push(formlyAsyncFileValidator); break; } + case 'rgwRoleSessionDuration': { + validators.push(formlyFormNumberValidator); + break; + } } }); field.asyncValidators = { validation: validators };
diff --git a/src/pybind/mgr/dashboard/frontend/src/app/shared/forms/crud-form/validators/rgw-role-validator.ts b/src/pybind/mgr/dashboard/frontend/src/app/shared/forms/crud-form/validators/rgw-role-validator.ts index a100f278bea..c994dc96407 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/shared/forms/crud-form/validators/rgw-role-validator.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/shared/forms/crud-form/validators/rgw-role-validator.ts @@ -17,3 +17,12 @@ export function formlyRgwRoleNameValidator(control: AbstractControl): Promise<an resolve({ rgwRoleName: true }); }); } + +export function formlyFormNumberValidator(control: AbstractControl): Promise<any> { + return new Promise((resolve, _reject) => { + if (control.value.match('^[0-9.]+$')) { + if (control.value <= 12 && control.value >= 1) resolve(null); + } + resolve({ rgwRoleSessionDuration: true }); + }); +}
diff --git a/src/pybind/mgr/dashboard/frontend/src/app/shared/models/cephfs-subvolume-group.model.ts b/src/pybind/mgr/dashboard/frontend/src/app/shared/models/cephfs-subvolume-group.model.ts index fc087ab53d0..246e4543eb9 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/shared/models/cephfs-subvolume-group.model.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/shared/models/cephfs-subvolume-group.model.ts @@ -1,6 +1,6 @@ export interface CephfsSubvolumeGroup { name: string; - info: CephfsSubvolumeGroupInfo; + info?: CephfsSubvolumeGroupInfo; } export interface CephfsSubvolumeGroupInfo {
diff --git a/src/pybind/mgr/dashboard/frontend/src/app/shared/models/cephfs-subvolume.model.ts b/src/pybind/mgr/dashboard/frontend/src/app/shared/models/cephfs-subvolume.model.ts index 41858be6130..25a2a5acc7f 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/shared/models/cephfs-subvolume.model.ts +++
b/src/pybind/mgr/dashboard/frontend/src/app/shared/models/cephfs-subvolume.model.ts @@ -16,3 +16,13 @@ export interface CephfsSubvolumeInfo { gid: number; pool_namespace: string; } + +export interface SubvolumeSnapshot { + name: string; + info: SubvolumeSnapshotInfo; +} + +export interface SubvolumeSnapshotInfo { + created_at: string; + has_pending_clones: string; +} diff --git a/src/pybind/mgr/dashboard/frontend/src/app/shared/models/cephfs-subvolumegroup.model.ts b/src/pybind/mgr/dashboard/frontend/src/app/shared/models/cephfs-subvolumegroup.model.ts deleted file mode 100644 index fc087ab53d0..00000000000 --- a/src/pybind/mgr/dashboard/frontend/src/app/shared/models/cephfs-subvolumegroup.model.ts +++ /dev/null @@ -1,13 +0,0 @@ -export interface CephfsSubvolumeGroup { - name: string; - info: CephfsSubvolumeGroupInfo; -} - -export interface CephfsSubvolumeGroupInfo { - mode: number; - bytes_pcent: number; - bytes_quota: number; - data_pool: string; - state: string; - created_at: string; -} diff --git a/src/pybind/mgr/dashboard/frontend/src/app/shared/models/crud-table-metadata.ts b/src/pybind/mgr/dashboard/frontend/src/app/shared/models/crud-table-metadata.ts index 140fa5b5f8e..dc33e6236ae 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/shared/models/crud-table-metadata.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/shared/models/crud-table-metadata.ts @@ -14,4 +14,5 @@ export class CrudMetadata { actions: CdTableAction[]; forms: any; columnKey: string; + resource: string; } diff --git a/src/pybind/mgr/dashboard/frontend/src/app/shared/models/snapshot-schedule.ts b/src/pybind/mgr/dashboard/frontend/src/app/shared/models/snapshot-schedule.ts new file mode 100644 index 00000000000..b1cea7466f6 --- /dev/null +++ b/src/pybind/mgr/dashboard/frontend/src/app/shared/models/snapshot-schedule.ts @@ -0,0 +1,17 @@ +export interface SnapshotSchedule { + fs?: string; + subvol?: string; + path: string; + rel_path?: string; + schedule: string; + retention?: Record<string, number> | string; + start: Date; + created: Date; + first?: string; + last?: string; + last_pruned?: string; + created_count?: number; + pruned_count?: number; + active: boolean; + status: 'Active' | 'Inactive'; +} diff --git a/src/pybind/mgr/dashboard/frontend/src/styles/defaults/_bootstrap-defaults.scss b/src/pybind/mgr/dashboard/frontend/src/styles/defaults/_bootstrap-defaults.scss index e9c8a595620..d69abf12bc8 100644 --- a/src/pybind/mgr/dashboard/frontend/src/styles/defaults/_bootstrap-defaults.scss +++ b/src/pybind/mgr/dashboard/frontend/src/styles/defaults/_bootstrap-defaults.scss @@ -96,6 +96,8 @@ $chart-color-translucent-blue: #0096dc80 !default; $chart-color-border: #00000020 !default; $chart-color-translucent-yellow: #ef923472 !default; +$code-block-bg: #f7f7f9 !default; + // Typography $font-family-sans-serif: 'Helvetica Neue', Helvetica, Arial, 'Noto Sans', sans-serif, diff --git a/src/pybind/mgr/dashboard/openapi.yaml b/src/pybind/mgr/dashboard/openapi.yaml index d35ea87e15a..0ed83aab6c8 100644 --- a/src/pybind/mgr/dashboard/openapi.yaml +++ b/src/pybind/mgr/dashboard/openapi.yaml @@ -1758,6 +1758,43 @@ paths: summary: Rename CephFS Volume tags: - Cephfs + /api/cephfs/snaphost/schedule: + get: + parameters: + - in: query + name: fs + required: true + schema: + type: string + - default: / + in: query + name: path + schema: + type: string + - default: true + in: query + name: recursive + schema: + type: boolean + responses: + '200': + content: + application/vnd.ceph.api.v1.0+json: + type: object + description: OK 
+ '400': + description: Operation exception. Please check the response body for details. + '401': + description: Unauthenticated access. Please login first. + '403': + description: Unauthorized access. Please check your permissions. + '500': + description: Unexpected error. Please check the response body for the stack + trace. + security: + - jwt: [] + tags: + - CephFSSnapshotSchedule /api/cephfs/subvolume: post: parameters: [] @@ -1882,6 +1919,11 @@ paths: required: true schema: type: string + - default: true + in: query + name: info + schema: + type: boolean responses: '200': content: @@ -1977,6 +2019,48 @@ paths: - jwt: [] tags: - CephfsSubvolumeGroup + /api/cephfs/subvolume/snapshot/{vol_name}/{subvol_name}: + get: + parameters: + - in: path + name: vol_name + required: true + schema: + type: string + - in: path + name: subvol_name + required: true + schema: + type: string + - default: '' + in: query + name: group_name + schema: + type: string + - default: true + in: query + name: info + schema: + type: boolean + responses: + '200': + content: + application/vnd.ceph.api.v1.0+json: + type: object + description: OK + '400': + description: Operation exception. Please check the response body for details. + '401': + description: Unauthenticated access. Please login first. + '403': + description: Unauthorized access. Please check your permissions. + '500': + description: Unexpected error. Please check the response body for the stack + trace. + security: + - jwt: [] + tags: + - CephfsSubvolumeSnapshot /api/cephfs/subvolume/{vol_name}: delete: parameters: @@ -2036,6 +2120,11 @@ paths: name: group_name schema: type: string + - default: true + in: query + name: info + schema: + type: boolean responses: '200': content: @@ -2102,6 +2191,38 @@ paths: - jwt: [] tags: - CephFSSubvolume + /api/cephfs/subvolume/{vol_name}/exists: + get: + parameters: + - in: path + name: vol_name + required: true + schema: + type: string + - default: '' + in: query + name: group_name + schema: + type: string + responses: + '200': + content: + application/vnd.ceph.api.v1.0+json: + type: object + description: OK + '400': + description: Operation exception. Please check the response body for details. + '401': + description: Unauthenticated access. Please login first. + '403': + description: Unauthorized access. Please check your permissions. + '500': + description: Unexpected error. Please check the response body for the stack + trace. + security: + - jwt: [] + tags: + - CephFSSubvolume /api/cephfs/subvolume/{vol_name}/info: get: parameters: @@ -8817,6 +8938,8 @@ paths: type: string placement_target: type: string + tags: + type: string uid: type: string zonegroup: @@ -9129,6 +9252,8 @@ paths: type: string mfa_token_serial: type: string + tags: + type: string uid: type: string versioning_state: @@ -9596,7 +9721,80 @@ paths: trace. security: - jwt: [] - summary: Create Ceph User + summary: Create RGW role + tags: + - RGW + put: + parameters: [] + requestBody: + content: + application/json: + schema: + properties: + max_session_duration: + type: string + role_name: + type: string + required: + - role_name + - max_session_duration + type: object + responses: + '200': + content: + application/vnd.ceph.api.v1.0+json: + type: object + description: Resource updated. + '202': + content: + application/vnd.ceph.api.v1.0+json: + type: object + description: Operation is still executing. Please check the task queue. + '400': + description: Operation exception. Please check the response body for details. 
+ '401': + description: Unauthenticated access. Please login first. + '403': + description: Unauthorized access. Please check your permissions. + '500': + description: Unexpected error. Please check the response body for the stack + trace. + security: + - jwt: [] + summary: Edit RGW role + tags: + - RGW + /api/rgw/roles/{role_name}: + delete: + parameters: + - in: path + name: role_name + required: true + schema: + type: string + responses: + '202': + content: + application/vnd.ceph.api.v1.0+json: + type: object + description: Operation is still executing. Please check the task queue. + '204': + content: + application/vnd.ceph.api.v1.0+json: + type: object + description: Resource deleted. + '400': + description: Operation exception. Please check the response body for details. + '401': + description: Unauthenticated access. Please login first. + '403': + description: Unauthorized access. Please check your permissions. + '500': + description: Unexpected error. Please check the response body for the stack + trace. + security: + - jwt: [] + summary: Delete RGW role tags: - RGW /api/rgw/site: @@ -12771,12 +12969,16 @@ servers: tags: - description: Initiate a session with Ceph name: Auth +- description: Cephfs Snapshot Scheduling API + name: CephFSSnapshotSchedule - description: CephFS Subvolume Management API name: CephFSSubvolume - description: Cephfs Management API name: Cephfs - description: Cephfs Subvolume Group Management API name: CephfsSubvolumeGroup +- description: Cephfs Subvolume Snapshot Management API + name: CephfsSubvolumeSnapshot - description: Get Cluster Details name: Cluster - description: Manage Cluster Configurations diff --git a/src/pybind/mgr/dashboard/services/rgw_client.py b/src/pybind/mgr/dashboard/services/rgw_client.py index 2d3226bab3e..aec3b89b64c 100644 --- a/src/pybind/mgr/dashboard/services/rgw_client.py +++ b/src/pybind/mgr/dashboard/services/rgw_client.py @@ -702,6 +702,19 @@ class RgwClient(RestClient): except RequestException as e: raise DashboardException(msg=str(e), component='rgw') + @RestClient.api_put('/{bucket_name}?tagging') + def set_tags(self, bucket_name, tags, request=None): + # pylint: disable=unused-argument + try: + ET.fromstring(tags) + except ET.ParseError: + return "Data must be properly formatted" + try: + result = request(data=tags) # type: ignore + except RequestException as e: + raise DashboardException(msg=str(e), component='rgw') + return result + @RestClient.api_get('/{bucket_name}?object-lock') def get_bucket_locking(self, bucket_name, request=None): # type: (str, Optional[object]) -> dict @@ -852,6 +865,31 @@ class RgwClient(RestClient): f' For more information about the format look at {link}') raise DashboardException(msg=msg, component='rgw') + def get_role(self, role_name: str): + rgw_get_role_command = ['role', 'get', '--role-name', role_name] + code, role, _err = mgr.send_rgwadmin_command(rgw_get_role_command) + if code != 0: + raise DashboardException(msg=f'Error getting role with code {code}: {_err}', + component='rgw') + return role + + def update_role(self, role_name: str, max_session_duration: str): + rgw_update_role_command = ['role', 'update', '--role-name', + role_name, '--max_session_duration', max_session_duration] + code, _, _err = mgr.send_rgwadmin_command(rgw_update_role_command, + stdout_as_json=False) + if code != 0: + raise DashboardException(msg=f'Error updating role with code {code}: {_err}', + component='rgw') + + def delete_role(self, role_name: str) -> None: + rgw_delete_role_command = ['role', 'delete', 
'--role-name', role_name] + code, _, _err = mgr.send_rgwadmin_command(rgw_delete_role_command, + stdout_as_json=False) + if code != 0: + raise DashboardException(msg=f'Error deleting role with code {code}: {_err}', + component='rgw') + @RestClient.api_get('/{bucket_name}?policy') def get_bucket_policy(self, bucket_name: str, request=None): """ diff --git a/src/pybind/mgr/devicehealth/module.py b/src/pybind/mgr/devicehealth/module.py index 07768db7565..86ea1d28072 100644 --- a/src/pybind/mgr/devicehealth/module.py +++ b/src/pybind/mgr/devicehealth/module.py @@ -502,8 +502,8 @@ CREATE TABLE DeviceHealthMetrics ( def put_device_metrics(self, devid: str, data: Any) -> None: SQL = """ - INSERT INTO DeviceHealthMetrics (devid, raw_smart) - VALUES (?, ?); + INSERT OR REPLACE INTO DeviceHealthMetrics (devid, raw_smart, time) + VALUES (?, ?, strftime('%s', 'now')); """ with self._db_lock, self.db: diff --git a/src/pybind/mgr/nfs/export.py b/src/pybind/mgr/nfs/export.py index 5887c898fef..aaa93c34f6c 100644 --- a/src/pybind/mgr/nfs/export.py +++ b/src/pybind/mgr/nfs/export.py @@ -167,9 +167,22 @@ class AppliedExportResults: def __init__(self) -> None: self.changes: List[Dict[str, str]] = [] self.has_error = False + self.exceptions: List[Exception] = [] + self.faulty_export_block_indices = "" + self.num_errors = 0 + self.status = "" - def append(self, value: Dict[str, str]) -> None: + def append(self, value: Dict[str, Any]) -> None: if value.get("state", "") == "error": + self.num_errors += 1 + # If there is an error then there must be an exception in the dict. + self.exceptions.append(value.pop("exception")) + # Index is for indicating at which export block in the conf/json + # file did the export creation/update failed. + if len(self.faulty_export_block_indices) == 0: + self.faulty_export_block_indices = str(value.pop("index")) + else: + self.faulty_export_block_indices += f", {value.pop('index')}" self.has_error = True self.changes.append(value) @@ -177,7 +190,29 @@ class AppliedExportResults: return self.changes def mgr_return_value(self) -> int: - return -errno.EIO if self.has_error else 0 + if self.has_error: + if len(self.exceptions) == 1: + ex = self.exceptions[0] + if isinstance(ex, NFSException): + return ex.errno + # Some non-nfs exception occurred, this can be anything + # therefore return EAGAIN as a generalised errno. + return -errno.EAGAIN + # There are multiple failures so returning EIO as a generalised + # errno. + return -errno.EIO + return 0 + + def mgr_status_value(self) -> str: + if self.has_error: + if len(self.faulty_export_block_indices) == 1: + self.status = f"{str(self.exceptions[0])} for export block" \ + f" at index {self.faulty_export_block_indices}" + elif len(self.faulty_export_block_indices) > 1: + self.status = f"{self.num_errors} export blocks (at index" \ + f" {self.faulty_export_block_indices}) failed" \ + " to be created/updated" + return self.status class ExportMgr: @@ -501,7 +536,12 @@ class ExportMgr: aeresults = AppliedExportResults() for export in exports: - aeresults.append(self._change_export(cluster_id, export)) + changed_export = self._change_export(cluster_id, export) + # This will help figure out which export blocks in conf/json file + # are problematic. 
+ if changed_export.get("state", "") == "error": + changed_export.update({"index": exports.index(export) + 1}) + aeresults.append(changed_export) return aeresults def _read_export_config(self, cluster_id: str, export_config: str) -> List[Dict]: @@ -525,7 +565,7 @@ class ExportMgr: return j # j is already a list object return [j] # return a single object list, with j as the only item - def _change_export(self, cluster_id: str, export: Dict) -> Dict[str, str]: + def _change_export(self, cluster_id: str, export: Dict) -> Dict[str, Any]: try: return self._apply_export(cluster_id, export) except NotImplementedError: @@ -543,7 +583,7 @@ class ExportMgr: except Exception as ex: msg = f'Failed to apply export: {ex}' log.exception(msg) - return {"state": "error", "msg": msg} + return {"state": "error", "msg": msg, "exception": ex} def _update_user_id( self, diff --git a/src/pybind/mgr/object_format.py b/src/pybind/mgr/object_format.py index b53bc3eb060..4a2b6fa8b69 100644 --- a/src/pybind/mgr/object_format.py +++ b/src/pybind/mgr/object_format.py @@ -228,13 +228,22 @@ class YAMLFormatter(Protocol): class ReturnValueProvider(Protocol): def mgr_return_value(self) -> int: - """Return an integer value to provide the Ceph MGR with a error code - for the MGR's response tuple. Zero means success. Return an negative + """Return an integer value to provide the Ceph MGR with an error code + for the MGR's response tuple. Zero means success. Return a negative errno otherwise. """ ... # pragma: no cover +class StatusValueProvider(Protocol): + def mgr_status_value(self) -> str: + """Return a string value to provide the Ceph MGR with an error status + for the MGR's response tuple. Empty string means success. Return a string + containing error info otherwise. + """ + ... # pragma: no cover + + class CommonFormatter(Protocol): """A protocol that indicates the type is a formatter for multiple possible formats. @@ -272,10 +281,15 @@ def _is_yaml_data_provider(obj: YAMLDataProvider) -> bool: def _is_return_value_provider(obj: ReturnValueProvider) -> bool: - """Return true if obj is usable as a YAMLDataProvider.""" + """Return true if obj is usable as a ReturnValueProvider.""" return callable(getattr(obj, 'mgr_return_value', None)) +def _is_status_value_provider(obj: StatusValueProvider) -> bool: + """Return true if obj is usable as a StatusValueProvider""" + return callable(getattr(obj, 'mgr_status_value', None)) + + class ObjectFormatAdapter: """A format adapater for a single object. Given an input object, this type will adapt the object, or a simplified @@ -366,6 +380,27 @@ class ReturnValueAdapter: return self.default_return_value +class StatusValueAdapter: + """A status-value adapter for an object. + Given an input object, this type will attempt to get a mgr status value + from the object if provides a `mgr_status_value` function. + If not it returns a default status value, typically an empty string. 
+ """ + + def __init__( + self, + obj: Any, + default: str = "", + ) -> None: + self.obj = obj + self.default_status = default + + def mgr_status_value(self) -> str: + if _is_status_value_provider(self.obj): + return str(self.obj.mgr_status_value()) + return self.default_status + + class ErrorResponseBase(Exception): """An exception that can directly be converted to a mgr reponse.""" @@ -448,6 +483,7 @@ ObjectResponseFuncType = Union[ Callable[..., JSONDataProvider], Callable[..., YAMLDataProvider], Callable[..., ReturnValueProvider], + Callable[..., StatusValueProvider], ] @@ -487,6 +523,10 @@ class Responder: """Return a ReturnValueProvider for the given object.""" return ReturnValueAdapter(obj) + def _statusval_provider(self, obj: Any) -> StatusValueProvider: + """Return a StatusValueProvider for the given object.""" + return StatusValueAdapter(obj) + def _get_format_func( self, obj: Any, format_req: Optional[str] = None ) -> Callable: @@ -515,6 +555,12 @@ class Responder: """Return a mgr return-value for the given object (usually zero).""" return self._retval_provider(obj).mgr_return_value() + def _return_status(self, obj: Any) -> str: + """Return a mgr status-value for the given object (usually empty + string). + """ + return self._statusval_provider(obj).mgr_status_value() + def __call__(self, f: ObjectResponseFuncType) -> HandlerFuncType: """Wrap a python function so that the original function's return value becomes the source for an automatically formatted mgr response. @@ -528,9 +574,10 @@ class Responder: robj = f(*args, **kwargs) body = self._formatted(robj, format_req) retval = self._return_value(robj) + statusval = self._return_status(robj) except ErrorResponseBase as e: return e.format_response() - return retval, body, "" + return retval, body, statusval # set the extra args on our wrapper function. this will be consumed by # the CLICommand decorator and added to the set of optional arguments diff --git a/src/pybind/mgr/tests/test_object_format.py b/src/pybind/mgr/tests/test_object_format.py index d2fd20870e7..2e674c69838 100644 --- a/src/pybind/mgr/tests/test_object_format.py +++ b/src/pybind/mgr/tests/test_object_format.py @@ -115,12 +115,18 @@ def test_format_yaml(obj: Any, compatible: bool, yaml_val: str): class Retty: - def __init__(self, v) -> None: + def __init__(self, v, status="") -> None: self.value = v + self.status = status def mgr_return_value(self) -> int: return self.value + def mgr_status_value(self) -> str: + if self.status: + return self.status + return "NOPE" + @pytest.mark.parametrize( "obj, ret", @@ -139,6 +145,24 @@ def test_return_value(obj: Any, ret: int): assert rva.mgr_return_value() == ret +@pytest.mark.parametrize( + "obj, ret", + [ + ({}, ""), + ({"fish": "sticks"}, ""), + (-55, ""), + (Retty(0), "NOPE"), + (Retty(-55, "cake"), "cake"), + (Retty(-50, "pie"), "pie"), + ], +) +def test_return_status(obj: Any, ret: str): + rva = object_format.StatusValueAdapter(obj) + # a StatusValueAdapter instance meets the StatusValueProvider protocol. 
+ assert object_format._is_status_value_provider(rva) + assert rva.mgr_status_value() == ret + + def test_valid_formats(): ofa = object_format.ObjectFormatAdapter({"fred": "wilma"}) vf = ofa.valid_formats() diff --git a/src/pybind/mgr/volumes/fs/async_cloner.py b/src/pybind/mgr/volumes/fs/async_cloner.py index 95f7d64e1b3..146d2e75590 100644 --- a/src/pybind/mgr/volumes/fs/async_cloner.py +++ b/src/pybind/mgr/volumes/fs/async_cloner.py @@ -191,7 +191,7 @@ def bulk_copy(fs_handle, source_path, dst_path, should_cancel): def set_quota_on_clone(fs_handle, clone_volumes_pair): src_path = clone_volumes_pair[1].snapshot_data_path(clone_volumes_pair[2]) dst_path = clone_volumes_pair[0].path - quota = None # type: Optional[int] + quota: Optional[int] = None try: quota = int(fs_handle.getxattr(src_path, 'ceph.quota.max_bytes').decode('utf-8')) except cephfs.NoData: @@ -205,7 +205,7 @@ def set_quota_on_clone(fs_handle, clone_volumes_pair): except cephfs.Error as e: raise VolumeException(-e.args[0], e.args[1]) - quota_files = None # type: Optional[int] + quota_files: Optional[int] = None try: quota_files = int(fs_handle.getxattr(src_path, 'ceph.quota.max_files').decode('utf-8')) except cephfs.NoData: @@ -221,19 +221,25 @@ def set_quota_on_clone(fs_handle, clone_volumes_pair): def do_clone(fs_client, volspec, volname, groupname, subvolname, should_cancel): with open_volume_lockless(fs_client, volname) as fs_handle: - with open_clone_subvolume_pair(fs_client, fs_handle, volspec, volname, groupname, subvolname) as clone_volumes: - src_path = clone_volumes[1].snapshot_data_path(clone_volumes[2]) - dst_path = clone_volumes[0].path + with open_clone_subvolume_pair(fs_client, fs_handle, volspec, volname, + groupname, subvolname) \ + as (subvol0, subvol1, subvol2): + src_path = subvol1.snapshot_data_path(subvol2) + dst_path = subvol0.path + # XXX: this is where cloning (of subvolume's snapshots) actually + # happens. 
bulk_copy(fs_handle, src_path, dst_path, should_cancel) - set_quota_on_clone(fs_handle, clone_volumes) + set_quota_on_clone(fs_handle, (subvol0, subvol1, subvol2)) def update_clone_failure_status(fs_client, volspec, volname, groupname, subvolname, ve): with open_volume_lockless(fs_client, volname) as fs_handle: - with open_clone_subvolume_pair(fs_client, fs_handle, volspec, volname, groupname, subvolname) as clone_volumes: + with open_clone_subvolume_pair(fs_client, fs_handle, volspec, volname, + groupname, subvolname) \ + as (subvol0, subvol1, subvol2) : if ve.errno == -errno.EINTR: - clone_volumes[0].add_clone_failure(-ve.errno, "user interrupted clone operation") + subvol0.add_clone_failure(-ve.errno, "user interrupted clone operation") else: - clone_volumes[0].add_clone_failure(-ve.errno, ve.error_str) + subvol0.add_clone_failure(-ve.errno, ve.error_str) def log_clone_failure(volname, groupname, subvolname, ve): if ve.errno == -errno.EINTR: @@ -261,8 +267,10 @@ def handle_clone_failed(fs_client, volspec, volname, index, groupname, subvolnam try: with open_volume(fs_client, volname) as fs_handle: # detach source but leave the clone section intact for later inspection - with open_clone_subvolume_pair(fs_client, fs_handle, volspec, volname, groupname, subvolname) as clone_volumes: - clone_volumes[1].detach_snapshot(clone_volumes[2], index) + with open_clone_subvolume_pair(fs_client, fs_handle, volspec, + volname, groupname, subvolname) \ + as (subvol0, subvol1, subvol2): + subvol1.detach_snapshot(subvol2, index) except (MetadataMgrException, VolumeException) as e: log.error("failed to detach clone from snapshot: {0}".format(e)) return (None, True) @@ -270,9 +278,11 @@ def handle_clone_failed(fs_client, volspec, volname, index, groupname, subvolnam def handle_clone_complete(fs_client, volspec, volname, index, groupname, subvolname, should_cancel): try: with open_volume(fs_client, volname) as fs_handle: - with open_clone_subvolume_pair(fs_client, fs_handle, volspec, volname, groupname, subvolname) as clone_volumes: - clone_volumes[1].detach_snapshot(clone_volumes[2], index) - clone_volumes[0].remove_clone_source(flush=True) + with open_clone_subvolume_pair(fs_client, fs_handle, volspec, + volname, groupname, subvolname) \ + as (subvol0, subvol1, subvol2): + subvol1.detach_snapshot(subvol2, index) + subvol0.remove_clone_source(flush=True) except (MetadataMgrException, VolumeException) as e: log.error("failed to detach clone from snapshot: {0}".format(e)) return (None, True) @@ -287,9 +297,14 @@ def start_clone_sm(fs_client, volspec, volname, index, groupname, subvolname, st time.sleep(snapshot_clone_delay) log.info("Delayed cloning ({0}, {1}, {2}) -- by {3} seconds".format(volname, groupname, subvolname, snapshot_clone_delay)) while not finished: + # XXX: this is where request operation is mapped to relevant + # function. handler = state_table.get(current_state, None) if not handler: raise VolumeException(-errno.EINVAL, "invalid clone state: \"{0}\"".format(current_state)) + # XXX: this is where the requested operation for subvolume's + # snapshot clone is performed. the function for the request + # operation is run through "handler". 
(next_state, finished) = handler(fs_client, volspec, volname, index, groupname, subvolname, should_cancel) if next_state: log.debug("({0}, {1}, {2}) transition state [\"{3}\" => \"{4}\"]".format(volname, groupname, subvolname,\ diff --git a/src/pybind/mgr/volumes/fs/operations/access.py b/src/pybind/mgr/volumes/fs/operations/access.py index 9b7b2431605..7e916e95539 100644 --- a/src/pybind/mgr/volumes/fs/operations/access.py +++ b/src/pybind/mgr/volumes/fs/operations/access.py @@ -4,7 +4,7 @@ from typing import List def prepare_updated_caps_list(existing_caps, mds_cap_str, osd_cap_str, authorize=True): - caps_list = [] # type: List[str] + caps_list: List[str] = [] for k, v in existing_caps['caps'].items(): if k == 'mds' or k == 'osd': continue diff --git a/src/pybind/mgr/volumes/fs/operations/lock.py b/src/pybind/mgr/volumes/fs/operations/lock.py index 7ef6923e115..9588ddec164 100644 --- a/src/pybind/mgr/volumes/fs/operations/lock.py +++ b/src/pybind/mgr/volumes/fs/operations/lock.py @@ -22,10 +22,10 @@ class GlobalLock(object): See: https://people.eecs.berkeley.edu/~kubitron/courses/cs262a-F14/projects/reports/project6_report.pdf """ - _shared_state = { + _shared_state: Dict = { 'lock' : Lock(), 'init' : False - } # type: Dict + } def __init__(self): with self._shared_state['lock']: diff --git a/src/pybind/mgr/volumes/fs/operations/pin_util.py b/src/pybind/mgr/volumes/fs/operations/pin_util.py index 9ea79e546e2..a12ab5b4d4b 100644 --- a/src/pybind/mgr/volumes/fs/operations/pin_util.py +++ b/src/pybind/mgr/volumes/fs/operations/pin_util.py @@ -1,4 +1,3 @@ -import os import errno import cephfs @@ -25,7 +24,7 @@ def pin(fs, path, pin_type, pin_setting): try: pin_setting = _pin_value[pin_type](pin_setting) - except ValueError as e: + except ValueError: raise VolumeException(-errno.EINVAL, f"pin value wrong type: {pin_setting}") try: diff --git a/src/pybind/mgr/volumes/fs/operations/trash.py b/src/pybind/mgr/volumes/fs/operations/trash.py index 66f1d71cf89..d76d43a43d1 100644 --- a/src/pybind/mgr/volumes/fs/operations/trash.py +++ b/src/pybind/mgr/volumes/fs/operations/trash.py @@ -6,7 +6,6 @@ from contextlib import contextmanager import cephfs from .template import GroupTemplate -from ..fs_util import listdir from ..exception import VolumeException log = logging.getLogger(__name__) diff --git a/src/pybind/mgr/volumes/fs/operations/versions/__init__.py b/src/pybind/mgr/volumes/fs/operations/versions/__init__.py index 544afa165f9..6c96c457019 100644 --- a/src/pybind/mgr/volumes/fs/operations/versions/__init__.py +++ b/src/pybind/mgr/volumes/fs/operations/versions/__init__.py @@ -83,7 +83,7 @@ class SubvolumeLoader(object): subvolume_type = SubvolumeTypes.TYPE_NORMAL try: initial_state = SubvolumeOpSm.get_init_state(subvolume_type) - except OpSmException as oe: + except OpSmException: raise VolumeException(-errno.EINVAL, "subvolume creation failed: internal error") qpath = subvolume.base_path.decode('utf-8') # legacy is only upgradable to v1 diff --git a/src/pybind/mgr/volumes/fs/operations/versions/metadata_manager.py b/src/pybind/mgr/volumes/fs/operations/versions/metadata_manager.py index 718735d91b1..b33a2b48b77 100644 --- a/src/pybind/mgr/volumes/fs/operations/versions/metadata_manager.py +++ b/src/pybind/mgr/volumes/fs/operations/versions/metadata_manager.py @@ -1,7 +1,6 @@ import os import errno import logging -import sys import threading import configparser import re diff --git a/src/pybind/mgr/volumes/fs/operations/versions/op_sm.py 
b/src/pybind/mgr/volumes/fs/operations/versions/op_sm.py index 1142600cbb2..93eafb2bde4 100644 --- a/src/pybind/mgr/volumes/fs/operations/versions/op_sm.py +++ b/src/pybind/mgr/volumes/fs/operations/versions/op_sm.py @@ -19,7 +19,7 @@ class TransitionKey(object): return not(self == other) class SubvolumeOpSm(object): - transition_table = {} # type: Dict + transition_table: Dict = {} @staticmethod def is_complete_state(state): diff --git a/src/pybind/mgr/volumes/fs/operations/versions/subvolume_base.py b/src/pybind/mgr/volumes/fs/operations/versions/subvolume_base.py index 3bae0707a6a..8fbe177e5f4 100644 --- a/src/pybind/mgr/volumes/fs/operations/versions/subvolume_base.py +++ b/src/pybind/mgr/volumes/fs/operations/versions/subvolume_base.py @@ -144,7 +144,7 @@ class SubvolumeBase(object): try: self.fs.stat(self.legacy_config_path) self.legacy_mode = True - except cephfs.Error as e: + except cephfs.Error: pass log.debug("loading config " @@ -160,7 +160,7 @@ class SubvolumeBase(object): def get_attrs(self, pathname): # get subvolume attributes - attrs = {} # type: Dict[str, Union[int, str, None]] + attrs: Dict[str, Union[int, str, None]] = {} stx = self.fs.statx(pathname, cephfs.CEPH_STATX_UID | cephfs.CEPH_STATX_GID | cephfs.CEPH_STATX_MODE, diff --git a/src/pybind/mgr/volumes/fs/operations/versions/subvolume_v1.py b/src/pybind/mgr/volumes/fs/operations/versions/subvolume_v1.py index b5a10dd6c7f..88d186f3f15 100644 --- a/src/pybind/mgr/volumes/fs/operations/versions/subvolume_v1.py +++ b/src/pybind/mgr/volumes/fs/operations/versions/subvolume_v1.py @@ -55,7 +55,7 @@ class SubvolumeV1(SubvolumeBase, SubvolumeTemplate): try: # no need to stat the path -- open() does that return self.metadata_mgr.get_global_option(MetadataManager.GLOBAL_META_KEY_PATH).encode('utf-8') - except MetadataMgrException as me: + except MetadataMgrException: raise VolumeException(-errno.EINVAL, "error fetching subvolume metadata") @property @@ -68,7 +68,7 @@ class SubvolumeV1(SubvolumeBase, SubvolumeTemplate): try: # MDS treats this as a noop for already marked subvolume self.fs.setxattr(self.path, 'ceph.dir.subvolume', b'1', 0) - except cephfs.InvalidValue as e: + except cephfs.InvalidValue: raise VolumeException(-errno.EINVAL, "invalid value specified for ceph.dir.subvolume") except cephfs.Error as e: raise VolumeException(-e.args[0], e.args[1]) @@ -89,7 +89,7 @@ class SubvolumeV1(SubvolumeBase, SubvolumeTemplate): subvolume_type = SubvolumeTypes.TYPE_NORMAL try: initial_state = SubvolumeOpSm.get_init_state(subvolume_type) - except OpSmException as oe: + except OpSmException: raise VolumeException(-errno.EINVAL, "subvolume creation failed: internal error") subvol_path = os.path.join(self.base_path, str(uuid.uuid4()).encode('utf-8')) @@ -156,7 +156,7 @@ class SubvolumeV1(SubvolumeBase, SubvolumeTemplate): subvolume_type = SubvolumeTypes.TYPE_CLONE try: initial_state = SubvolumeOpSm.get_init_state(subvolume_type) - except OpSmException as oe: + except OpSmException: raise VolumeException(-errno.EINVAL, "clone failed: internal error") subvol_path = os.path.join(self.base_path, str(uuid.uuid4()).encode('utf-8')) @@ -596,7 +596,7 @@ class SubvolumeV1(SubvolumeBase, SubvolumeTemplate): """ with self.auth_mdata_mgr.subvol_metadata_lock(self.group.groupname, self.subvolname): meta = self.auth_mdata_mgr.subvol_metadata_get(self.group.groupname, self.subvolname) - auths = [] # type: List[Dict[str,str]] + auths: List[Dict[str,str]] = [] if not meta or not meta['auths']: return auths @@ -669,7 +669,7 @@ class 
SubvolumeV1(SubvolumeBase, SubvolumeTemplate): pass else: raise - except MetadataMgrException as me: + except MetadataMgrException: raise VolumeException(-errno.EINVAL, "error fetching subvolume metadata") return clone_source @@ -684,16 +684,16 @@ class SubvolumeV1(SubvolumeBase, SubvolumeTemplate): def status(self): state = SubvolumeStates.from_value(self.metadata_mgr.get_global_option(MetadataManager.GLOBAL_META_KEY_STATE)) subvolume_type = self.subvol_type - subvolume_status = { - 'state' : state.value - } - if not SubvolumeOpSm.is_complete_state(state) and subvolume_type == SubvolumeTypes.TYPE_CLONE: - subvolume_status["source"] = self._get_clone_source() - if SubvolumeOpSm.is_failed_state(state) and subvolume_type == SubvolumeTypes.TYPE_CLONE: - try: - subvolume_status["failure"] = self._get_clone_failure() - except MetadataMgrException: - pass + subvolume_status = {'state' : state.value} + + if subvolume_type == SubvolumeTypes.TYPE_CLONE: + if not SubvolumeOpSm.is_complete_state(state): + subvolume_status["source"] = self._get_clone_source() + if SubvolumeOpSm.is_failed_state(state): + try: + subvolume_status["failure"] = self._get_clone_failure() + except MetadataMgrException: + pass return subvolume_status @@ -744,7 +744,7 @@ class SubvolumeV1(SubvolumeBase, SubvolumeTemplate): raise def get_pending_clones(self, snapname): - pending_clones_info = {"has_pending_clones": "no"} # type: Dict[str, Any] + pending_clones_info: Dict[str, Any] = {"has_pending_clones": "no"} pending_track_id_list = [] pending_clone_list = [] index_path = "" @@ -777,7 +777,6 @@ class SubvolumeV1(SubvolumeBase, SubvolumeTemplate): # If clone is completed between 'list_all_keys_with_specified_values_from_section' # and readlink(track_id_path) call then readlink will fail with error ENOENT (2) # Hence we double check whether track_id is exist in .meta file or not. - value = self.metadata_mgr.get_option('clone snaps', track_id) # Edge case scenario. # If track_id for clone exist but path /volumes/_index/clone/{track_id} not found # then clone is orphan. 
@@ -790,7 +789,7 @@ class SubvolumeV1(SubvolumeBase, SubvolumeTemplate): path = Path(link_path.decode('utf-8')) clone_name = os.path.basename(link_path).decode('utf-8') group_name = os.path.basename(path.parent.absolute()) - details = {"name": clone_name} # type: Dict[str, str] + details = {"name": clone_name} if group_name != Group.NO_GROUP_NAME: details["target_group"] = group_name pending_clone_list.append(details) @@ -839,7 +838,7 @@ class SubvolumeV1(SubvolumeBase, SubvolumeTemplate): snap_info[key] = self.fs.getxattr(snappath, val) pending_clones_info = self.get_pending_clones(snapname) info_dict = {'created_at': str(datetime.fromtimestamp(float(snap_info['created_at']))), - 'data_pool': snap_info['data_pool'].decode('utf-8')} # type: Dict[str, Any] + 'data_pool': snap_info['data_pool'].decode('utf-8')} info_dict.update(pending_clones_info); return info_dict except cephfs.Error as e: diff --git a/src/pybind/mgr/volumes/fs/operations/versions/subvolume_v2.py b/src/pybind/mgr/volumes/fs/operations/versions/subvolume_v2.py index 03085d04971..55d7f945b77 100644 --- a/src/pybind/mgr/volumes/fs/operations/versions/subvolume_v2.py +++ b/src/pybind/mgr/volumes/fs/operations/versions/subvolume_v2.py @@ -10,7 +10,6 @@ from .metadata_manager import MetadataManager from .subvolume_attrs import SubvolumeTypes, SubvolumeStates, SubvolumeFeatures from .op_sm import SubvolumeOpSm from .subvolume_v1 import SubvolumeV1 -from ..template import SubvolumeTemplate from ...exception import OpSmException, VolumeException, MetadataMgrException from ...fs_util import listdir, create_base_dir from ..template import SubvolumeOpType @@ -99,7 +98,7 @@ class SubvolumeV2(SubvolumeV1): try: # MDS treats this as a noop for already marked subvolume self.fs.setxattr(self.base_path, 'ceph.dir.subvolume', b'1', 0) - except cephfs.InvalidValue as e: + except cephfs.InvalidValue: raise VolumeException(-errno.EINVAL, "invalid value specified for ceph.dir.subvolume") except cephfs.Error as e: raise VolumeException(-e.args[0], e.args[1]) @@ -159,7 +158,7 @@ class SubvolumeV2(SubvolumeV1): subvolume_type = SubvolumeTypes.TYPE_NORMAL try: initial_state = SubvolumeOpSm.get_init_state(subvolume_type) - except OpSmException as oe: + except OpSmException: raise VolumeException(-errno.EINVAL, "subvolume creation failed: internal error") retained = self.retained @@ -207,7 +206,7 @@ class SubvolumeV2(SubvolumeV1): subvolume_type = SubvolumeTypes.TYPE_CLONE try: initial_state = SubvolumeOpSm.get_init_state(subvolume_type) - except OpSmException as oe: + except OpSmException: raise VolumeException(-errno.EINVAL, "clone failed: internal error") retained = self.retained @@ -308,13 +307,17 @@ class SubvolumeV2(SubvolumeV1): op_type.value, self.subvolname, etype.value)) estate = self.state - if op_type not in self.allowed_ops_by_state(estate) and estate == SubvolumeStates.STATE_RETAINED: - raise VolumeException(-errno.ENOENT, "subvolume '{0}' is removed and has only snapshots retained".format( - self.subvolname)) - - if op_type not in self.allowed_ops_by_state(estate) and estate != SubvolumeStates.STATE_RETAINED: - raise VolumeException(-errno.EAGAIN, "subvolume '{0}' is not ready for operation {1}".format( - self.subvolname, op_type.value)) + if op_type not in self.allowed_ops_by_state(estate): + if estate == SubvolumeStates.STATE_RETAINED: + raise VolumeException( + -errno.ENOENT, + f'subvolume "{self.subvolname}" is removed and has ' + 'only snapshots retained') + else: + raise VolumeException( + -errno.EAGAIN, + f'subvolume 
"{self.subvolname}" is not ready for ' + f'operation "{op_type.value}"') if estate != SubvolumeStates.STATE_RETAINED: subvol_path = self.path diff --git a/src/pybind/mgr/volumes/fs/operations/volume.py b/src/pybind/mgr/volumes/fs/operations/volume.py index 395a3fb4ea0..67fbb891cef 100644 --- a/src/pybind/mgr/volumes/fs/operations/volume.py +++ b/src/pybind/mgr/volumes/fs/operations/volume.py @@ -40,7 +40,7 @@ def get_pool_names(mgr, volname): """ fs_map = mgr.get("fs_map") metadata_pool_id = None - data_pool_ids = [] # type: List[int] + data_pool_ids: List[int] = [] for f in fs_map['filesystems']: if volname == f['mdsmap']['fs_name']: metadata_pool_id = f['mdsmap']['metadata_pool'] @@ -61,7 +61,7 @@ def get_pool_ids(mgr, volname): """ fs_map = mgr.get("fs_map") metadata_pool_id = None - data_pool_ids = [] # type: List[int] + data_pool_ids: List[int] = [] for f in fs_map['filesystems']: if volname == f['mdsmap']['fs_name']: metadata_pool_id = f['mdsmap']['metadata_pool'] diff --git a/src/pybind/mgr/volumes/fs/volume.py b/src/pybind/mgr/volumes/fs/volume.py index 5c6642444b1..e19d1eafb2a 100644 --- a/src/pybind/mgr/volumes/fs/volume.py +++ b/src/pybind/mgr/volumes/fs/volume.py @@ -1,7 +1,6 @@ import json import errno import logging -import os import mgr_util from typing import TYPE_CHECKING @@ -17,7 +16,6 @@ from .operations.volume import create_volume, delete_volume, rename_volume, \ list_volumes, open_volume, get_pool_names, get_pool_ids, get_pending_subvol_deletions_count from .operations.subvolume import open_subvol, create_subvol, remove_subvol, \ create_clone -from .operations.trash import Trash from .vol_spec import VolSpec from .exception import VolumeException, ClusterError, ClusterTimeout, EvictionError @@ -338,7 +336,7 @@ class VolumeClient(CephfsClient["Module"]): with open_volume(self, volname) as fs_handle: with open_group(fs_handle, self.volspec, groupname) as group: with open_subvol(self.mgr, fs_handle, self.volspec, group, subvolname, SubvolumeOpType.EVICT) as subvolume: - key = subvolume.evict(volname, authid) + subvolume.evict(volname, authid) ret = 0, "", "" except (VolumeException, ClusterTimeout, ClusterError, EvictionError) as e: if isinstance(e, VolumeException): @@ -695,7 +693,7 @@ class VolumeClient(CephfsClient["Module"]): try: with open_volume(self, volname) as fs_handle: with open_group(fs_handle, self.volspec, groupname) as group: - with open_subvol(self.mgr, fs_handle, self.volspec, group, subvolname, SubvolumeOpType.SNAP_PROTECT) as subvolume: + with open_subvol(self.mgr, fs_handle, self.volspec, group, subvolname, SubvolumeOpType.SNAP_PROTECT): log.warning("snapshot protect call is deprecated and will be removed in a future release") except VolumeException as ve: ret = self.volume_exception_to_retval(ve) @@ -710,7 +708,7 @@ class VolumeClient(CephfsClient["Module"]): try: with open_volume(self, volname) as fs_handle: with open_group(fs_handle, self.volspec, groupname) as group: - with open_subvol(self.mgr, fs_handle, self.volspec, group, subvolname, SubvolumeOpType.SNAP_UNPROTECT) as subvolume: + with open_subvol(self.mgr, fs_handle, self.volspec, group, subvolname, SubvolumeOpType.SNAP_UNPROTECT): log.warning("snapshot unprotect call is deprecated and will be removed in a future release") except VolumeException as ve: ret = self.volume_exception_to_retval(ve) @@ -962,7 +960,7 @@ class VolumeClient(CephfsClient["Module"]): try: with open_volume(self, volname) as fs_handle: - with open_group(fs_handle, self.volspec, groupname) as group: + with 
open_group(fs_handle, self.volspec, groupname): # as subvolumes are marked with the vxattr ceph.dir.subvolume deny snapshots # at the subvolume group (see: https://tracker.ceph.com/issues/46074) # group.create_snapshot(snapname) diff --git a/src/pybind/rbd/setup.py b/src/pybind/rbd/setup.py index 1f20c3ed42f..eeb33c73d49 100755 --- a/src/pybind/rbd/setup.py +++ b/src/pybind/rbd/setup.py @@ -14,6 +14,7 @@ else: from distutils.ccompiler import new_compiler from distutils.errors import CompileError, LinkError from itertools import filterfalse, takewhile +from packaging import version import distutils.sysconfig @@ -148,11 +149,22 @@ else: sys.exit(1) cmdclass = {} +compiler_directives={'language_level': sys.version_info.major} try: from Cython.Build import cythonize from Cython.Distutils import build_ext + from Cython import __version__ as cython_version cmdclass = {'build_ext': build_ext} + + # Needed for building with Cython 0.x and Cython 3 from the same file, + # preserving the same behavior. + # When Cython 0.x builds go away, replace this compiler directive with + # noexcept on rbd_callback_t and librbd_progress_fn_t (or consider doing + # something similar to except? -9000 on rbd_diff_iterate2() callback for + # progress callbacks to propagate exceptions). + if version.parse(cython_version) >= version.parse('3'): + compiler_directives['legacy_implicit_noexcept'] = True except ImportError: print("WARNING: Cython is not installed.") @@ -197,7 +209,7 @@ setup( **ext_args ) ], - compiler_directives={'language_level': sys.version_info.major}, + compiler_directives=compiler_directives, build_dir=os.environ.get("CYTHON_BUILD_DIR", None), **cythonize_args ), diff --git a/src/python-common/ceph/rgw/types.py b/src/python-common/ceph/rgw/types.py index 3f65f9da00e..ddff7e15ea3 100644 --- a/src/python-common/ceph/rgw/types.py +++ b/src/python-common/ceph/rgw/types.py @@ -117,7 +117,6 @@ class RGWPeriod(JSONObj): self.epoch = period_dict['epoch'] self.master_zone = period_dict['master_zone'] self.master_zonegroup = period_dict['master_zonegroup'] - self.realm_name = period_dict['realm_name'] self.realm_id = period_dict['realm_id'] pm = period_dict['period_map'] self.zonegroups_by_id = {} diff --git a/src/rgw/CMakeLists.txt b/src/rgw/CMakeLists.txt index d54c5b6afa9..00a9e4127e1 100644 --- a/src/rgw/CMakeLists.txt +++ b/src/rgw/CMakeLists.txt @@ -48,7 +48,6 @@ set(librgw_common_srcs services/svc_otp.cc services/svc_quota.cc services/svc_sync_modules.cc - services/svc_rados.cc services/svc_role_rados.cc services/svc_sys_obj.cc services/svc_sys_obj_cache.cc diff --git a/src/rgw/driver/daos/rgw_sal_daos.cc b/src/rgw/driver/daos/rgw_sal_daos.cc index 3687f57b8c8..69432056579 100644 --- a/src/rgw/driver/daos/rgw_sal_daos.cc +++ b/src/rgw/driver/daos/rgw_sal_daos.cc @@ -179,7 +179,7 @@ int DaosUser::read_stats(const DoutPrefixProvider* dpp, optional_yield y, /* stats - Not for first pass */ int DaosUser::read_stats_async(const DoutPrefixProvider* dpp, - RGWGetUserStats_CB* cb) { + boost::intrusive_ptr<ReadStatsCB> cb) { return DAOS_NOT_IMPLEMENTED_LOG(dpp); } @@ -495,7 +495,7 @@ int DaosBucket::read_stats(const DoutPrefixProvider* dpp, int DaosBucket::read_stats_async( const DoutPrefixProvider* dpp, const bucket_index_layout_generation& idx_layout, int shard_id, - RGWGetBucketStats_CB* ctx) { + boost::intrusive_ptr<ReadStatsCB> ctx) { return DAOS_NOT_IMPLEMENTED_LOG(dpp); } diff --git a/src/rgw/driver/daos/rgw_sal_daos.h b/src/rgw/driver/daos/rgw_sal_daos.h index 66f57006463..146ba7077a4 100644 --- 
a/src/rgw/driver/daos/rgw_sal_daos.h +++ b/src/rgw/driver/daos/rgw_sal_daos.h @@ -187,7 +187,7 @@ class DaosUser : public StoreUser { ceph::real_time* last_stats_sync = nullptr, ceph::real_time* last_stats_update = nullptr) override; virtual int read_stats_async(const DoutPrefixProvider* dpp, - RGWGetUserStats_CB* cb) override; + boost::intrusive_ptr<ReadStatsCB> cb) override; virtual int complete_flush_stats(const DoutPrefixProvider* dpp, optional_yield y) override; virtual int read_usage( @@ -309,7 +309,7 @@ class DaosBucket : public StoreBucket { virtual int read_stats_async(const DoutPrefixProvider* dpp, const bucket_index_layout_generation& idx_layout, int shard_id, - RGWGetBucketStats_CB* ctx) override; + boost::intrusive_ptr<ReadStatsCB> ctx) override; virtual int sync_user_stats(const DoutPrefixProvider* dpp, optional_yield y) override; virtual int check_bucket_shards(const DoutPrefixProvider* dpp) override; diff --git a/src/rgw/driver/dbstore/common/dbstore.h b/src/rgw/driver/dbstore/common/dbstore.h index fc436d55df8..8cf6f70f751 100644 --- a/src/rgw/driver/dbstore/common/dbstore.h +++ b/src/rgw/driver/dbstore/common/dbstore.h @@ -1771,14 +1771,13 @@ class DB { rgw_obj_key end_marker; std::string ns; bool enforce_ns; - RGWAccessListFilter* access_list_filter; + rgw::AccessListFilter access_list_filter; RGWBucketListNameFilter force_check_filter; bool list_versions; bool allow_unordered; Params() : enforce_ns(true), - access_list_filter(nullptr), list_versions(false), allow_unordered(false) {} diff --git a/src/rgw/driver/motr/rgw_sal_motr.cc b/src/rgw/driver/motr/rgw_sal_motr.cc index 3f5a46e4bc9..08053b9b90c 100644 --- a/src/rgw/driver/motr/rgw_sal_motr.cc +++ b/src/rgw/driver/motr/rgw_sal_motr.cc @@ -289,7 +289,7 @@ int MotrUser::read_stats(const DoutPrefixProvider *dpp, } /* stats - Not for first pass */ -int MotrUser::read_stats_async(const DoutPrefixProvider *dpp, RGWGetUserStats_CB *cb) +int MotrUser::read_stats_async(const DoutPrefixProvider *dpp, boost::intrusive_ptr<ReadStatsCB> cb) { return 0; } @@ -803,7 +803,7 @@ int MotrBucket::create_multipart_indices() int MotrBucket::read_stats_async(const DoutPrefixProvider *dpp, const bucket_index_layout_generation& idx_layout, - int shard_id, RGWGetBucketStats_CB *ctx) + int shard_id, boost::intrusive_ptr<ReadStatsCB> ctx) { return 0; } diff --git a/src/rgw/driver/motr/rgw_sal_motr.h b/src/rgw/driver/motr/rgw_sal_motr.h index ecfabfc1518..ab0fc1d5283 100644 --- a/src/rgw/driver/motr/rgw_sal_motr.h +++ b/src/rgw/driver/motr/rgw_sal_motr.h @@ -241,7 +241,7 @@ class MotrUser : public StoreUser { optional_yield y, RGWStorageStats* stats, ceph::real_time *last_stats_sync = nullptr, ceph::real_time *last_stats_update = nullptr) override; - virtual int read_stats_async(const DoutPrefixProvider *dpp, RGWGetUserStats_CB* cb) override; + virtual int read_stats_async(const DoutPrefixProvider *dpp, boost::intrusive_ptr<ReadStatsCB> cb) override; virtual int complete_flush_stats(const DoutPrefixProvider *dpp, optional_yield y) override; virtual int read_usage(const DoutPrefixProvider *dpp, uint64_t start_epoch, uint64_t end_epoch, uint32_t max_entries, bool* is_truncated, RGWUsageIter& usage_iter, @@ -372,7 +372,7 @@ class MotrBucket : public StoreBucket { bool *syncstopped = nullptr) override; virtual int read_stats_async(const DoutPrefixProvider *dpp, const bucket_index_layout_generation& idx_layout, - int shard_id, RGWGetBucketStats_CB* ctx) override; + int shard_id, boost::intrusive_ptr<ReadStatsCB> ctx) override; int 
sync_user_stats(const DoutPrefixProvider *dpp, optional_yield y, RGWBucketEnt* ent) override; int check_bucket_shards(const DoutPrefixProvider *dpp, diff --git a/src/rgw/driver/posix/rgw_sal_posix.cc b/src/rgw/driver/posix/rgw_sal_posix.cc index 23f9f83dabc..5c1e50ca5c9 100644 --- a/src/rgw/driver/posix/rgw_sal_posix.cc +++ b/src/rgw/driver/posix/rgw_sal_posix.cc @@ -916,7 +916,7 @@ int POSIXBucket::set_acl(const DoutPrefixProvider* dpp, acl.encode(aclbl); attrs[RGW_ATTR_ACL] = aclbl; - info.owner = acl.get_owner().get_id(); + info.owner = acl.get_owner().id; return write_attrs(dpp, y); } @@ -958,7 +958,7 @@ int POSIXBucket::read_stats(const DoutPrefixProvider *dpp, int POSIXBucket::read_stats_async(const DoutPrefixProvider *dpp, const bucket_index_layout_generation& idx_layout, - int shard_id, RGWGetBucketStats_CB* ctx) + int shard_id, boost::intrusive_ptr<ReadStatsCB> ctx) { return 0; } diff --git a/src/rgw/driver/posix/rgw_sal_posix.h b/src/rgw/driver/posix/rgw_sal_posix.h index 1599808beff..3b76295b0c4 100644 --- a/src/rgw/driver/posix/rgw_sal_posix.h +++ b/src/rgw/driver/posix/rgw_sal_posix.h @@ -195,7 +195,7 @@ public: bool* syncstopped = nullptr) override; virtual int read_stats_async(const DoutPrefixProvider *dpp, const bucket_index_layout_generation& idx_layout, - int shard_id, RGWGetBucketStats_CB* ctx) override; + int shard_id, boost::intrusive_ptr<ReadStatsCB> ctx) override; virtual int sync_user_stats(const DoutPrefixProvider *dpp, optional_yield y, RGWBucketEnt* ent) override; virtual int check_bucket_shards(const DoutPrefixProvider* dpp, diff --git a/src/rgw/driver/rados/rgw_bucket.cc b/src/rgw/driver/rados/rgw_bucket.cc index bea3e9b09e5..64d8e31cc3c 100644 --- a/src/rgw/driver/rados/rgw_bucket.cc +++ b/src/rgw/driver/rados/rgw_bucket.cc @@ -882,17 +882,6 @@ int RGWBucket::sync(RGWBucketAdminOpState& op_state, const DoutPrefixProvider *d } -int RGWBucket::policy_bl_to_stream(bufferlist& bl, ostream& o) -{ - RGWAccessControlPolicy_S3 policy(g_ceph_context); - int ret = decode_bl(bl, policy); - if (ret < 0) { - ldout(driver->ctx(),0) << "failed to decode RGWAccessControlPolicy" << dendl; - } - policy.to_xml(o); - return 0; -} - int rgw_object_get_attr(const DoutPrefixProvider *dpp, rgw::sal::Driver* driver, rgw::sal::Object* obj, const char* attr_name, bufferlist& out_bl, optional_yield y) @@ -961,7 +950,7 @@ int RGWBucketAdminOp::get_policy(rgw::sal::Driver* driver, RGWBucketAdminOpState int RGWBucketAdminOp::get_policy(rgw::sal::Driver* driver, RGWBucketAdminOpState& op_state, RGWFormatterFlusher& flusher, const DoutPrefixProvider *dpp, optional_yield y) { - RGWAccessControlPolicy policy(driver->ctx()); + RGWAccessControlPolicy policy; int ret = get_policy(driver, op_state, policy, dpp, y); if (ret < 0) @@ -983,13 +972,13 @@ int RGWBucketAdminOp::get_policy(rgw::sal::Driver* driver, RGWBucketAdminOpState int RGWBucketAdminOp::dump_s3_policy(rgw::sal::Driver* driver, RGWBucketAdminOpState& op_state, ostream& os, const DoutPrefixProvider *dpp, optional_yield y) { - RGWAccessControlPolicy_S3 policy(driver->ctx()); + RGWAccessControlPolicy policy; int ret = get_policy(driver, op_state, policy, dpp, y); if (ret < 0) return ret; - policy.to_xml(os); + rgw::s3::write_policy_xml(policy, os); return 0; } @@ -1068,9 +1057,9 @@ int RGWBucketAdminOp::link(rgw::sal::Driver* driver, RGWBucketAdminOpState& op_s return -EIO; } - int r = static_cast<rgw::sal::RadosStore*>(driver)->ctl()->bucket->unlink_bucket(owner.get_id(), old_bucket->get_info().bucket, y, dpp, false); + int r = 
static_cast<rgw::sal::RadosStore*>(driver)->ctl()->bucket->unlink_bucket(owner.id, old_bucket->get_info().bucket, y, dpp, false); if (r < 0) { - set_err_msg(err, "could not unlink policy from user " + owner.get_id().to_str()); + set_err_msg(err, "could not unlink policy from user " + owner.id.to_str()); return r; } diff --git a/src/rgw/driver/rados/rgw_bucket.h b/src/rgw/driver/rados/rgw_bucket.h index 706683e6bd1..5af10b524de 100644 --- a/src/rgw/driver/rados/rgw_bucket.h +++ b/src/rgw/driver/rados/rgw_bucket.h @@ -355,7 +355,6 @@ public: int set_quota(RGWBucketAdminOpState& op_state, const DoutPrefixProvider *dpp, optional_yield y, std::string *err_msg = NULL); int remove_object(const DoutPrefixProvider *dpp, RGWBucketAdminOpState& op_state, optional_yield y, std::string *err_msg = NULL); - int policy_bl_to_stream(bufferlist& bl, std::ostream& o); int get_policy(RGWBucketAdminOpState& op_state, RGWAccessControlPolicy& policy, optional_yield y, const DoutPrefixProvider *dpp); int sync(RGWBucketAdminOpState& op_state, const DoutPrefixProvider *dpp, optional_yield y, std::string *err_msg = NULL); diff --git a/src/rgw/driver/rados/rgw_cr_rados.cc b/src/rgw/driver/rados/rgw_cr_rados.cc index e7174908dd7..2a4a02f29ad 100644 --- a/src/rgw/driver/rados/rgw_cr_rados.cc +++ b/src/rgw/driver/rados/rgw_cr_rados.cc @@ -152,7 +152,7 @@ int RGWSimpleRadosReadAttrsCR::send_request(const DoutPrefixProvider *dpp) } cn = stack->create_completion_notifier(); - return ref.pool.ioctx().aio_operate(ref.obj.oid, cn->completion(), &op, + return ref.ioctx.aio_operate(ref.obj.oid, cn->completion(), &op, nullptr); } @@ -234,7 +234,7 @@ int RGWAsyncLockSystemObj::_send_request(const DoutPrefixProvider *dpp) l.set_cookie(cookie); l.set_may_renew(true); - return l.lock_exclusive(&ref.pool.ioctx(), ref.obj.oid); + return l.lock_exclusive(&ref.ioctx, ref.obj.oid); } RGWAsyncLockSystemObj::RGWAsyncLockSystemObj(RGWCoroutine *caller, RGWAioCompletionNotifier *cn, rgw::sal::RadosStore* _store, @@ -260,7 +260,7 @@ int RGWAsyncUnlockSystemObj::_send_request(const DoutPrefixProvider *dpp) l.set_cookie(cookie); - return l.unlock(&ref.pool.ioctx(), ref.obj.oid); + return l.unlock(&ref.ioctx, ref.obj.oid); } RGWAsyncUnlockSystemObj::RGWAsyncUnlockSystemObj(RGWCoroutine *caller, RGWAioCompletionNotifier *cn, rgw::sal::RadosStore* _store, @@ -303,7 +303,7 @@ int RGWRadosSetOmapKeysCR::send_request(const DoutPrefixProvider *dpp) op.omap_set(entries); cn = stack->create_completion_notifier(); - return ref.pool.ioctx().aio_operate(ref.obj.oid, cn->completion(), &op); + return ref.ioctx.aio_operate(ref.obj.oid, cn->completion(), &op); } int RGWRadosSetOmapKeysCR::request_complete() @@ -341,7 +341,7 @@ int RGWRadosGetOmapKeysCR::send_request(const DoutPrefixProvider *dpp) { op.omap_get_keys2(marker, max_entries, &result->entries, &result->more, nullptr); cn = stack->create_completion_notifier(result); - return result->ref.pool.ioctx().aio_operate(result->ref.obj.oid, cn->completion(), &op, NULL); + return result->ref.ioctx.aio_operate(result->ref.obj.oid, cn->completion(), &op, NULL); } int RGWRadosGetOmapKeysCR::request_complete() @@ -379,7 +379,7 @@ int RGWRadosGetOmapValsCR::send_request(const DoutPrefixProvider *dpp) { op.omap_get_vals2(marker, max_entries, &result->entries, &result->more, nullptr); cn = stack->create_completion_notifier(result); - return result->ref.pool.ioctx().aio_operate(result->ref.obj.oid, cn->completion(), &op, NULL); + return result->ref.ioctx.aio_operate(result->ref.obj.oid, cn->completion(), &op, NULL); } 
int RGWRadosGetOmapValsCR::request_complete() @@ -414,7 +414,7 @@ int RGWRadosRemoveOmapKeysCR::send_request(const DoutPrefixProvider *dpp) { op.omap_rm_keys(keys); cn = stack->create_completion_notifier(); - return ref.pool.ioctx().aio_operate(ref.obj.oid, cn->completion(), &op); + return ref.ioctx.aio_operate(ref.obj.oid, cn->completion(), &op); } int RGWRadosRemoveOmapKeysCR::request_complete() @@ -476,22 +476,11 @@ RGWRadosRemoveOidCR::RGWRadosRemoveOidCR(rgw::sal::RadosStore* store, } RGWRadosRemoveOidCR::RGWRadosRemoveOidCR(rgw::sal::RadosStore* store, - RGWSI_RADOS::Obj& obj, + rgw_rados_ref obj, RGWObjVersionTracker* objv_tracker) : RGWSimpleCoroutine(store->ctx()), - ioctx(librados::IoCtx(obj.get_ref().pool.ioctx())), - oid(obj.get_ref().obj.oid), - objv_tracker(objv_tracker) -{ - set_description() << "remove dest=" << oid; -} - -RGWRadosRemoveOidCR::RGWRadosRemoveOidCR(rgw::sal::RadosStore* store, - RGWSI_RADOS::Obj&& obj, - RGWObjVersionTracker* objv_tracker) - : RGWSimpleCoroutine(store->ctx()), - ioctx(std::move(obj.get_ref().pool.ioctx())), - oid(std::move(obj.get_ref().obj.oid)), + ioctx(std::move(obj.ioctx)), + oid(std::move(obj.obj.oid)), objv_tracker(objv_tracker) { set_description() << "remove dest=" << oid; @@ -917,7 +906,7 @@ int RGWAsyncRemoveObj::_send_request(const DoutPrefixProvider *dpp) std::unique_ptr<rgw::sal::Object::DeleteOp> del_op = obj->get_delete_op(); - del_op->params.bucket_owner = bucket->get_info().owner; + del_op->params.bucket_owner.id = bucket->get_info().owner; del_op->params.obj_owner = policy.get_owner(); if (del_if_older) { del_op->params.unmod_since = timestamp; @@ -927,8 +916,8 @@ int RGWAsyncRemoveObj::_send_request(const DoutPrefixProvider *dpp) } del_op->params.olh_epoch = versioned_epoch; del_op->params.marker_version_id = marker_version_id; - del_op->params.obj_owner.set_id(rgw_user(owner)); - del_op->params.obj_owner.set_name(owner_display_name); + del_op->params.obj_owner.id = rgw_user(owner); + del_op->params.obj_owner.display_name = owner_display_name; del_op->params.mtime = timestamp; del_op->params.high_precision_time = true; del_op->params.zones_trace = &zones_trace; @@ -1131,7 +1120,7 @@ int RGWRadosNotifyCR::send_request(const DoutPrefixProvider *dpp) set_status() << "sending request"; cn = stack->create_completion_notifier(); - return ref.pool.ioctx().aio_notify(ref.obj.oid, cn->completion(), request, + return ref.ioctx.aio_notify(ref.obj.oid, cn->completion(), request, timeout_ms, response); } diff --git a/src/rgw/driver/rados/rgw_cr_rados.h b/src/rgw/driver/rados/rgw_cr_rados.h index 676fd3d3cae..2cf99f98cc8 100644 --- a/src/rgw/driver/rados/rgw_cr_rados.h +++ b/src/rgw/driver/rados/rgw_cr_rados.h @@ -446,8 +446,7 @@ public: op.read(0, -1, &bl, nullptr); cn = stack->create_completion_notifier(); - return ref.pool.ioctx().aio_operate(ref.obj.oid, cn->completion(), &op, - nullptr); + return ref.ioctx.aio_operate(ref.obj.oid, cn->completion(), &op, nullptr); } int request_complete() { @@ -557,7 +556,7 @@ public: op.write_full(bl); cn = stack->create_completion_notifier(); - return ref.pool.ioctx().aio_operate(ref.obj.oid, cn->completion(), &op); + return ref.ioctx.aio_operate(ref.obj.oid, cn->completion(), &op); } int request_complete() override { @@ -624,7 +623,7 @@ public: return 0; } - return ref.pool.ioctx().aio_operate(ref.obj.oid, cn->completion(), &op); + return ref.ioctx.aio_operate(ref.obj.oid, cn->completion(), &op); } int request_complete() override { @@ -754,11 +753,7 @@ public: RGWObjVersionTracker* objv_tracker = 
nullptr); RGWRadosRemoveOidCR(rgw::sal::RadosStore* store, - RGWSI_RADOS::Obj& obj, - RGWObjVersionTracker* objv_tracker = nullptr); - - RGWRadosRemoveOidCR(rgw::sal::RadosStore* store, - RGWSI_RADOS::Obj&& obj, + rgw_rados_ref obj, RGWObjVersionTracker* objv_tracker = nullptr); int send_request(const DoutPrefixProvider *dpp) override; diff --git a/src/rgw/driver/rados/rgw_d3n_datacache.cc b/src/rgw/driver/rados/rgw_d3n_datacache.cc index 18d213cf6c3..c81954fce1c 100644 --- a/src/rgw/driver/rados/rgw_d3n_datacache.cc +++ b/src/rgw/driver/rados/rgw_d3n_datacache.cc @@ -104,7 +104,7 @@ void D3nDataCache::init(CephContext *_cct) { struct aioinit ainit{0}; ainit.aio_threads = cct->_conf.get_val<int64_t>("rgw_d3n_libaio_aio_threads"); ainit.aio_num = cct->_conf.get_val<int64_t>("rgw_d3n_libaio_aio_num"); - ainit.aio_idle_time = 10; + ainit.aio_idle_time = 5; aio_init(&ainit); #endif } diff --git a/src/rgw/driver/rados/rgw_d3n_datacache.h b/src/rgw/driver/rados/rgw_d3n_datacache.h index 196f892329f..58ac95d093d 100644 --- a/src/rgw/driver/rados/rgw_d3n_datacache.h +++ b/src/rgw/driver/rados/rgw_d3n_datacache.h @@ -195,10 +195,10 @@ int D3nRGWDataCache<T>::get_obj_iterate_cb(const DoutPrefixProvider *dpp, const return 0; } - auto obj = d->rgwrados->svc.rados->obj(read_obj); - r = obj.open(dpp); + rgw_rados_ref ref; + r = rgw_get_rados_ref(dpp, d->rgwrados->get_rados_handle(), read_obj, &ref); if (r < 0) { - lsubdout(g_ceph_context, rgw, 4) << "failed to open rados context for " << read_obj << dendl; + ldpp_dout(dpp, 4) << "failed to open rados context for " << read_obj << dendl; return r; } @@ -208,8 +208,7 @@ int D3nRGWDataCache<T>::get_obj_iterate_cb(const DoutPrefixProvider *dpp, const const uint64_t cost = len; const uint64_t id = obj_ofs; // use logical object offset for sorting replies - auto& ref = obj.get_ref(); - auto completed = d->aio->get(ref.obj, rgw::Aio::librados_op(ref.pool.ioctx(), std::move(op), d->yield), cost, id); + auto completed = d->aio->get(ref.obj, rgw::Aio::librados_op(ref.ioctx, std::move(op), d->yield), cost, id); return d->flush(std::move(completed)); } else { ldpp_dout(dpp, 20) << "D3nDataCache::" << __func__ << "(): oid=" << read_obj.oid << ", is_head_obj=" << is_head_obj << ", obj-ofs=" << obj_ofs << ", read_ofs=" << read_ofs << ", len=" << len << dendl; @@ -221,20 +220,19 @@ int D3nRGWDataCache<T>::get_obj_iterate_cb(const DoutPrefixProvider *dpp, const const uint64_t id = obj_ofs; // use logical object offset for sorting replies oid = read_obj.oid; - auto obj = d->rgwrados->svc.rados->obj(read_obj); - r = obj.open(dpp); + rgw_rados_ref ref; + r = rgw_get_rados_ref(dpp, d->rgwrados->get_rados_handle(), read_obj, &ref); if (r < 0) { - lsubdout(g_ceph_context, rgw, 0) << "D3nDataCache: Error: failed to open rados context for " << read_obj << ", r=" << r << dendl; + ldpp_dout(dpp, 4) << "failed to open rados context for " << read_obj << dendl; return r; } - auto& ref = obj.get_ref(); const bool is_compressed = (astate->attrset.find(RGW_ATTR_COMPRESSION) != astate->attrset.end()); const bool is_encrypted = (astate->attrset.find(RGW_ATTR_CRYPT_MODE) != astate->attrset.end()); if (read_ofs != 0 || astate->size != astate->accounted_size || is_compressed || is_encrypted) { d->d3n_bypass_cache_write = true; lsubdout(g_ceph_context, rgw, 5) << "D3nDataCache: " << __func__ << "(): Note - bypassing datacache: oid=" << read_obj.oid << ", read_ofs!=0 = " << read_ofs << ", size=" << astate->size << " != accounted_size=" << astate->accounted_size << ", is_compressed=" << 
is_compressed << ", is_encrypted=" << is_encrypted << dendl; - auto completed = d->aio->get(ref.obj, rgw::Aio::librados_op(ref.pool.ioctx(), std::move(op), d->yield), cost, id); + auto completed = d->aio->get(ref.obj, rgw::Aio::librados_op(ref.ioctx, std::move(op), d->yield), cost, id); r = d->flush(std::move(completed)); return r; } @@ -251,7 +249,7 @@ int D3nRGWDataCache<T>::get_obj_iterate_cb(const DoutPrefixProvider *dpp, const } else { // Write To Cache ldpp_dout(dpp, 20) << "D3nDataCache: " << __func__ << "(): WRITE TO CACHE: oid=" << read_obj.oid << ", obj-ofs=" << obj_ofs << ", read_ofs=" << read_ofs << " len=" << len << dendl; - auto completed = d->aio->get(ref.obj, rgw::Aio::librados_op(ref.pool.ioctx(), std::move(op), d->yield), cost, id); + auto completed = d->aio->get(ref.obj, rgw::Aio::librados_op(ref.ioctx, std::move(op), d->yield), cost, id); return d->flush(std::move(completed)); } } diff --git a/src/rgw/driver/rados/rgw_data_sync.cc b/src/rgw/driver/rados/rgw_data_sync.cc index 02965f9073d..7b273e68129 100644 --- a/src/rgw/driver/rados/rgw_data_sync.cc +++ b/src/rgw/driver/rados/rgw_data_sync.cc @@ -1457,7 +1457,7 @@ public: } if (complete->timestamp != ceph::real_time{}) { tn->log(10, SSTR("writing " << *complete << " to error repo for retry")); - yield call(rgw::error_repo::write_cr(sync_env->driver->svc()->rados, error_repo, + yield call(rgw::error_repo::write_cr(sync_env->driver->getRados()->get_rados_handle(), error_repo, rgw::error_repo::encode_key(complete->bs, complete->gen), complete->timestamp)); if (retcode < 0) { @@ -1465,7 +1465,7 @@ public: } } } else if (complete->retry) { - yield call(rgw::error_repo::remove_cr(sync_env->driver->svc()->rados, error_repo, + yield call(rgw::error_repo::remove_cr(sync_env->driver->getRados()->get_rados_handle(), error_repo, rgw::error_repo::encode_key(complete->bs, complete->gen), complete->timestamp)); if (retcode < 0) { @@ -1529,7 +1529,7 @@ public: if (retcode == -ENOENT) { // don't retry if bucket instance does not exist tn->log(10, SSTR("bucket instance or log layout does not exist on source for bucket " << source_bs.bucket)); - yield call(rgw::error_repo::remove_cr(sync_env->driver->svc()->rados, error_repo, + yield call(rgw::error_repo::remove_cr(sync_env->driver->getRados()->get_rados_handle(), error_repo, error_marker, timestamp)); return set_cr_done(); } else if (retcode < 0) { @@ -1544,7 +1544,7 @@ public: pool = sync_env->svc->zone->get_zone_params().log_pool; error_repo = datalog_oid_for_error_repo(sc, sync_env->driver, pool, source_bs); tn->log(10, SSTR("writing shard_id " << sid << " of gen " << each->gen << " to error repo for retry")); - yield_spawn_window(rgw::error_repo::write_cr(sync_env->driver->svc()->rados, error_repo, + yield_spawn_window(rgw::error_repo::write_cr(sync_env->driver->getRados()->get_rados_handle(), error_repo, rgw::error_repo::encode_key(bs, each->gen), timestamp), sc->lcc.adj_concurrency(cct->_conf->rgw_data_sync_spawn_window), [&](uint64_t stack_id, int ret) { @@ -1563,7 +1563,7 @@ public: }); // once everything succeeds, remove the full sync obligation from the error repo - yield call(rgw::error_repo::remove_cr(sync_env->driver->svc()->rados, error_repo, + yield call(rgw::error_repo::remove_cr(sync_env->driver->getRados()->get_rados_handle(), error_repo, error_marker, timestamp)); return set_cr_done(); } @@ -1648,7 +1648,7 @@ public: if (retcode < 0) { tn->log(10, SSTR("full sync: failed to read remote bucket info. 
Writing " << source_bs.shard_id << " to error repo for retry")); - yield call(rgw::error_repo::write_cr(sync_env->driver->svc()->rados, error_repo, + yield call(rgw::error_repo::write_cr(sync_env->driver->getRados()->get_rados_handle(), error_repo, rgw::error_repo::encode_key(source_bs, std::nullopt), timestamp)); if (retcode < 0) { @@ -1670,7 +1670,7 @@ public: timestamp = timestamp_for_bucket_shard(sync_env->driver, sync_status, source_bs); if (retcode < 0) { tn->log(10, SSTR("Write " << source_bs.shard_id << " to error repo for retry")); - yield_spawn_window(rgw::error_repo::write_cr(sync_env->driver->svc()->rados, error_repo, + yield_spawn_window(rgw::error_repo::write_cr(sync_env->driver->getRados()->get_rados_handle(), error_repo, rgw::error_repo::encode_key(source_bs, each->gen), timestamp), sc->lcc.adj_concurrency(cct->_conf->rgw_data_sync_spawn_window), std::nullopt); } else { @@ -2016,7 +2016,7 @@ public: } if (retcode < 0) { tn->log(1, SSTR("failed to parse bucket shard: " << error_marker)); - spawn(rgw::error_repo::remove_cr(sc->env->driver->svc()->rados, + spawn(rgw::error_repo::remove_cr(sc->env->driver->getRados()->get_rados_handle(), error_repo, error_marker, entry_timestamp), false); @@ -2122,9 +2122,9 @@ public: } while (true); drain_all(); - yield marker_tracker->flush(); if (lost_bid) { + yield call(marker_tracker->flush()); return set_cr_error(-EBUSY); } else if (lost_lock) { return set_cr_error(-ECANCELED); @@ -2675,8 +2675,6 @@ public: static int policy_from_attrs(CephContext *cct, const map<string, bufferlist>& attrs, RGWAccessControlPolicy *acl) { - acl->set_ctx(cct); - auto aiter = attrs.find(RGW_ATTR_ACL); if (aiter == attrs.end()) { return -ENOENT; @@ -2727,8 +2725,8 @@ bool RGWUserPermHandler::Bucket::verify_bucket_permission(int perm) { return verify_bucket_permission_no_policy(sync_env->dpp, &(*ps), - &info->user_acl, - &bucket_acl, + info->user_acl, + bucket_acl, perm); } @@ -2744,8 +2742,8 @@ bool RGWUserPermHandler::Bucket::verify_object_permission(const map<string, buff return verify_bucket_permission_no_policy(sync_env->dpp, &(*ps), - &bucket_acl, - &obj_acl, + bucket_acl, + obj_acl, perm); } @@ -5160,8 +5158,11 @@ int RGWBucketShardIncrementalSyncCR::operate(const DoutPrefixProvider *dpp) } yield { // delete the shard status object - auto status_obj = sync_env->svc->rados->obj(marker_tracker.get_obj()); - retcode = status_obj.open(dpp); + rgw_rados_ref status_obj; + retcode = rgw_get_rados_ref(dpp, + sync_env->driver->getRados()->get_rados_handle(), + marker_tracker.get_obj(), + &status_obj); if (retcode < 0) { return set_cr_error(retcode); } @@ -6009,7 +6010,7 @@ int RGWSyncBucketCR::operate(const DoutPrefixProvider *dpp) // use the error repo and sync status timestamp from the datalog shard corresponding to source_bs error_repo = datalog_oid_for_error_repo(sc, sc->env->driver, pool, source_bs); - yield call(rgw::error_repo::write_cr(sc->env->driver->svc()->rados, error_repo, + yield call(rgw::error_repo::write_cr(sc->env->driver->getRados()->get_rados_handle(), error_repo, rgw::error_repo::encode_key(source_bs, current_gen), ceph::real_clock::zero())); if (retcode < 0) { @@ -6068,7 +6069,7 @@ int RGWBucketPipeSyncStatusManager::do_init(const DoutPrefixProvider *dpp, } sync_module.reset(new RGWDefaultSyncModuleInstance()); - auto async_rados = driver->svc()->rados->get_async_processor(); + auto async_rados = driver->svc()->async_processor; sync_env.init(this, driver->ctx(), driver, driver->svc(), async_rados, &http_manager, @@ -6680,7 +6681,7 @@ int 
rgw_read_bucket_inc_sync_status(const DoutPrefixProvider *dpp, RGWDataSyncEnv env; RGWSyncModuleInstanceRef module; // null sync module - env.init(dpp, driver->ctx(), driver, driver->svc(), driver->svc()->rados->get_async_processor(), + env.init(dpp, driver->ctx(), driver, driver->svc(), driver->svc()->async_processor, nullptr, nullptr, nullptr, module, nullptr); RGWDataSyncCtx sc; diff --git a/src/rgw/driver/rados/rgw_lc_tier.cc b/src/rgw/driver/rados/rgw_lc_tier.cc index 2dd8a6625fc..67df0244595 100644 --- a/src/rgw/driver/rados/rgw_lc_tier.cc +++ b/src/rgw/driver/rados/rgw_lc_tier.cc @@ -464,7 +464,6 @@ int RGWLCStreamRead::init_rest_obj() { */ init_headers(attrs, rest_obj.attrs); - rest_obj.acls.set_ctx(cct); const auto aiter = attrs.find(RGW_ATTR_ACL); if (aiter != attrs.end()) { bufferlist& bl = aiter->second; diff --git a/src/rgw/driver/rados/rgw_object_expirer_core.cc b/src/rgw/driver/rados/rgw_object_expirer_core.cc index 72c21c1d262..269043f94df 100644 --- a/src/rgw/driver/rados/rgw_object_expirer_core.cc +++ b/src/rgw/driver/rados/rgw_object_expirer_core.cc @@ -32,7 +32,6 @@ #include "rgw_zone.h" #include "rgw_sal_rados.h" -#include "services/svc_rados.h" #include "services/svc_zone.h" #include "services/svc_sys_obj.h" #include "services/svc_bi_rados.h" @@ -108,8 +107,11 @@ int RGWObjExpStore::objexp_hint_add(const DoutPrefixProvider *dpp, cls_timeindex_add(op, utime_t(delete_at), keyext, hebl); string shard_name = objexp_hint_get_shardname(objexp_key_shard(obj_key, cct->_conf->rgw_objexp_hints_num_shards)); - auto obj = rados_svc->obj(rgw_raw_obj(driver->svc()->zone->get_zone_params().log_pool, shard_name)); - int r = obj.open(dpp); + rgw_rados_ref obj; + int r = rgw_get_rados_ref(dpp, driver->getRados()->get_rados_handle(), + { driver->svc()->zone->get_zone_params().log_pool, + shard_name }, + &obj); if (r < 0) { ldpp_dout(dpp, 0) << "ERROR: " << __func__ << "(): failed to open obj=" << obj << " (r=" << r << ")" << dendl; return r; @@ -131,8 +133,10 @@ int RGWObjExpStore::objexp_hint_list(const DoutPrefixProvider *dpp, cls_timeindex_list(op, utime_t(start_time), utime_t(end_time), marker, max_entries, entries, out_marker, truncated); - auto obj = rados_svc->obj(rgw_raw_obj(driver->svc()->zone->get_zone_params().log_pool, oid)); - int r = obj.open(dpp); + rgw_rados_ref obj; + int r = rgw_get_rados_ref(dpp, driver->getRados()->get_rados_handle(), + { driver->svc()->zone->get_zone_params().log_pool, + oid }, &obj); if (r < 0) { ldpp_dout(dpp, 0) << "ERROR: " << __func__ << "(): failed to open obj=" << obj << " (r=" << r << ")" << dendl; return r; @@ -163,7 +167,7 @@ static int cls_timeindex_trim_repeat(const DoutPrefixProvider *dpp, do { librados::ObjectWriteOperation op; cls_timeindex_trim(op, from_time, to_time, from_marker, to_marker); - int r = rgw_rados_operate(dpp, ref.pool.ioctx(), oid, &op, null_yield); + int r = rgw_rados_operate(dpp, ref.ioctx, oid, &op, null_yield); if (r == -ENODATA) done = true; else if (r < 0) @@ -180,15 +184,17 @@ int RGWObjExpStore::objexp_hint_trim(const DoutPrefixProvider *dpp, const string& from_marker, const string& to_marker, optional_yield y) { - auto obj = rados_svc->obj(rgw_raw_obj(driver->svc()->zone->get_zone_params().log_pool, oid)); - int r = obj.open(dpp); - if (r < 0) { - ldpp_dout(dpp, 0) << "ERROR: " << __func__ << "(): failed to open obj=" << obj << " (r=" << r << ")" << dendl; - return r; + rgw_rados_ref ref; + auto ret = rgw_get_rados_ref(dpp, driver->getRados()->get_rados_handle(), + 
{driver->svc()->zone->get_zone_params().log_pool, oid}, + &ref); + if (ret < 0) { + ldpp_dout(dpp, 0) << "ERROR: " << __func__ << "(): failed to open oid=" + << oid << " (r=" << ret << ")" << dendl; + return ret; } - auto& ref = obj.get_ref(); - int ret = cls_timeindex_trim_repeat(dpp, ref, oid, utime_t(start_time), utime_t(end_time), - from_marker, to_marker, y); + ret = cls_timeindex_trim_repeat(dpp, ref, oid, utime_t(start_time), utime_t(end_time), + from_marker, to_marker, y); if ((ret < 0 ) && (ret != -ENOENT)) { return ret; } diff --git a/src/rgw/driver/rados/rgw_object_expirer_core.h b/src/rgw/driver/rados/rgw_object_expirer_core.h index b616570b82f..d23457fe75f 100644 --- a/src/rgw/driver/rados/rgw_object_expirer_core.h +++ b/src/rgw/driver/rados/rgw_object_expirer_core.h @@ -30,19 +30,16 @@ #include "rgw_sal_rados.h" -class RGWSI_RADOS; class RGWSI_Zone; class RGWBucketInfo; class cls_timeindex_entry; class RGWObjExpStore { CephContext *cct; - RGWSI_RADOS *rados_svc; rgw::sal::RadosStore* driver; public: - RGWObjExpStore(CephContext *_cct, RGWSI_RADOS *_rados_svc, rgw::sal::RadosStore* _driver) : cct(_cct), - rados_svc(_rados_svc), - driver(_driver) {} + RGWObjExpStore(CephContext *_cct, rgw::sal::RadosStore* _driver) : cct(_cct), + driver(_driver) {} int objexp_hint_add(const DoutPrefixProvider *dpp, const ceph::real_time& delete_at, @@ -101,7 +98,7 @@ protected: public: explicit RGWObjectExpirer(rgw::sal::Driver* _driver) : driver(_driver), - exp_store(_driver->ctx(), static_cast<rgw::sal::RadosStore*>(driver)->svc()->rados, static_cast<rgw::sal::RadosStore*>(driver)), + exp_store(_driver->ctx(), static_cast<rgw::sal::RadosStore*>(driver)), worker(NULL) { } ~RGWObjectExpirer() { diff --git a/src/rgw/driver/rados/rgw_period.cc b/src/rgw/driver/rados/rgw_period.cc index 61602b354e2..4a16faccefb 100644 --- a/src/rgw/driver/rados/rgw_period.cc +++ b/src/rgw/driver/rados/rgw_period.cc @@ -154,7 +154,7 @@ static int read_sync_status(const DoutPrefixProvider *dpp, rgw::sal::Driver* dri { rgw::sal::RadosStore* rados_store = static_cast<rgw::sal::RadosStore*>(driver); // initialize a sync status manager to read the status - RGWMetaSyncStatusManager mgr(rados_store, rados_store->svc()->rados->get_async_processor()); + RGWMetaSyncStatusManager mgr(rados_store, rados_store->svc()->async_processor); int r = mgr.init(dpp); if (r < 0) { return r; } diff --git a/src/rgw/driver/rados/rgw_pubsub_push.cc b/src/rgw/driver/rados/rgw_pubsub_push.cc index bdb24ce9ad1..05dc9e65d0e 100644 --- a/src/rgw/driver/rados/rgw_pubsub_push.cc +++ b/src/rgw/driver/rados/rgw_pubsub_push.cc @@ -115,6 +115,55 @@ public: } }; +namespace { +// this allows waiting until "finish()" is called from a different thread +// waiting could be blocking the waiting thread or yielding, depending +// on compilation flag support and whether the optional_yield is set +class Waiter { + using Signature = void(boost::system::error_code); + using Completion = ceph::async::Completion<Signature>; + using CompletionInit = boost::asio::async_completion<yield_context, Signature>; + std::unique_ptr<Completion> completion = nullptr; + int ret; + + bool done = false; + mutable std::mutex lock; + mutable std::condition_variable cond; + +public: + int wait(optional_yield y) { + std::unique_lock l{lock}; + if (done) { + return ret; + } + if (y) { + boost::system::error_code ec; + auto&& token = y.get_yield_context()[ec]; + CompletionInit init(token); + completion = Completion::create(y.get_io_context().get_executor(),
std::move(init.completion_handler)); + l.unlock(); + init.result.get(); + return -ec.value(); + } + cond.wait(l, [this]{return (done==true);}); + return ret; + } + + void finish(int r) { + std::unique_lock l{lock}; + ret = r; + done = true; + if (completion) { + boost::system::error_code ec(-ret, boost::system::system_category()); + Completion::post(std::move(completion), ec); + } else { + cond.notify_all(); + } + } +}; +} // namespace + #ifdef WITH_RADOSGW_AMQP_ENDPOINT class RGWPubSubAMQPEndpoint : public RGWPubSubEndpoint { private: @@ -187,71 +236,17 @@ public: } } - // this allows waiting untill "finish()" is called from a different thread - // waiting could be blocking the waiting thread or yielding, depending - // with compilation flag support and whether the optional_yield is set - class Waiter { - using Signature = void(boost::system::error_code); - using Completion = ceph::async::Completion<Signature>; - std::unique_ptr<Completion> completion = nullptr; - int ret; - - mutable std::atomic<bool> done = false; - mutable std::mutex lock; - mutable std::condition_variable cond; - - template <typename ExecutionContext, typename CompletionToken> - auto async_wait(ExecutionContext& ctx, CompletionToken&& token) { - boost::asio::async_completion<CompletionToken, Signature> init(token); - auto& handler = init.completion_handler; - { - std::unique_lock l{lock}; - completion = Completion::create(ctx.get_executor(), std::move(handler)); - } - return init.result.get(); - } - - public: - int wait(optional_yield y) { - if (done) { - return ret; - } - if (y) { - auto& io_ctx = y.get_io_context(); - auto& yield_ctx = y.get_yield_context(); - boost::system::error_code ec; - async_wait(io_ctx, yield_ctx[ec]); - return -ec.value(); - } - std::unique_lock l(lock); - cond.wait(l, [this]{return (done==true);}); - return ret; - } - - void finish(int r) { - std::unique_lock l{lock}; - ret = r; - done = true; - if (completion) { - boost::system::error_code ec(-ret, boost::system::system_category()); - Completion::post(std::move(completion), ec); - } else { - cond.notify_all(); - } - } - }; - int send_to_completion_async(CephContext* cct, const rgw_pubsub_s3_event& event, optional_yield y) override { if (ack_level == ack_level_t::None) { return amqp::publish(conn_id, topic, json_format_pubsub_event(event)); } else { // TODO: currently broker and routable are the same - this will require different flags but the same mechanism - // note: dynamic allocation of Waiter is needed when this is invoked from a beast coroutine - auto w = std::unique_ptr<Waiter>(new Waiter); + auto w = std::make_unique<Waiter>(); const auto rc = amqp::publish_with_confirm(conn_id, topic, json_format_pubsub_event(event), - std::bind(&Waiter::finish, w.get(), std::placeholders::_1)); + [wp = w.get()](int r) { wp->finish(r);} + ); if (rc < 0) { // failed to publish, does not wait for reply return rc; @@ -314,70 +309,16 @@ public: } } - // this allows waiting untill "finish()" is called from a different thread - // waiting could be blocking the waiting thread or yielding, depending - // with compilation flag support and whether the optional_yield is set - class Waiter { - using Signature = void(boost::system::error_code); - using Completion = ceph::async::Completion<Signature>; - std::unique_ptr<Completion> completion = nullptr; - int ret; - - mutable std::atomic<bool> done = false; - mutable std::mutex lock; - mutable std::condition_variable cond; - - template <typename ExecutionContext, typename CompletionToken> - auto 
async_wait(ExecutionContext& ctx, CompletionToken&& token) { - boost::asio::async_completion<CompletionToken, Signature> init(token); - auto& handler = init.completion_handler; - { - std::unique_lock l{lock}; - completion = Completion::create(ctx.get_executor(), std::move(handler)); - } - return init.result.get(); - } - - public: - int wait(optional_yield y) { - if (done) { - return ret; - } - if (y) { - auto& io_ctx = y.get_io_context(); - auto& yield_ctx = y.get_yield_context(); - boost::system::error_code ec; - async_wait(io_ctx, yield_ctx[ec]); - return -ec.value(); - } - std::unique_lock l(lock); - cond.wait(l, [this]{return (done==true);}); - return ret; - } - - void finish(int r) { - std::unique_lock l{lock}; - ret = r; - done = true; - if (completion) { - boost::system::error_code ec(-ret, boost::system::system_category()); - Completion::post(std::move(completion), ec); - } else { - cond.notify_all(); - } - } - }; - int send_to_completion_async(CephContext* cct, const rgw_pubsub_s3_event& event, optional_yield y) override { if (ack_level == ack_level_t::None) { return kafka::publish(conn_name, topic, json_format_pubsub_event(event)); } else { - // note: dynamic allocation of Waiter is needed when this is invoked from a beast coroutine - auto w = std::unique_ptr<Waiter>(new Waiter); + auto w = std::make_unique<Waiter>(); const auto rc = kafka::publish_with_confirm(conn_name, topic, json_format_pubsub_event(event), - std::bind(&Waiter::finish, w.get(), std::placeholders::_1)); + [wp = w.get()](int r) { wp->finish(r); } + ); if (rc < 0) { // failed to publish, does not wait for reply return rc; diff --git a/src/rgw/driver/rados/rgw_putobj_processor.cc b/src/rgw/driver/rados/rgw_putobj_processor.cc index 091adb9830c..67e8a3a998e 100644 --- a/src/rgw/driver/rados/rgw_putobj_processor.cc +++ b/src/rgw/driver/rados/rgw_putobj_processor.cc @@ -126,8 +126,8 @@ void RadosWriter::add_write_hint(librados::ObjectWriteOperation& op) { int RadosWriter::set_stripe_obj(const rgw_raw_obj& raw_obj) { - stripe_obj = store->svc.rados->obj(raw_obj); - return stripe_obj.open(dpp); + return rgw_get_rados_ref(dpp, store->get_rados_handle(), raw_obj, + &stripe_obj); } int RadosWriter::process(bufferlist&& bl, uint64_t offset) @@ -145,8 +145,9 @@ int RadosWriter::process(bufferlist&& bl, uint64_t offset) op.write(offset, data); } constexpr uint64_t id = 0; // unused - auto& ref = stripe_obj.get_ref(); - auto c = aio->get(ref.obj, Aio::librados_op(ref.pool.ioctx(), std::move(op), y), cost, id); + auto c = aio->get(stripe_obj.obj, Aio::librados_op(stripe_obj.ioctx, + std::move(op), y), + cost, id); return process_completed(c, &written); } @@ -160,8 +161,9 @@ int RadosWriter::write_exclusive(const bufferlist& data) op.write_full(data); constexpr uint64_t id = 0; // unused - auto& ref = stripe_obj.get_ref(); - auto c = aio->get(ref.obj, Aio::librados_op(ref.pool.ioctx(), std::move(op), y), cost, id); + auto c = aio->get(stripe_obj.obj, Aio::librados_op(stripe_obj.ioctx, + std::move(op), y), + cost, id); auto d = aio->drain(); c.splice(c.end(), d); return process_completed(c, &written); @@ -553,7 +555,7 @@ int MultipartObjectProcessor::complete(size_t accounted_size, librados::ObjectWriteOperation op; cls_rgw_mp_upload_part_info_update(op, p, info); - r = rgw_rados_operate(rctx.dpp, meta_obj_ref.pool.ioctx(), meta_obj_ref.obj.oid, &op, rctx.y); + r = rgw_rados_operate(rctx.dpp, meta_obj_ref.ioctx, meta_obj_ref.obj.oid, &op, rctx.y); ldpp_dout(rctx.dpp, 20) << "Update meta: " << meta_obj_ref.obj.oid << " part " << 
p << " prefix " << info.manifest.get_prefix() << " return " << r << dendl; if (r == -EOPNOTSUPP) { @@ -567,7 +569,7 @@ int MultipartObjectProcessor::complete(size_t accounted_size, op = librados::ObjectWriteOperation{}; op.assert_exists(); // detect races with abort op.omap_set(m); - r = rgw_rados_operate(rctx.dpp, meta_obj_ref.pool.ioctx(), meta_obj_ref.obj.oid, &op, rctx.y); + r = rgw_rados_operate(rctx.dpp, meta_obj_ref.ioctx, meta_obj_ref.obj.oid, &op, rctx.y); } if (r < 0) { return r == -ENOENT ? -ERR_NO_SUCH_UPLOAD : r; diff --git a/src/rgw/driver/rados/rgw_putobj_processor.h b/src/rgw/driver/rados/rgw_putobj_processor.h index 9a21c0c793a..ce163e36f77 100644 --- a/src/rgw/driver/rados/rgw_putobj_processor.h +++ b/src/rgw/driver/rados/rgw_putobj_processor.h @@ -18,7 +18,6 @@ #include <optional> #include "rgw_putobj.h" -#include "services/svc_rados.h" #include "services/svc_tier_rados.h" #include "rgw_sal.h" #include "rgw_obj_manifest.h" @@ -70,7 +69,7 @@ class RadosWriter : public rgw::sal::DataProcessor { const RGWBucketInfo& bucket_info; RGWObjectCtx& obj_ctx; const rgw_obj head_obj; - RGWSI_RADOS::Obj stripe_obj; // current stripe object + rgw_rados_ref stripe_obj; // current stripe object RawObjSet written; // set of written objects for deletion const DoutPrefixProvider *dpp; optional_yield y; diff --git a/src/rgw/driver/rados/rgw_rados.cc b/src/rgw/driver/rados/rgw_rados.cc index 2b5f7c4e7a6..9ef92270155 100644 --- a/src/rgw/driver/rados/rgw_rados.cc +++ b/src/rgw/driver/rados/rgw_rados.cc @@ -1020,8 +1020,8 @@ void RGWRados::finalize() { /* Before joining any sync threads, drain outstanding requests & * mark the async_processor as going_down() */ - if (svc.rados) { - svc.rados->stop_processor(); + if (svc.async_processor) { + svc.async_processor->stop(); } if (run_sync_thread) { @@ -1172,7 +1172,7 @@ int RGWRados::update_service_map(const DoutPrefixProvider *dpp, std::map<std::st return 0; } -/** +/** * Initialize the RADOS instance and prepare to do other ops * Returns 0 on success, -ERR# on failure. 
*/ @@ -1180,7 +1180,7 @@ int RGWRados::init_complete(const DoutPrefixProvider *dpp, optional_yield y) { int ret; - /* + /* * create sync module instance even if we don't run sync thread, might need it for radosgw-admin */ sync_module = svc.sync_modules->get_sync_module(); @@ -1258,7 +1258,7 @@ int RGWRados::init_complete(const DoutPrefixProvider *dpp, optional_yield y) << pt.second.name << " present in zonegroup" << dendl; } } - auto async_processor = svc.rados->get_async_processor(); + auto async_processor = svc.async_processor; std::lock_guard l{meta_sync_thread_lock}; meta_sync_processor_thread = new RGWMetaSyncProcessorThread(this->driver, async_processor); ret = meta_sync_processor_thread->init(dpp); @@ -1283,7 +1283,7 @@ int RGWRados::init_complete(const DoutPrefixProvider *dpp, optional_yield y) std::lock_guard dl{data_sync_thread_lock}; for (auto source_zone : svc.zone->get_data_sync_source_zones()) { ldpp_dout(dpp, 5) << "starting data sync thread for zone " << source_zone->name << dendl; - auto *thread = new RGWDataSyncProcessorThread(this->driver, svc.rados->get_async_processor(), source_zone); + auto *thread = new RGWDataSyncProcessorThread(this->driver, svc.async_processor, source_zone); ret = thread->init(dpp); if (ret < 0) { ldpp_dout(dpp, 0) << "ERROR: failed to initialize data sync thread" << dendl; @@ -1363,10 +1363,11 @@ int RGWRados::init_complete(const DoutPrefixProvider *dpp, optional_yield y) int RGWRados::init_svc(bool raw, const DoutPrefixProvider *dpp) { if (raw) { - return svc.init_raw(cct, use_cache, null_yield, dpp); + return svc.init_raw(cct, use_cache, get_rados_handle(), null_yield, dpp); } - return svc.init(cct, use_cache, run_sync_thread, null_yield, dpp); + return svc.init(cct, use_cache, run_sync_thread, get_rados_handle(), + null_yield, dpp); } int RGWRados::init_ctl(const DoutPrefixProvider *dpp) @@ -1380,7 +1381,13 @@ int RGWRados::init_ctl(const DoutPrefixProvider *dpp) */ int RGWRados::init_begin(const DoutPrefixProvider *dpp) { - int ret = init_svc(false, dpp); + int ret = init_rados(); + if (ret < 0) { + ldpp_dout(dpp, 0) << "ERROR: failed to init rados (ret=" << cpp_strerror(-ret) << ")" << dendl; + return ret; + } + + ret = init_svc(false, dpp); if (ret < 0) { ldpp_dout(dpp, 0) << "ERROR: failed to init services (ret=" << cpp_strerror(-ret) << ")" << dendl; return ret; @@ -1394,7 +1401,7 @@ int RGWRados::init_begin(const DoutPrefixProvider *dpp) host_id = svc.zone_utils->gen_host_id(); - return init_rados(); + return 0; } /** @@ -1724,7 +1731,7 @@ int RGWRados::decode_policy(const DoutPrefixProvider *dpp, ACLOwner *owner) { auto i = bl.cbegin(); - RGWAccessControlPolicy policy(cct); + RGWAccessControlPolicy policy; try { policy.decode_owner(i); } catch (buffer::error& err) { @@ -1922,7 +1929,7 @@ int RGWRados::Bucket::List::list_objects_ordered( } if (params.access_list_filter && - ! 
params.access_list_filter->filter(obj.name, index_key.name)) { + !params.access_list_filter(obj.name, index_key.name)) { ldpp_dout(dpp, 20) << __func__ << ": skipping past namespaced objects, including \"" << entry.key << "\"" << dendl; @@ -2216,7 +2223,7 @@ int RGWRados::Bucket::List::list_objects_unordered(const DoutPrefixProvider *dpp } if (params.access_list_filter && - !params.access_list_filter->filter(obj.name, index_key.name)) { + !params.access_list_filter(obj.name, index_key.name)) { ldpp_dout(dpp, 20) << __func__ << ": skipping \"" << index_key << "\" because doesn't match filter" << dendl; @@ -2387,7 +2394,9 @@ bool RGWRados::obj_to_raw(const rgw_placement_rule& placement_rule, const rgw_ob std::string RGWRados::get_cluster_fsid(const DoutPrefixProvider *dpp, optional_yield y) { - return svc.rados->cluster_fsid(); + std::string s; + rados.cluster_fsid(&s); + return s; } int RGWRados::get_obj_head_ioctx(const DoutPrefixProvider *dpp, @@ -2422,25 +2431,19 @@ int RGWRados::get_obj_head_ref(const DoutPrefixProvider *dpp, const rgw_obj& obj, rgw_rados_ref *ref) { - get_obj_bucket_and_oid_loc(obj, ref->obj.oid, ref->obj.loc); - - rgw_pool pool; - if (!get_obj_data_pool(target_placement_rule, obj, &pool)) { - ldpp_dout(dpp, 0) << "ERROR: cannot get data pool for obj=" << obj << ", probably misconfiguration" << dendl; + rgw_raw_obj raw; + get_obj_bucket_and_oid_loc(obj, raw.oid, raw.loc); + if (!get_obj_data_pool(target_placement_rule, obj, &raw.pool)) { + ldpp_dout(dpp, 0) << "ERROR: cannot get data pool for obj=" << obj + << ", probably misconfiguration" << dendl; return -EIO; } - - ref->pool = svc.rados->pool(pool); - - int r = ref->pool.open(dpp, RGWSI_RADOS::OpenParams() - .set_mostly_omap(false)); + auto r = rgw_get_rados_ref(dpp, get_rados_handle(), raw, ref); if (r < 0) { - ldpp_dout(dpp, 0) << "ERROR: failed opening data pool (pool=" << pool << "); r=" << r << dendl; + ldpp_dout(dpp, 0) << "ERROR: failed opening data pool (pool=" + << raw.pool << "); r=" << r << dendl; return r; } - - ref->pool.ioctx().locator_set_key(ref->obj.loc); - return 0; } @@ -2452,24 +2455,18 @@ int RGWRados::get_obj_head_ref(const DoutPrefixProvider *dpp, return get_obj_head_ref(dpp, bucket_info.placement_rule, obj, ref); } -int RGWRados::get_raw_obj_ref(const DoutPrefixProvider *dpp, const rgw_raw_obj& obj, rgw_rados_ref *ref) +int RGWRados::get_raw_obj_ref(const DoutPrefixProvider *dpp, rgw_raw_obj obj, rgw_rados_ref* ref) { - ref->obj = obj; - - if (ref->obj.oid.empty()) { - ref->obj.oid = obj.pool.to_str(); - ref->obj.pool = svc.zone->get_zone_params().domain_root; + if (obj.oid.empty()) { + obj.oid = obj.pool.to_str(); + obj.pool = svc.zone->get_zone_params().domain_root; } - ref->pool = svc.rados->pool(obj.pool); - int r = ref->pool.open(dpp, RGWSI_RADOS::OpenParams() - .set_mostly_omap(false)); + int r = rgw_get_rados_ref(dpp, get_rados_handle(), std::move(obj), ref); if (r < 0) { ldpp_dout(dpp, 0) << "ERROR: failed opening pool (pool=" << obj.pool << "); r=" << r << dendl; return r; } - ref->pool.ioctx().locator_set_key(ref->obj.loc); - return 0; } @@ -2678,10 +2675,10 @@ int RGWRados::fix_tail_obj_locator(const DoutPrefixProvider *dpp, continue; } - auto& ioctx = ref.pool.ioctx(); + auto& ioctx = ref.ioctx; get_obj_bucket_and_oid_loc(loc, oid, locator); - ref.pool.ioctx().locator_set_key(locator); + ref.ioctx.locator_set_key(locator); ldpp_dout(dpp, 20) << __func__ << ": key=" << key << " oid=" << oid << " locator=" << locator << dendl; @@ -2742,7 +2739,7 @@ int 
RGWRados::BucketShard::init(const rgw_bucket& _bucket, ldpp_dout(dpp, 0) << "ERROR: open_bucket_index_shard() returned ret=" << ret << dendl; return ret; } - ldpp_dout(dpp, 20) << " bucket index object: " << bucket_obj.get_raw_obj() << dendl; + ldpp_dout(dpp, 20) << " bucket index object: " << bucket_obj << dendl; return 0; } @@ -3215,10 +3212,10 @@ int RGWRados::Object::Write::_do_write_meta(uint64_t size, uint64_t accounted_si return r; } - auto& ioctx = ref.pool.ioctx(); + auto& ioctx = ref.ioctx; tracepoint(rgw_rados, operate_enter, req_id.c_str()); - r = rgw_rados_operate(rctx.dpp, ref.pool.ioctx(), ref.obj.oid, &op, rctx.y); + r = rgw_rados_operate(rctx.dpp, ref.ioctx, ref.obj.oid, &op, rctx.y); tracepoint(rgw_rados, operate_exit, req_id.c_str()); if (r < 0) { /* we can expect to get -ECANCELED if object was replaced under, or -ENOENT if was removed, or -EEXIST if it did not exist @@ -4330,8 +4327,8 @@ int RGWRados::fetch_remote_obj(RGWObjectCtx& obj_ctx, } ACLOwner new_owner; - new_owner.set_id(*override_owner); - new_owner.set_name(owner_info.display_name); + new_owner.id = *override_owner; + new_owner.display_name = owner_info.display_name; acl.set_owner(new_owner); @@ -4783,8 +4780,10 @@ int RGWRados::copy_obj(RGWObjectCtx& obj_ctx, ref_tag = tag + '\0'; cls_refcount_get(op, ref_tag, true); - auto obj = svc.rados->obj(miter.get_location().get_raw_obj(this)); - ret = obj.open(dpp); + rgw_rados_ref obj; + ret = rgw_get_rados_ref(dpp, driver->getRados()->get_rados_handle(), + miter.get_location().get_raw_obj(this), + &obj); if (ret < 0) { ldpp_dout(dpp, 0) << "failed to open rados context for " << obj << dendl; goto done_ret; @@ -4792,8 +4791,9 @@ int RGWRados::copy_obj(RGWObjectCtx& obj_ctx, static constexpr uint64_t cost = 1; // 1 throttle unit per request static constexpr uint64_t id = 0; // ids unused - auto& ref = obj.get_ref(); - rgw::AioResultList completed = aio->get(ref.obj, rgw::Aio::librados_op(ref.pool.ioctx(), std::move(op), y), cost, id); + rgw::AioResultList completed = + aio->get(obj.obj, rgw::Aio::librados_op(obj.ioctx, std::move(op), y), + cost, id); ret = rgw::check_for_errors(completed); all_results.splice(all_results.end(), completed); if (ret < 0) { @@ -4860,19 +4860,20 @@ done_ret: if (r.result < 0) { continue; // skip errors } - auto obj = svc.rados->obj(r.obj); - ret2 = obj.open(dpp); + rgw_rados_ref obj; + ret2 = rgw_get_rados_ref(dpp, get_rados_handle(), r.obj, &obj); if (ret2 < 0) { continue; } - auto& ref = obj.get_ref(); ObjectWriteOperation op; cls_refcount_put(op, ref_tag, true); static constexpr uint64_t cost = 1; // 1 throttle unit per request static constexpr uint64_t id = 0; // ids unused - rgw::AioResultList completed = aio->get(ref.obj, rgw::Aio::librados_op(ref.pool.ioctx(), std::move(op), y), cost, id); + rgw::AioResultList completed = + aio->get(obj.obj, rgw::Aio::librados_op(obj.ioctx, std::move(op), y), + cost, id); ret2 = rgw::check_for_errors(completed); if (ret2 < 0) { ldpp_dout(dpp, 0) << "ERROR: cleanup after error failed to drop reference on obj=" << r.obj << dendl; @@ -5072,7 +5073,7 @@ int RGWRados::check_bucket_empty(const DoutPrefixProvider *dpp, RGWBucketInfo& b int RGWRados::delete_bucket(RGWBucketInfo& bucket_info, RGWObjVersionTracker& objv_tracker, optional_yield y, const DoutPrefixProvider *dpp, bool check_empty) { const rgw_bucket& bucket = bucket_info.bucket; - RGWSI_RADOS::Pool index_pool; + librados::IoCtx index_pool; map<int, string> bucket_objs; int r = svc.bi_rados->open_bucket_index(dpp, bucket_info, std::nullopt, 
bucket_info.layout.current_index, &index_pool, &bucket_objs, nullptr); if (r < 0) @@ -5127,7 +5128,7 @@ int RGWRados::delete_bucket(RGWBucketInfo& bucket_info, RGWObjVersionTracker& ob } /* remove bucket index objects asynchronously by best effort */ - (void) CLSRGWIssueBucketIndexClean(index_pool.ioctx(), + (void) CLSRGWIssueBucketIndexClean(index_pool, bucket_objs, cct->_conf->rgw_bucket_index_max_aio)(); } @@ -5151,7 +5152,7 @@ int RGWRados::set_bucket_owner(rgw_bucket& bucket, ACLOwner& owner, const DoutPr return r; } - info.owner = owner.get_id(); + info.owner = owner.id; r = put_bucket_instance_info(info, false, real_time(), &attrs, dpp, y); if (r < 0) { @@ -5315,7 +5316,7 @@ int RGWRados::bucket_check_index(const DoutPrefixProvider *dpp, RGWBucketInfo& b map<RGWObjCategory, RGWStorageStats> *existing_stats, map<RGWObjCategory, RGWStorageStats> *calculated_stats) { - RGWSI_RADOS::Pool index_pool; + librados::IoCtx index_pool; // key - bucket index object id // value - bucket index check OP returned result with the given bucket index object (shard) @@ -5332,7 +5333,7 @@ int RGWRados::bucket_check_index(const DoutPrefixProvider *dpp, RGWBucketInfo& b bucket_objs_ret.emplace(iter.first, rgw_cls_check_index_ret()); } - ret = CLSRGWIssueBucketCheck(index_pool.ioctx(), oids, bucket_objs_ret, cct->_conf->rgw_bucket_index_max_aio)(); + ret = CLSRGWIssueBucketCheck(index_pool, oids, bucket_objs_ret, cct->_conf->rgw_bucket_index_max_aio)(); if (ret < 0) { return ret; } @@ -5348,7 +5349,7 @@ int RGWRados::bucket_check_index(const DoutPrefixProvider *dpp, RGWBucketInfo& b int RGWRados::bucket_rebuild_index(const DoutPrefixProvider *dpp, RGWBucketInfo& bucket_info) { - RGWSI_RADOS::Pool index_pool; + librados::IoCtx index_pool; map<int, string> bucket_objs; int r = svc.bi_rados->open_bucket_index(dpp, bucket_info, std::nullopt, bucket_info.layout.current_index, &index_pool, &bucket_objs, nullptr); @@ -5356,7 +5357,7 @@ int RGWRados::bucket_rebuild_index(const DoutPrefixProvider *dpp, RGWBucketInfo& return r; } - return CLSRGWIssueBucketRebuild(index_pool.ioctx(), bucket_objs, cct->_conf->rgw_bucket_index_max_aio)(); + return CLSRGWIssueBucketRebuild(index_pool, bucket_objs, cct->_conf->rgw_bucket_index_max_aio)(); } static int resync_encrypted_multipart(const DoutPrefixProvider* dpp, @@ -5495,7 +5496,7 @@ int RGWRados::bucket_resync_encrypted_multipart(const DoutPrefixProvider* dpp, int RGWRados::bucket_set_reshard(const DoutPrefixProvider *dpp, const RGWBucketInfo& bucket_info, const cls_rgw_bucket_instance_entry& entry) { - RGWSI_RADOS::Pool index_pool; + librados::IoCtx index_pool; map<int, string> bucket_objs; int r = svc.bi_rados->open_bucket_index(dpp, bucket_info, std::nullopt, bucket_info.layout.current_index, &index_pool, &bucket_objs, nullptr); @@ -5506,7 +5507,7 @@ int RGWRados::bucket_set_reshard(const DoutPrefixProvider *dpp, const RGWBucketI return r; } - r = CLSRGWIssueSetBucketResharding(index_pool.ioctx(), bucket_objs, entry, cct->_conf->rgw_bucket_index_max_aio)(); + r = CLSRGWIssueSetBucketResharding(index_pool, bucket_objs, entry, cct->_conf->rgw_bucket_index_max_aio)(); if (r < 0) { ldpp_dout(dpp, 0) << "ERROR: " << __func__ << ": unable to issue set bucket resharding, r=" << r << " (" << @@ -5619,8 +5620,8 @@ int RGWRados::Object::Delete::delete_obj(optional_yield y, const DoutPrefixProvi struct rgw_bucket_dir_entry_meta meta; - meta.owner = params.obj_owner.get_id().to_str(); - meta.owner_display_name = params.obj_owner.get_display_name(); + meta.owner = 
params.obj_owner.id.to_str(); + meta.owner_display_name = params.obj_owner.display_name; if (real_clock::is_zero(params.mtime)) { meta.mtime = real_clock::now(); @@ -5740,7 +5741,7 @@ int RGWRados::Object::Delete::delete_obj(optional_yield y, const DoutPrefixProvi store->remove_rgw_head_obj(op); - auto& ioctx = ref.pool.ioctx(); + auto& ioctx = ref.ioctx; r = rgw_rados_operate(dpp, ioctx, ref.obj.oid, &op, y); /* raced with another operation, object state is indeterminate */ @@ -5812,7 +5813,7 @@ int RGWRados::delete_raw_obj(const DoutPrefixProvider *dpp, const rgw_raw_obj& o ObjectWriteOperation op; op.remove(); - r = rgw_rados_operate(dpp, ref.pool.ioctx(), ref.obj.oid, &op, y); + r = rgw_rados_operate(dpp, ref.ioctx, ref.obj.oid, &op, y); if (r < 0) return r; @@ -6469,7 +6470,7 @@ int RGWRados::set_attrs(const DoutPrefixProvider *dpp, RGWObjectCtx* octx, RGWBu } struct timespec mtime_ts = real_clock::to_timespec(mtime); op.mtime2(&mtime_ts); - auto& ioctx = ref.pool.ioctx(); + auto& ioctx = ref.ioctx; r = rgw_rados_operate(dpp, ioctx, ref.obj.oid, &op, y); if (state) { if (r >= 0) { @@ -6891,8 +6892,8 @@ int RGWRados::Bucket::UpdateIndex::complete(const DoutPrefixProvider *dpp, int64 ldpp_dout(dpp, 0) << "WARNING: could not decode policy ret=" << ret << dendl; } } - ent.meta.owner = owner.get_id().to_str(); - ent.meta.owner_display_name = owner.get_display_name(); + ent.meta.owner = owner.id.to_str(); + ent.meta.owner_display_name = owner.display_name; ent.meta.content_type = content_type; ent.meta.appendable = appendable; @@ -7161,8 +7162,9 @@ int RGWRados::get_obj_iterate_cb(const DoutPrefixProvider *dpp, } } - auto obj = d->rgwrados->svc.rados->obj(read_obj); - int r = obj.open(dpp); + rgw_rados_ref obj; + int r = rgw_get_rados_ref(dpp, d->rgwrados->get_rados_handle(), read_obj, + &obj); if (r < 0) { ldpp_dout(dpp, 4) << "failed to open rados context for " << read_obj << dendl; return r; @@ -7174,8 +7176,7 @@ int RGWRados::get_obj_iterate_cb(const DoutPrefixProvider *dpp, const uint64_t cost = len; const uint64_t id = obj_ofs; // use logical object offset for sorting replies - auto& ref = obj.get_ref(); - auto completed = d->aio->get(ref.obj, rgw::Aio::librados_op(ref.pool.ioctx(), std::move(op), d->yield), cost, id); + auto completed = d->aio->get(obj.obj, rgw::Aio::librados_op(obj.ioctx, std::move(op), d->yield), cost, id); return d->flush(std::move(completed)); } @@ -7282,7 +7283,7 @@ int RGWRados::obj_operate(const DoutPrefixProvider *dpp, const RGWBucketInfo& bu return r; } - return rgw_rados_operate(dpp, ref.pool.ioctx(), ref.obj.oid, op, y); + return rgw_rados_operate(dpp, ref.ioctx, ref.obj.oid, op, y); } int RGWRados::obj_operate(const DoutPrefixProvider *dpp, const RGWBucketInfo& bucket_info, const rgw_obj& obj, ObjectReadOperation *op, optional_yield y) @@ -7295,7 +7296,7 @@ int RGWRados::obj_operate(const DoutPrefixProvider *dpp, const RGWBucketInfo& bu bufferlist outbl; - return rgw_rados_operate(dpp, ref.pool.ioctx(), ref.obj.oid, op, &outbl, y); + return rgw_rados_operate(dpp, ref.ioctx, ref.obj.oid, op, &outbl, y); } void RGWRados::olh_cancel_modification(const DoutPrefixProvider *dpp, const RGWBucketInfo& bucket_info, @@ -7320,7 +7321,7 @@ void RGWRados::olh_cancel_modification(const DoutPrefixProvider *dpp, const RGWB ObjectWriteOperation op; bucket_index_guard_olh_op(dpp, state, op); op.rmxattr(attr_name.c_str()); - r = rgw_rados_operate(dpp, ref.pool.ioctx(), ref.obj.oid, &op, y); + r = rgw_rados_operate(dpp, ref.ioctx, ref.obj.oid, &op, y); if (r < 0) { if (r != 
-ENOENT && r != -ECANCELED) { ldpp_dout(dpp, 0) << __func__ << " target_obj=" << olh_obj << " rmxattr rgw_rados_operate() returned " << r << dendl; @@ -7336,7 +7337,7 @@ void RGWRados::olh_cancel_modification(const DoutPrefixProvider *dpp, const RGWB rm_op.cmpxattr(RGW_ATTR_OLH_INFO, CEPH_OSD_CMPXATTR_OP_EQ, bufferlist()); cls_obj_check_prefix_exist(rm_op, RGW_ATTR_OLH_PENDING_PREFIX, true); rm_op.remove(); - r = rgw_rados_operate(dpp, ref.pool.ioctx(), ref.obj.oid, &rm_op, y); + r = rgw_rados_operate(dpp, ref.ioctx, ref.obj.oid, &rm_op, y); } if (r < 0 && (r != -ENOENT && r != -ECANCELED)) { ldpp_dout(dpp, 0) << __func__ << " target_obj=" << olh_obj << " olh rm rgw_rados_operate() returned " << r << dendl; @@ -7556,8 +7557,8 @@ int RGWRados::block_while_resharding(RGWRados::BucketShard *bs, constexpr int num_retries = 10; for (int i = 1; i <= num_retries; i++) { // nb: 1-based for loop - auto& ref = bs->bucket_obj.get_ref(); - ret = cls_rgw_get_bucket_resharding(ref.pool.ioctx(), ref.obj.oid, &entry); + auto& ref = bs->bucket_obj; + ret = cls_rgw_get_bucket_resharding(ref.ioctx, ref.obj.oid, &entry); if (ret == -ENOENT) { ret = fetch_new_bucket_info("get_bucket_resharding_failed"); if (ret < 0) { @@ -7696,7 +7697,7 @@ int RGWRados::bucket_index_link_olh(const DoutPrefixProvider *dpp, RGWBucketInfo r = guard_reshard(dpp, &bs, obj_instance, bucket_info, [&](BucketShard *bs) -> int { cls_rgw_obj_key key(obj_instance.key.get_index_key_name(), obj_instance.key.instance); - auto& ref = bs->bucket_obj.get_ref(); + auto& ref = bs->bucket_obj; librados::ObjectWriteOperation op; op.assert_exists(); // bucket index shard must exist cls_rgw_guard_bucket_resharding(op, -ERR_BUSY_RESHARDING); @@ -7704,7 +7705,7 @@ int RGWRados::bucket_index_link_olh(const DoutPrefixProvider *dpp, RGWBucketInfo delete_marker, op_tag, meta, olh_epoch, unmod_since, high_precision_time, svc.zone->need_to_log_data(), zones_trace); - return rgw_rados_operate(dpp, ref.pool.ioctx(), ref.obj.oid, &op, y); + return rgw_rados_operate(dpp, ref.ioctx, ref.obj.oid, &op, y); }, y); if (r < 0) { ldpp_dout(dpp, 20) << "rgw_rados_operate() after cls_rgw_bucket_link_olh() returned r=" << r << dendl; @@ -7747,13 +7748,13 @@ int RGWRados::bucket_index_unlink_instance(const DoutPrefixProvider *dpp, cls_rgw_obj_key key(obj_instance.key.get_index_key_name(), obj_instance.key.instance); r = guard_reshard(dpp, &bs, obj_instance, bucket_info, [&](BucketShard *bs) -> int { - auto& ref = bs->bucket_obj.get_ref(); + auto& ref = bs->bucket_obj; librados::ObjectWriteOperation op; op.assert_exists(); // bucket index shard must exist cls_rgw_guard_bucket_resharding(op, -ERR_BUSY_RESHARDING); cls_rgw_bucket_unlink_instance(op, key, op_tag, olh_tag, olh_epoch, svc.zone->need_to_log_data(), zones_trace); - return rgw_rados_operate(dpp, ref.pool.ioctx(), ref.obj.oid, &op, y); + return rgw_rados_operate(dpp, ref.ioctx, ref.obj.oid, &op, y); }, y); if (r < 0) { ldpp_dout(dpp, 20) << "rgw_rados_operate() after cls_rgw_bucket_link_instance() returned r=" << r << dendl; @@ -7787,14 +7788,14 @@ int RGWRados::bucket_index_read_olh_log(const DoutPrefixProvider *dpp, cls_rgw_obj_key key(obj_instance.key.get_index_key_name(), string()); - auto& shard_ref = bs.bucket_obj.get_ref(); + auto& shard_ref = bs.bucket_obj; ObjectReadOperation op; rgw_cls_read_olh_log_ret log_ret; int op_ret = 0; - cls_rgw_get_olh_log(op, key, ver_marker, olh_tag, log_ret, op_ret); + cls_rgw_get_olh_log(op, key, ver_marker, olh_tag, log_ret, op_ret); bufferlist outbl; - r = 
rgw_rados_operate(dpp, shard_ref.pool.ioctx(), shard_ref.obj.oid, &op, &outbl, y); + r = rgw_rados_operate(dpp, shard_ref.ioctx, shard_ref.obj.oid, &op, &outbl, y); if (r < 0) { return r; } @@ -7855,7 +7856,7 @@ int RGWRados::repair_olh(const DoutPrefixProvider *dpp, RGWObjState* state, cons if (r < 0) { return r; } - r = rgw_rados_operate(dpp, ref.pool.ioctx(), ref.obj.oid, &op, y); + r = rgw_rados_operate(dpp, ref.ioctx, ref.obj.oid, &op, y); if (r < 0) { ldpp_dout(dpp, 0) << "repair_olh failed to write olh attributes with " << cpp_strerror(r) << dendl; @@ -7922,10 +7923,10 @@ int RGWRados::bucket_index_clear_olh(const DoutPrefixProvider *dpp, [&](BucketShard *pbs) -> int { ObjectWriteOperation op; op.assert_exists(); // bucket index shard must exist - auto& ref = pbs->bucket_obj.get_ref(); + auto& ref = bs.bucket_obj; cls_rgw_guard_bucket_resharding(op, -ERR_BUSY_RESHARDING); cls_rgw_clear_olh(op, key, olh_tag); - return rgw_rados_operate(dpp, ref.pool.ioctx(), ref.obj.oid, &op, y); + return rgw_rados_operate(dpp, ref.ioctx, ref.obj.oid, &op, y); }, y); if (ret < 0) { ldpp_dout(dpp, 5) << "rgw_rados_operate() after cls_rgw_clear_olh() returned ret=" << ret << dendl; @@ -8076,7 +8077,7 @@ int RGWRados::apply_olh_log(const DoutPrefixProvider *dpp, } /* update olh object */ - r = rgw_rados_operate(dpp, ref.pool.ioctx(), ref.obj.oid, &op, y); + r = rgw_rados_operate(dpp, ref.ioctx, ref.obj.oid, &op, y); if (r < 0) { ldpp_dout(dpp, 0) << "ERROR: " << __func__ << ": could not apply olh update to oid \"" << ref.obj.oid << "\", r=" << r << dendl; return r; @@ -8154,7 +8155,7 @@ int RGWRados::clear_olh(const DoutPrefixProvider *dpp, cls_obj_check_prefix_exist(rm_op, RGW_ATTR_OLH_PENDING_PREFIX, true); /* fail if found one of these, pending modification */ rm_op.remove(); - r = rgw_rados_operate(dpp, ref.pool.ioctx(), ref.obj.oid, &rm_op, y); + r = rgw_rados_operate(dpp, ref.ioctx, ref.obj.oid, &rm_op, y); if (r == -ECANCELED) { return r; /* someone else made a modification in the meantime */ } @@ -8442,7 +8443,7 @@ int RGWRados::remove_olh_pending_entries(const DoutPrefixProvider *dpp, const RG op.rmxattr(i->first.c_str()); } - r = rgw_rados_operate(dpp, ref.pool.ioctx(), ref.obj.oid, &op, y); + r = rgw_rados_operate(dpp, ref.ioctx, ref.obj.oid, &op, y); if (r == -ENOENT || r == -ECANCELED) { /* raced with some other change, shouldn't sweat about it */ return 0; @@ -8538,10 +8539,10 @@ int RGWRados::raw_obj_stat(const DoutPrefixProvider *dpp, op.read(0, cct->_conf->rgw_max_chunk_size, first_chunk, NULL); } bufferlist outbl; - r = rgw_rados_operate(dpp, ref.pool.ioctx(), ref.obj.oid, &op, &outbl, y); + r = rgw_rados_operate(dpp, ref.ioctx, ref.obj.oid, &op, &outbl, y); if (epoch) { - *epoch = ref.pool.ioctx().get_last_version(); + *epoch = ref.ioctx.get_last_version(); } if (r < 0) @@ -8603,34 +8604,36 @@ int RGWRados::get_bucket_stats(const DoutPrefixProvider *dpp, } class RGWGetBucketStatsContext : public RGWGetDirHeader_CB { - RGWGetBucketStats_CB *cb; + boost::intrusive_ptr<rgw::sal::ReadStatsCB> cb; uint32_t pendings; - map<RGWObjCategory, RGWStorageStats> stats; + RGWStorageStats stats; int ret_code; bool should_cb; ceph::mutex lock = ceph::make_mutex("RGWGetBucketStatsContext"); public: - RGWGetBucketStatsContext(RGWGetBucketStats_CB *_cb, uint32_t _pendings) - : cb(_cb), pendings(_pendings), stats(), ret_code(0), should_cb(true) + RGWGetBucketStatsContext(boost::intrusive_ptr<rgw::sal::ReadStatsCB> cb, uint32_t _pendings) + : cb(std::move(cb)), pendings(_pendings), stats(), ret_code(0), 
should_cb(true) {} - void handle_response(int r, rgw_bucket_dir_header& header) override { + void handle_response(int r, const rgw_bucket_dir_header& header) override { std::lock_guard l{lock}; if (should_cb) { - if ( r >= 0) { - accumulate_raw_stats(header, stats); + if (r >= 0) { + for (const auto& [c, s] : header.stats) { + stats.size += s.total_size; + stats.size_rounded += s.total_size_rounded; + stats.size_utilized += s.actual_size; + stats.num_objects += s.num_entries; + } } else { ret_code = r; } // Are we all done? if (--pendings == 0) { - if (!ret_code) { - cb->set_response(&stats); - } - cb->handle_response(ret_code); - cb->put(); + cb->handle_response(ret_code, stats); + cb.reset(); } } } @@ -8641,19 +8644,16 @@ public: } }; -int RGWRados::get_bucket_stats_async(const DoutPrefixProvider *dpp, RGWBucketInfo& bucket_info, const rgw::bucket_index_layout_generation& idx_layout, int shard_id, RGWGetBucketStats_CB *ctx) +int RGWRados::get_bucket_stats_async(const DoutPrefixProvider *dpp, RGWBucketInfo& bucket_info, const rgw::bucket_index_layout_generation& idx_layout, int shard_id, boost::intrusive_ptr<rgw::sal::ReadStatsCB> cb) { int num_aio = 0; - RGWGetBucketStatsContext *get_ctx = new RGWGetBucketStatsContext(ctx, bucket_info.layout.current_index.layout.normal.num_shards ? : 1); - ceph_assert(get_ctx); - int r = cls_bucket_head_async(dpp, bucket_info, idx_layout, shard_id, get_ctx, &num_aio); + boost::intrusive_ptr headercb = new RGWGetBucketStatsContext(std::move(cb), bucket_info.layout.current_index.layout.normal.num_shards ? : 1); + int r = cls_bucket_head_async(dpp, bucket_info, idx_layout, shard_id, headercb, &num_aio); if (r < 0) { - ctx->put(); if (num_aio) { - get_ctx->unset_cb(); + headercb->unset_cb(); } } - get_ctx->put(); return r; } @@ -8774,7 +8774,7 @@ int RGWRados::append_async(const DoutPrefixProvider *dpp, rgw_raw_obj& obj, size librados::Rados *rad = get_rados_handle(); librados::AioCompletion *completion = rad->aio_create_completion(nullptr, nullptr); - r = ref.pool.ioctx().aio_append(ref.obj.oid, completion, bl, size); + r = ref.ioctx.aio_append(ref.obj.oid, completion, bl, size); completion->release(); return r; } @@ -8830,7 +8830,7 @@ string RGWRados::pool_iterate_get_cursor(RGWPoolIterCtx& ctx) static int do_pool_iterate(const DoutPrefixProvider *dpp, CephContext* cct, RGWPoolIterCtx& ctx, uint32_t num, vector<rgw_bucket_dir_entry>& objs, - bool *is_truncated, RGWAccessListFilter *filter) + bool *is_truncated, const rgw::AccessListFilter& filter) { librados::IoCtx& io_ctx = ctx.io_ctx; librados::NObjectIterator& iter = ctx.iter; @@ -8847,7 +8847,7 @@ static int do_pool_iterate(const DoutPrefixProvider *dpp, CephContext* cct, RGWP ldpp_dout(dpp, 20) << "RGWRados::pool_iterate: got " << oid << dendl; // fill it in with initial values; we may correct later - if (filter && !filter->filter(oid, oid)) + if (filter && !filter(oid, oid)) continue; e.key = oid; @@ -8861,7 +8861,7 @@ static int do_pool_iterate(const DoutPrefixProvider *dpp, CephContext* cct, RGWP } int RGWRados::pool_iterate(const DoutPrefixProvider *dpp, RGWPoolIterCtx& ctx, uint32_t num, vector<rgw_bucket_dir_entry>& objs, - bool *is_truncated, RGWAccessListFilter *filter) + bool *is_truncated, const rgw::AccessListFilter& filter) { // catch exceptions from NObjectIterator::operator++() try { @@ -8898,9 +8898,9 @@ int RGWRados::list_raw_objects_next(const DoutPrefixProvider *dpp, const string& if (!ctx.initialized) { return -EINVAL; } - RGWAccessListFilterPrefix filter(prefix_filter); + auto 
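
The list-filter rework used just below drops the RGWAccessListFilter class hierarchy in favor of a plain callable invoked as filter(name, key). A sketch of what rgw::AccessListFilter and the prefix factory plausibly look like; the authoritative definitions live in rgw_tools.h and may differ in detail:

#include <functional>
#include <string>

namespace rgw {

// any callable taking (object name, output key) and returning "keep it?"
using AccessListFilter =
    std::function<bool(const std::string&, std::string&)>;

inline AccessListFilter AccessListFilterPrefix(std::string prefix) {
  return [prefix = std::move(prefix)](const std::string& name,
                                      std::string& key) {
    // keep only names that start with the configured prefix
    return name.compare(0, prefix.size(), prefix) == 0;
  };
}

} // namespace rgw
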
filter = rgw::AccessListFilterPrefix(prefix_filter); vector<rgw_bucket_dir_entry> objs; - int r = pool_iterate(dpp, ctx.iter_ctx, max, objs, is_truncated, &filter); + int r = pool_iterate(dpp, ctx.iter_ctx, max, objs, is_truncated, filter); if (r < 0) { if(r != -ENOENT) ldpp_dout(dpp, 10) << "failed to list objects pool_iterate returned r=" << r << dendl; @@ -8990,21 +8990,21 @@ int RGWRados::bi_get(const DoutPrefixProvider *dpp, const RGWBucketInfo& bucket_ cls_rgw_obj_key key(obj.key.get_index_key_name(), obj.key.instance); - auto& ref = bs.bucket_obj.get_ref(); - - return cls_rgw_bi_get(ref.pool.ioctx(), ref.obj.oid, index_type, key, entry); + auto& ref = bs.bucket_obj; + + return cls_rgw_bi_get(ref.ioctx, ref.obj.oid, index_type, key, entry); } void RGWRados::bi_put(ObjectWriteOperation& op, BucketShard& bs, rgw_cls_bi_entry& entry, optional_yield y) { - auto& ref = bs.bucket_obj.get_ref(); + auto& ref = bs.bucket_obj; cls_rgw_bi_put(op, ref.obj.oid, entry); } int RGWRados::bi_put(BucketShard& bs, rgw_cls_bi_entry& entry, optional_yield y) { - auto& ref = bs.bucket_obj.get_ref(); - int ret = cls_rgw_bi_put(ref.pool.ioctx(), ref.obj.oid, entry); + auto& ref = bs.bucket_obj; + int ret = cls_rgw_bi_put(ref.ioctx, ref.obj.oid, entry); if (ret < 0) return ret; @@ -9042,8 +9042,8 @@ int RGWRados::bi_list(const DoutPrefixProvider *dpp, rgw_bucket& bucket, return ret; } - auto& ref = bs.bucket_obj.get_ref(); - ret = cls_rgw_bi_list(ref.pool.ioctx(), ref.obj.oid, obj_name_filter, marker, max, entries, is_truncated); + auto& ref = bs.bucket_obj; + ret = cls_rgw_bi_list(ref.ioctx, ref.obj.oid, obj_name_filter, marker, max, entries, is_truncated); if (ret == -ENOENT) { *is_truncated = false; } @@ -9056,8 +9056,8 @@ int RGWRados::bi_list(const DoutPrefixProvider *dpp, rgw_bucket& bucket, int RGWRados::bi_list(BucketShard& bs, const string& obj_name_filter, const string& marker, uint32_t max, list<rgw_cls_bi_entry> *entries, bool *is_truncated, optional_yield y) { - auto& ref = bs.bucket_obj.get_ref(); - int ret = cls_rgw_bi_list(ref.pool.ioctx(), ref.obj.oid, obj_name_filter, marker, max, entries, is_truncated); + auto& ref = bs.bucket_obj; + int ret = cls_rgw_bi_list(ref.ioctx, ref.obj.oid, obj_name_filter, marker, max, entries, is_truncated); if (ret < 0) return ret; @@ -9082,8 +9082,8 @@ int RGWRados::bi_list(const DoutPrefixProvider *dpp, int RGWRados::bi_remove(const DoutPrefixProvider *dpp, BucketShard& bs) { - auto& ref = bs.bucket_obj.get_ref(); - int ret = ref.pool.ioctx().remove(ref.obj.oid); + auto& ref = bs.bucket_obj; + int ret = ref.ioctx.remove(ref.obj.oid); if (ret == -ENOENT) { ret = 0; } @@ -9247,13 +9247,13 @@ int RGWRados::cls_obj_complete_cancel(BucketShard& bs, string& tag, rgw_obj& obj int RGWRados::cls_obj_set_bucket_tag_timeout(const DoutPrefixProvider *dpp, RGWBucketInfo& bucket_info, uint64_t timeout) { - RGWSI_RADOS::Pool index_pool; + librados::IoCtx index_pool; map<int, string> bucket_objs; int r = svc.bi_rados->open_bucket_index(dpp, bucket_info, std::nullopt, bucket_info.layout.current_index, &index_pool, &bucket_objs, nullptr); if (r < 0) return r; - return CLSRGWIssueSetTagTimeout(index_pool.ioctx(), bucket_objs, cct->_conf->rgw_bucket_index_max_aio, timeout)(); + return CLSRGWIssueSetTagTimeout(index_pool, bucket_objs, cct->_conf->rgw_bucket_index_max_aio, timeout)(); } @@ -9333,7 +9333,7 @@ int RGWRados::cls_bucket_list_ordered(const DoutPrefixProvider *dpp, m.clear(); - RGWSI_RADOS::Pool index_pool; + librados::IoCtx index_pool; // key - oid (for different 
shards if there is any) // value - list result for the corresponding oid (shard), it is filled by // the AIO callback @@ -9381,7 +9381,7 @@ int RGWRados::cls_bucket_list_ordered(const DoutPrefixProvider *dpp, " shard(s) for " << num_entries_per_shard << " entries to get " << num_entries << " total entries" << dendl; - auto& ioctx = index_pool.ioctx(); + auto& ioctx = index_pool; std::map<int, rgw_cls_list_ret> shard_list_results; cls_rgw_obj_key start_after_key(start_after.name, start_after.instance); r = CLSRGWIssueBucketList(ioctx, start_after_key, prefix, delimiter, @@ -9666,10 +9666,9 @@ int RGWRados::cls_bucket_list_unordered(const DoutPrefixProvider *dpp, ldout_bitx(bitx, dpp, 25) << "BACKTRACE: " << __func__ << ": " << ClibBackTrace(0) << dendl_bitx; ent_list.clear(); - static MultipartMetaFilter multipart_meta_filter; *is_truncated = false; - RGWSI_RADOS::Pool index_pool; + librados::IoCtx index_pool; std::map<int, std::string> oids; int r = svc.bi_rados->open_bucket_index(dpp, bucket_info, shard_id, idx_layout, &index_pool, &oids, nullptr); @@ -9677,7 +9676,7 @@ int RGWRados::cls_bucket_list_unordered(const DoutPrefixProvider *dpp, return r; } - auto& ioctx = index_pool.ioctx(); + auto& ioctx = index_pool; const uint32_t num_shards = oids.size(); @@ -9848,7 +9847,7 @@ int RGWRados::cls_obj_usage_log_add(const DoutPrefixProvider *dpp, const string& ObjectWriteOperation op; cls_rgw_usage_log_add(op, info); - r = rgw_rados_operate(dpp, ref.pool.ioctx(), ref.obj.oid, &op, y); + r = rgw_rados_operate(dpp, ref.ioctx, ref.obj.oid, &op, y); return r; } @@ -9867,7 +9866,7 @@ int RGWRados::cls_obj_usage_log_read(const DoutPrefixProvider *dpp, const string *is_truncated = false; - r = cls_rgw_usage_log_read(ref.pool.ioctx(), ref.obj.oid, user, bucket, start_epoch, end_epoch, + r = cls_rgw_usage_log_read(ref.ioctx, ref.obj.oid, user, bucket, start_epoch, end_epoch, max_entries, read_iter, usage, is_truncated); return r; @@ -9879,7 +9878,7 @@ static int cls_rgw_usage_log_trim_repeat(const DoutPrefixProvider *dpp, rgw_rado do { librados::ObjectWriteOperation op; cls_rgw_usage_log_trim(op, user, bucket, start_epoch, end_epoch); - int r = rgw_rados_operate(dpp, ref.pool.ioctx(), ref.obj.oid, &op, y); + int r = rgw_rados_operate(dpp, ref.ioctx, ref.obj.oid, &op, y); if (r == -ENODATA) done = true; else if (r < 0) @@ -9915,7 +9914,7 @@ int RGWRados::cls_obj_usage_log_clear(const DoutPrefixProvider *dpp, string& oid } librados::ObjectWriteOperation op; cls_rgw_usage_log_clear(op); - r = rgw_rados_operate(dpp, ref.pool.ioctx(), ref.obj.oid, &op, y); + r = rgw_rados_operate(dpp, ref.ioctx, ref.obj.oid, &op, y); return r; } @@ -9938,7 +9937,7 @@ int RGWRados::remove_objs_from_index(const DoutPrefixProvider *dpp, } const uint32_t num_shards = current_index.layout.normal.num_shards; - RGWSI_RADOS::Pool index_pool; + librados::IoCtx index_pool; std::map<int, std::string> index_oids; int r = svc.bi_rados->open_bucket_index(dpp, bucket_info, std::nullopt, bucket_info.layout.current_index, @@ -9981,7 +9980,7 @@ int RGWRados::remove_objs_from_index(const DoutPrefixProvider *dpp, ", shard=" << shard << ", oid=" << oid << ", num_keys=" << removals.second.size() << dendl_bitx; - r = index_pool.ioctx().omap_rm_keys(oid, removals.second); + r = index_pool.omap_rm_keys(oid, removals.second); if (r < 0) { ldout_bitx(bitx, dpp, 0) << "ERROR: " << __func__ << ": omap_rm_keys returned ret=" << r << @@ -10014,9 +10013,8 @@ int RGWRados::check_disk_state(const DoutPrefixProvider *dpp, rgw_obj obj(bucket_info.bucket, 
list_state.key); - MultipartMetaFilter multipart_meta_filter; string temp_key; - if (multipart_meta_filter.filter(list_state.key.name, temp_key)) { + if (MultipartMetaFilter(list_state.key.name, temp_key)) { obj.in_extra_data = true; } @@ -10111,8 +10109,8 @@ int RGWRados::check_disk_state(const DoutPrefixProvider *dpp, object.meta.etag = etag; object.meta.content_type = content_type; object.meta.storage_class = storage_class; - object.meta.owner = owner.get_id().to_str(); - object.meta.owner_display_name = owner.get_display_name(); + object.meta.owner = owner.id.to_str(); + object.meta.owner_display_name = owner.display_name; object.meta.appendable = appendable; // encode suggested updates @@ -10141,8 +10139,8 @@ int RGWRados::check_disk_state(const DoutPrefixProvider *dpp, list_state.tag = astate->obj_tag.c_str(); } - list_state.meta.owner = owner.get_id().to_str(); - list_state.meta.owner_display_name = owner.get_display_name(); + list_state.meta.owner = owner.id.to_str(); + list_state.meta.owner_display_name = owner.display_name; list_state.exists = true; @@ -10156,7 +10154,7 @@ int RGWRados::check_disk_state(const DoutPrefixProvider *dpp, int RGWRados::cls_bucket_head(const DoutPrefixProvider *dpp, const RGWBucketInfo& bucket_info, const rgw::bucket_index_layout_generation& idx_layout, int shard_id, vector<rgw_bucket_dir_header>& headers, map<int, string> *bucket_instance_ids) { - RGWSI_RADOS::Pool index_pool; + librados::IoCtx index_pool; map<int, string> oids; map<int, struct rgw_cls_list_ret> list_results; int r = svc.bi_rados->open_bucket_index(dpp, bucket_info, shard_id, idx_layout, &index_pool, &oids, bucket_instance_ids); @@ -10166,7 +10164,7 @@ int RGWRados::cls_bucket_head(const DoutPrefixProvider *dpp, const RGWBucketInfo return r; } - r = CLSRGWIssueGetDirHeader(index_pool.ioctx(), oids, list_results, cct->_conf->rgw_bucket_index_max_aio)(); + r = CLSRGWIssueGetDirHeader(index_pool, oids, list_results, cct->_conf->rgw_bucket_index_max_aio)(); if (r < 0) { ldpp_dout(dpp, 20) << "cls_bucket_head: CLSRGWIssueGetDirHeader() returned " << r << dendl; @@ -10180,25 +10178,24 @@ int RGWRados::cls_bucket_head(const DoutPrefixProvider *dpp, const RGWBucketInfo return 0; } -int RGWRados::cls_bucket_head_async(const DoutPrefixProvider *dpp, const RGWBucketInfo& bucket_info, const rgw::bucket_index_layout_generation& idx_layout, int shard_id, RGWGetDirHeader_CB *ctx, int *num_aio) +int RGWRados::cls_bucket_head_async(const DoutPrefixProvider *dpp, const RGWBucketInfo& bucket_info, + const rgw::bucket_index_layout_generation& idx_layout, int shard_id, + boost::intrusive_ptr<RGWGetDirHeader_CB> cb, int *num_aio) { - RGWSI_RADOS::Pool index_pool; + librados::IoCtx index_pool; map<int, string> bucket_objs; int r = svc.bi_rados->open_bucket_index(dpp, bucket_info, shard_id, idx_layout, &index_pool, &bucket_objs, nullptr); if (r < 0) return r; - map<int, string>::iterator iter = bucket_objs.begin(); - for (; iter != bucket_objs.end(); ++iter) { - r = cls_rgw_get_dir_header_async(index_pool.ioctx(), iter->second, static_cast<RGWGetDirHeader_CB*>(ctx->get())); + for (auto& pair : bucket_objs) { + r = cls_rgw_get_dir_header_async(index_pool, pair.second, cb); if (r < 0) { - ctx->put(); - break; - } else { - (*num_aio)++; + return r; } + (*num_aio)++; } - return r; + return 0; } int RGWRados::check_bucket_shards(const RGWBucketInfo& bucket_info, @@ -10340,7 +10337,7 @@ int RGWRados::delete_raw_obj_aio(const DoutPrefixProvider *dpp, const rgw_raw_ob cls_rgw_remove_obj(op, prefixes); AioCompletion 
*c = librados::Rados::aio_create_completion(nullptr, nullptr); - ret = ref.pool.ioctx().aio_operate(ref.obj.oid, c, &op); + ret = ref.ioctx.aio_operate(ref.obj.oid, c, &op); if (ret < 0) { ldpp_dout(dpp, -1) << "ERROR: AioOperate failed with ret=" << ret << dendl; c->release(); @@ -10380,7 +10377,7 @@ int RGWRados::delete_obj_aio(const DoutPrefixProvider *dpp, const rgw_obj& obj, cls_rgw_remove_obj(op, prefixes); AioCompletion *c = librados::Rados::aio_create_completion(nullptr, nullptr); - ret = ref.pool.ioctx().aio_operate(ref.obj.oid, c, &op); + ret = ref.ioctx.aio_operate(ref.obj.oid, c, &op); if (ret < 0) { ldpp_dout(dpp, -1) << "ERROR: AioOperate failed with ret=" << ret << dendl; c->release(); diff --git a/src/rgw/driver/rados/rgw_rados.h b/src/rgw/driver/rados/rgw_rados.h index f61730cb4de..986dc091163 100644 --- a/src/rgw/driver/rados/rgw_rados.h +++ b/src/rgw/driver/rados/rgw_rados.h @@ -33,13 +33,13 @@ #include "rgw_aio.h" #include "rgw_d3n_cacherequest.h" -#include "services/svc_rados.h" #include "services/svc_bi_rados.h" #include "common/Throttle.h" #include "common/ceph_mutex.h" #include "rgw_cache.h" #include "rgw_sal_fwd.h" #include "rgw_pubsub.h" +#include "rgw_tools.h" struct D3nDataCache; @@ -576,7 +576,7 @@ public: } - int get_raw_obj_ref(const DoutPrefixProvider *dpp, const rgw_raw_obj& obj, rgw_rados_ref *ref); + int get_raw_obj_ref(const DoutPrefixProvider *dpp, rgw_raw_obj obj, rgw_rados_ref *ref); int list_raw_objects_init(const DoutPrefixProvider *dpp, const rgw_pool& pool, const std::string& marker, RGWListRawObjsCtx *ctx); int list_raw_objects_next(const DoutPrefixProvider *dpp, const std::string& prefix_filter, int max, @@ -651,7 +651,7 @@ public: RGWRados *store; rgw_bucket bucket; int shard_id; - RGWSI_RADOS::Obj bucket_obj; + rgw_rados_ref bucket_obj; explicit BucketShard(RGWRados *_store) : store(_store), shard_id(-1) {} int init(const rgw_bucket& _bucket, const rgw_obj& obj, @@ -664,7 +664,7 @@ public: friend std::ostream& operator<<(std::ostream& out, const BucketShard& bs) { out << "BucketShard:{ bucket=" << bs.bucket << ", shard_id=" << bs.shard_id << - ", bucket_ojb=" << bs.bucket_obj << "}"; + ", bucket_obj=" << bs.bucket_obj << "}"; return out; } }; @@ -1023,14 +1023,13 @@ public: rgw_obj_key end_marker; std::string ns; bool enforce_ns; - RGWAccessListFilter* access_list_filter; + rgw::AccessListFilter access_list_filter; RGWBucketListNameFilter force_check_filter; bool list_versions; bool allow_unordered; Params() : enforce_ns(true), - access_list_filter(nullptr), list_versions(false), allow_unordered(false) {} @@ -1401,7 +1400,7 @@ public: int decode_policy(const DoutPrefixProvider *dpp, bufferlist& bl, ACLOwner *owner); int get_bucket_stats(const DoutPrefixProvider *dpp, RGWBucketInfo& bucket_info, const rgw::bucket_index_layout_generation& idx_layout, int shard_id, std::string *bucket_ver, std::string *master_ver, std::map<RGWObjCategory, RGWStorageStats>& stats, std::string *max_marker, bool* syncstopped = NULL); - int get_bucket_stats_async(const DoutPrefixProvider *dpp, RGWBucketInfo& bucket_info, const rgw::bucket_index_layout_generation& idx_layout, int shard_id, RGWGetBucketStats_CB *cb); + int get_bucket_stats_async(const DoutPrefixProvider *dpp, RGWBucketInfo& bucket_info, const rgw::bucket_index_layout_generation& idx_layout, int shard_id, boost::intrusive_ptr<rgw::sal::ReadStatsCB> cb); int put_bucket_instance_info(RGWBucketInfo& info, bool exclusive, ceph::real_time mtime, const std::map<std::string, bufferlist> *pattrs, const 
DoutPrefixProvider *dpp, optional_yield y); /* xxx dang obj_ctx -> svc */ @@ -1483,7 +1482,7 @@ public: int cls_bucket_head_async(const DoutPrefixProvider *dpp, const RGWBucketInfo& bucket_info, const rgw::bucket_index_layout_generation& idx_layout, - int shard_id, RGWGetDirHeader_CB *ctx, int *num_aio); + int shard_id, boost::intrusive_ptr<RGWGetDirHeader_CB> cb, int *num_aio); int bi_get_instance(const DoutPrefixProvider *dpp, const RGWBucketInfo& bucket_info, const rgw_obj& obj, rgw_bucket_dir_entry *dirent, optional_yield y); int bi_get_olh(const DoutPrefixProvider *dpp, const RGWBucketInfo& bucket_info, const rgw_obj& obj, rgw_bucket_olh_entry *olh, optional_yield y); int bi_get(const DoutPrefixProvider *dpp, const RGWBucketInfo& bucket_info, const rgw_obj& obj, BIIndexType index_type, rgw_cls_bi_entry *entry, optional_yield y); @@ -1638,7 +1637,7 @@ public: */ int pool_iterate(const DoutPrefixProvider *dpp, RGWPoolIterCtx& ctx, uint32_t num, std::vector<rgw_bucket_dir_entry>& objs, - bool *is_truncated, RGWAccessListFilter *filter); + bool *is_truncated, const rgw::AccessListFilter& filter); uint64_t next_bucket_id(); diff --git a/src/rgw/driver/rados/rgw_sal_rados.cc b/src/rgw/driver/rados/rgw_sal_rados.cc index 478d7a5b706..5ede8d44fa9 100644 --- a/src/rgw/driver/rados/rgw_sal_rados.cc +++ b/src/rgw/driver/rados/rgw_sal_rados.cc @@ -187,7 +187,7 @@ int RadosUser::read_stats(const DoutPrefixProvider *dpp, return store->ctl()->user->read_stats(dpp, get_id(), stats, y, last_stats_sync, last_stats_update); } -int RadosUser::read_stats_async(const DoutPrefixProvider *dpp, RGWGetUserStats_CB* cb) +int RadosUser::read_stats_async(const DoutPrefixProvider *dpp, boost::intrusive_ptr<ReadStatsCB> cb) { return store->svc()->user->read_stats_async(dpp, get_id(), cb); } @@ -531,7 +531,7 @@ int RadosBucket::read_stats(const DoutPrefixProvider *dpp, int RadosBucket::read_stats_async(const DoutPrefixProvider *dpp, const bucket_index_layout_generation& idx_layout, - int shard_id, RGWGetBucketStats_CB* ctx) + int shard_id, boost::intrusive_ptr<ReadStatsCB> ctx) { return store->getRados()->get_bucket_stats_async(dpp, get_info(), idx_layout, shard_id, ctx); } @@ -684,7 +684,7 @@ int RadosBucket::set_acl(const DoutPrefixProvider* dpp, RGWAccessControlPolicy & map<string, bufferlist>& attrs = get_attrs(); attrs[RGW_ATTR_ACL] = aclbl; - info.owner = acl.get_owner().get_id(); + info.owner = acl.get_owner().id; int r = store->ctl()->bucket->store_bucket_instance_info(info.bucket, info, y, dpp, @@ -751,13 +751,12 @@ int RadosBucket::list_multiparts(const DoutPrefixProvider *dpp, { rgw::sal::Bucket::ListParams params; rgw::sal::Bucket::ListResults results; - MultipartMetaFilter mp_filter; params.prefix = prefix; params.delim = delim; params.marker = marker; params.ns = RGW_OBJ_NS_MULTIPART; - params.access_list_filter = &mp_filter; + params.access_list_filter = MultipartMetaFilter; int ret = list(dpp, params, max_uploads, results, y); @@ -767,8 +766,10 @@ int RadosBucket::list_multiparts(const DoutPrefixProvider *dpp, if (!results.objs.empty()) { for (const rgw_bucket_dir_entry& dentry : results.objs) { rgw_obj_key key(dentry.key); - ACLOwner owner(rgw_user(dentry.meta.owner)); - owner.set_name(dentry.meta.owner_display_name); + const ACLOwner owner{ + .id = rgw_user(dentry.meta.owner), + .display_name = dentry.meta.owner_display_name + }; uploads.push_back(this->get_multipart_upload(key.name, std::nullopt, std::move(owner), dentry.meta.mtime)); } @@ -1591,7 +1592,7 @@ int RadosObject::get_torrent_info(const 
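
Throughout these hunks the ACLOwner accessors (get_id()/get_display_name()/set_*) give way to direct member access and aggregate initialization. A sketch of the shape this implies for ACLOwner; the authoritative definition stays in rgw_acl.h with its encode/decode/dump members, and the defaulted comparison is an assumption consistent with the operator== removal from rgw_acl.cc further down:

struct ACLOwner {
  rgw_user id;
  std::string display_name;

  friend bool operator==(const ACLOwner&, const ACLOwner&) = default;
};

// call sites then construct owners by aggregate initialization, as in
// list_multiparts() above:
const ACLOwner owner{
  .id = rgw_user(dentry.meta.owner),
  .display_name = dentry.meta.owner_display_name
};
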
DoutPrefixProvider* dpp, librados::ObjectReadOperation op; op.omap_get_vals_by_keys(keys, &result, nullptr); - ret = rgw_rados_operate(dpp, ref.pool.ioctx(), ref.obj.oid, &op, nullptr, y); + ret = rgw_rados_operate(dpp, ref.ioctx, ref.obj.oid, &op, nullptr, y); if (ret < 0) { return ret; } @@ -1648,7 +1649,7 @@ int RadosObject::chown(User& new_user, const DoutPrefixProvider* dpp, optional_y } bufferlist& bl = aiter->second; - RGWAccessControlPolicy policy(store->ctx()); + RGWAccessControlPolicy policy; ACLOwner owner; auto bliter = bl.cbegin(); try { @@ -1664,16 +1665,16 @@ int RadosObject::chown(User& new_user, const DoutPrefixProvider* dpp, optional_y RGWAccessControlList& acl = policy.get_acl(); //Remove grant that is set to old owner - acl.remove_canon_user_grant(owner.get_id()); + acl.remove_canon_user_grant(owner.id); //Create a grant and add grant ACLGrant grant; grant.set_canon(new_user.get_id(), new_user.get_display_name(), RGW_PERM_FULL_CONTROL); - acl.add_grant(&grant); + acl.add_grant(grant); //Update the ACL owner to the new user - owner.set_id(new_user.get_id()); - owner.set_name(new_user.get_display_name()); + owner.id = new_user.get_id(); + owner.display_name = new_user.get_display_name(); policy.set_owner(owner); bl.clear(); @@ -1987,7 +1988,7 @@ RadosObject::RadosDeleteOp::RadosDeleteOp(RadosObject *_source) : int RadosObject::RadosDeleteOp::delete_obj(const DoutPrefixProvider* dpp, optional_yield y) { - parent_op.params.bucket_owner = params.bucket_owner.get_id(); + parent_op.params.bucket_owner = params.bucket_owner.id; parent_op.params.versioning_status = params.versioning_status; parent_op.params.obj_owner = params.obj_owner; parent_op.params.olh_epoch = params.olh_epoch; @@ -2221,7 +2222,7 @@ int RadosMultipartUpload::abort(const DoutPrefixProvider *dpp, CephContext *cct, } std::unique_ptr<rgw::sal::Object::DeleteOp> del_op = meta_obj->get_delete_op(); - del_op->params.bucket_owner = bucket->get_info().owner; + del_op->params.bucket_owner.id = bucket->get_info().owner; del_op->params.versioning_status = 0; if (!remove_objs.empty()) { del_op->params.remove_objs = &remove_objs; @@ -2273,7 +2274,7 @@ int RadosMultipartUpload::init(const DoutPrefixProvider *dpp, optional_yield y, RGWRados::Object::Write obj_op(&op_target); op_target.set_versioning_disabled(true); /* no versioning for multipart meta */ - obj_op.meta.owner = owner.get_id(); + obj_op.meta.owner = owner.id; obj_op.meta.category = RGWObjCategory::MultiMeta; obj_op.meta.flags = PUT_OBJ_CREATE_EXCL; obj_op.meta.mtime = &mtime; @@ -2562,7 +2563,7 @@ int RadosMultipartUpload::complete(const DoutPrefixProvider *dpp, obj_op.meta.remove_objs = &remove_objs; obj_op.meta.ptag = &tag; /* use req_id as operation tag */ - obj_op.meta.owner = owner.get_id(); + obj_op.meta.owner = owner.id; obj_op.meta.flags = PUT_OBJ_CREATE; obj_op.meta.modify_tail = true; obj_op.meta.completeMultipart = true; diff --git a/src/rgw/driver/rados/rgw_sal_rados.h b/src/rgw/driver/rados/rgw_sal_rados.h index 7efd7757c3c..fbf14410c17 100644 --- a/src/rgw/driver/rados/rgw_sal_rados.h +++ b/src/rgw/driver/rados/rgw_sal_rados.h @@ -271,7 +271,7 @@ class RadosUser : public StoreUser { optional_yield y, RGWStorageStats* stats, ceph::real_time* last_stats_sync = nullptr, ceph::real_time* last_stats_update = nullptr) override; - virtual int read_stats_async(const DoutPrefixProvider *dpp, RGWGetUserStats_CB* cb) override; + virtual int read_stats_async(const DoutPrefixProvider *dpp, boost::intrusive_ptr<ReadStatsCB> cb) override; virtual int 
complete_flush_stats(const DoutPrefixProvider *dpp, optional_yield y) override; virtual int read_usage(const DoutPrefixProvider *dpp, uint64_t start_epoch, uint64_t end_epoch, uint32_t max_entries, bool* is_truncated, RGWUsageIter& usage_iter, @@ -512,7 +512,7 @@ class RadosBucket : public StoreBucket { bool* syncstopped = nullptr) override; virtual int read_stats_async(const DoutPrefixProvider *dpp, const bucket_index_layout_generation& idx_layout, - int shard_id, RGWGetBucketStats_CB* ctx) override; + int shard_id, boost::intrusive_ptr<ReadStatsCB> ctx) override; int sync_user_stats(const DoutPrefixProvider *dpp, optional_yield y, RGWBucketEnt* ent) override; int check_bucket_shards(const DoutPrefixProvider* dpp, uint64_t num_objs, diff --git a/src/rgw/driver/rados/rgw_service.cc b/src/rgw/driver/rados/rgw_service.cc index 4fcb1ebdef7..5b78472dfe3 100644 --- a/src/rgw/driver/rados/rgw_service.cc +++ b/src/rgw/driver/rados/rgw_service.cc @@ -17,7 +17,6 @@ #include "services/svc_meta_be_otp.h" #include "services/svc_notify.h" #include "services/svc_otp.h" -#include "services/svc_rados.h" #include "services/svc_zone.h" #include "services/svc_zone_utils.h" #include "services/svc_quota.h" @@ -31,6 +30,7 @@ #include "common/errno.h" #include "rgw_bucket.h" +#include "rgw_cr_rados.h" #include "rgw_datalog.h" #include "rgw_metadata.h" #include "rgw_otp.h" @@ -51,6 +51,7 @@ int RGWServices_Def::init(CephContext *cct, bool have_cache, bool raw, bool run_sync, + librados::Rados* rados, optional_yield y, const DoutPrefixProvider *dpp) { @@ -68,7 +69,6 @@ int RGWServices_Def::init(CephContext *cct, meta_be_otp = std::make_unique<RGWSI_MetaBackend_OTP>(cct); notify = std::make_unique<RGWSI_Notify>(cct); otp = std::make_unique<RGWSI_OTP>(cct); - rados = std::make_unique<RGWSI_RADOS>(cct); zone = std::make_unique<RGWSI_Zone>(cct); zone_utils = std::make_unique<RGWSI_ZoneUtils>(cct); quota = std::make_unique<RGWSI_Quota>(cct); @@ -77,6 +77,8 @@ int RGWServices_Def::init(CephContext *cct, sysobj_core = std::make_unique<RGWSI_SysObj_Core>(cct); user_rados = std::make_unique<RGWSI_User_RADOS>(cct); role_rados = std::make_unique<RGWSI_Role_RADOS>(cct); + async_processor = std::make_unique<RGWAsyncRadosProcessor>( + cct, cct->_conf->rgw_num_async_rados_threads); if (have_cache) { sysobj_cache = std::make_unique<RGWSI_SysObj_Cache>(dpp, cct); @@ -84,8 +86,9 @@ int RGWServices_Def::init(CephContext *cct, vector<RGWSI_MetaBackend *> meta_bes{meta_be_sobj.get(), meta_be_otp.get()}; + async_processor->start(); finisher->init(); - bi_rados->init(zone.get(), rados.get(), bilog_rados.get(), datalog_rados.get()); + bi_rados->init(zone.get(), rados, bilog_rados.get(), datalog_rados.get()); bilog_rados->init(bi_rados.get()); bucket_sobj->init(zone.get(), sysobj.get(), sysobj_cache.get(), bi_rados.get(), meta.get(), meta_be_sobj.get(), @@ -94,27 +97,27 @@ int RGWServices_Def::init(CephContext *cct, sysobj.get(), sysobj_cache.get(), bucket_sobj.get()); - cls->init(zone.get(), rados.get()); - config_key_rados->init(rados.get()); - mdlog->init(rados.get(), zone.get(), sysobj.get(), cls.get()); + cls->init(zone.get(), rados); + config_key_rados->init(rados); + mdlog->init(rados, zone.get(), sysobj.get(), cls.get(), + async_processor.get()); meta->init(sysobj.get(), mdlog.get(), meta_bes); meta_be_sobj->init(sysobj.get(), mdlog.get()); meta_be_otp->init(sysobj.get(), mdlog.get(), cls.get()); - notify->init(zone.get(), rados.get(), finisher.get()); + notify->init(zone.get(), rados, finisher.get()); otp->init(zone.get(), 
meta.get(), meta_be_otp.get()); - rados->init(); - zone->init(sysobj.get(), rados.get(), sync_modules.get(), bucket_sync_sobj.get()); - zone_utils->init(rados.get(), zone.get()); + zone->init(sysobj.get(), rados, sync_modules.get(), bucket_sync_sobj.get()); + zone_utils->init(rados, zone.get()); quota->init(zone.get()); sync_modules->init(zone.get()); - sysobj_core->core_init(rados.get(), zone.get()); + sysobj_core->core_init(rados, zone.get()); if (have_cache) { - sysobj_cache->init(rados.get(), zone.get(), notify.get()); - sysobj->init(rados.get(), sysobj_cache.get()); + sysobj_cache->init(rados, zone.get(), notify.get()); + sysobj->init(rados, sysobj_cache.get()); } else { - sysobj->init(rados.get(), sysobj_core.get()); + sysobj->init(rados, sysobj_core.get()); } - user_rados->init(rados.get(), zone.get(), sysobj.get(), sysobj_cache.get(), + user_rados->init(rados, zone.get(), sysobj.get(), sysobj_cache.get(), meta.get(), meta_be_sobj.get(), sync_modules.get()); role_rados->init(zone.get(), meta.get(), meta_be_sobj.get(), sysobj.get()); @@ -134,12 +137,6 @@ int RGWServices_Def::init(CephContext *cct, } } - r = rados->start(y, dpp); - if (r < 0) { - ldpp_dout(dpp, 0) << "ERROR: failed to start rados service (" << cpp_strerror(-r) << dendl; - return r; - } - if (!raw) { r = zone->start(y, dpp); if (r < 0) { @@ -149,7 +146,7 @@ int RGWServices_Def::init(CephContext *cct, r = datalog_rados->start(dpp, &zone->get_zone(), zone->get_zone_params(), - rados->get_rados_handle()); + rados); if (r < 0) { ldpp_dout(dpp, 0) << "ERROR: failed to start datalog_rados service (" << cpp_strerror(-r) << dendl; return r; @@ -299,18 +296,18 @@ void RGWServices_Def::shutdown() quota->shutdown(); zone_utils->shutdown(); zone->shutdown(); - rados->shutdown(); + async_processor->stop(); has_shutdown = true; - } - -int RGWServices::do_init(CephContext *_cct, bool have_cache, bool raw, bool run_sync, optional_yield y, const DoutPrefixProvider *dpp) +int RGWServices::do_init(CephContext *_cct, bool have_cache, bool raw, + bool run_sync, librados::Rados* rados, + optional_yield y, const DoutPrefixProvider *dpp) { cct = _cct; - int r = _svc.init(cct, have_cache, raw, run_sync, y, dpp); + int r = _svc.init(cct, have_cache, raw, run_sync, rados, y, dpp); if (r < 0) { return r; } @@ -333,7 +330,6 @@ int RGWServices::do_init(CephContext *_cct, bool have_cache, bool raw, bool run_ meta_be_otp = _svc.meta_be_otp.get(); notify = _svc.notify.get(); otp = _svc.otp.get(); - rados = _svc.rados.get(); zone = _svc.zone.get(); zone_utils = _svc.zone_utils.get(); quota = _svc.quota.get(); @@ -343,6 +339,7 @@ int RGWServices::do_init(CephContext *_cct, bool have_cache, bool raw, bool run_ core = _svc.sysobj_core.get(); user = _svc.user_rados.get(); role = _svc.role_rados.get(); + async_processor = _svc.async_processor.get(); return 0; } diff --git a/src/rgw/driver/rados/rgw_service.h b/src/rgw/driver/rados/rgw_service.h index 4c0b8d8421f..7c05f043a47 100644 --- a/src/rgw/driver/rados/rgw_service.h +++ b/src/rgw/driver/rados/rgw_service.h @@ -62,7 +62,6 @@ class RGWSI_MetaBackend_SObj; class RGWSI_MetaBackend_OTP; class RGWSI_Notify; class RGWSI_OTP; -class RGWSI_RADOS; class RGWSI_Zone; class RGWSI_ZoneUtils; class RGWSI_Quota; @@ -74,6 +73,7 @@ class RGWSI_User; class RGWSI_User_RADOS; class RGWDataChangesLog; class RGWSI_Role_RADOS; +class RGWAsyncRadosProcessor; struct RGWServices_Def { @@ -93,7 +93,6 @@ struct RGWServices_Def std::unique_ptr<RGWSI_MetaBackend_OTP> meta_be_otp; std::unique_ptr<RGWSI_Notify> notify; 
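
With the RGWSI_RADOS wrapper removed, the librados handle is created by the driver and passed down to RGWServices_Def::init(), which now also owns the RGWAsyncRadosProcessor. A rough sketch of the new call shape; the surrounding bootstrap code (cct, have_cache, run_sync, y, dpp) is assumed from the caller and not part of this diff:

librados::Rados rados;
int r = rados.init_with_context(cct);
if (r == 0) {
  r = rados.connect();
}
if (r == 0) {
  RGWServices svc;
  r = svc.init(cct, have_cache, run_sync, &rados, y, dpp);
  // raw/offline tools would use svc.init_raw(cct, have_cache, &rados, y, dpp)
}
if (r < 0) {
  ldpp_dout(dpp, 0) << "service init failed: " << cpp_strerror(-r) << dendl;
}
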
std::unique_ptr<RGWSI_OTP> otp; - std::unique_ptr<RGWSI_RADOS> rados; std::unique_ptr<RGWSI_Zone> zone; std::unique_ptr<RGWSI_ZoneUtils> zone_utils; std::unique_ptr<RGWSI_Quota> quota; @@ -104,11 +103,14 @@ struct RGWServices_Def std::unique_ptr<RGWSI_User_RADOS> user_rados; std::unique_ptr<RGWDataChangesLog> datalog_rados; std::unique_ptr<RGWSI_Role_RADOS> role_rados; + std::unique_ptr<RGWAsyncRadosProcessor> async_processor; RGWServices_Def(); ~RGWServices_Def(); - int init(CephContext *cct, bool have_cache, bool raw_storage, bool run_sync, optional_yield y, const DoutPrefixProvider *dpp); + int init(CephContext *cct, bool have_cache, bool raw_storage, bool run_sync, + librados::Rados* rados, optional_yield y, + const DoutPrefixProvider *dpp); void shutdown(); }; @@ -137,7 +139,6 @@ struct RGWServices RGWSI_MetaBackend *meta_be_otp{nullptr}; RGWSI_Notify *notify{nullptr}; RGWSI_OTP *otp{nullptr}; - RGWSI_RADOS *rados{nullptr}; RGWSI_Zone *zone{nullptr}; RGWSI_ZoneUtils *zone_utils{nullptr}; RGWSI_Quota *quota{nullptr}; @@ -147,15 +148,21 @@ struct RGWServices RGWSI_SysObj_Core *core{nullptr}; RGWSI_User *user{nullptr}; RGWSI_Role_RADOS *role{nullptr}; + RGWAsyncRadosProcessor* async_processor; - int do_init(CephContext *cct, bool have_cache, bool raw_storage, bool run_sync, optional_yield y, const DoutPrefixProvider *dpp); + int do_init(CephContext *cct, bool have_cache, bool raw_storage, + bool run_sync, librados::Rados* rados, optional_yield y, + const DoutPrefixProvider *dpp); - int init(CephContext *cct, bool have_cache, bool run_sync, optional_yield y, const DoutPrefixProvider *dpp) { - return do_init(cct, have_cache, false, run_sync, y, dpp); + int init(CephContext *cct, bool have_cache, bool run_sync, + librados::Rados* rados, optional_yield y, + const DoutPrefixProvider *dpp) { + return do_init(cct, have_cache, false, run_sync, rados, y, dpp); } - int init_raw(CephContext *cct, bool have_cache, optional_yield y, const DoutPrefixProvider *dpp) { - return do_init(cct, have_cache, true, false, y, dpp); + int init_raw(CephContext *cct, bool have_cache, librados::Rados* rados, + optional_yield y, const DoutPrefixProvider *dpp) { + return do_init(cct, have_cache, true, false, rados, y, dpp); } void shutdown() { _svc.shutdown(); diff --git a/src/rgw/driver/rados/rgw_sync_error_repo.cc b/src/rgw/driver/rados/rgw_sync_error_repo.cc index 44305b60b6b..aca671039ee 100644 --- a/src/rgw/driver/rados/rgw_sync_error_repo.cc +++ b/src/rgw/driver/rados/rgw_sync_error_repo.cc @@ -15,7 +15,6 @@ #include "rgw_sync_error_repo.h" #include "rgw_coroutine.h" #include "rgw_sal.h" -#include "services/svc_rados.h" #include "cls/cmpomap/client.h" namespace rgw::error_repo { @@ -118,16 +117,17 @@ int remove(librados::ObjectWriteOperation& op, } class RGWErrorRepoWriteCR : public RGWSimpleCoroutine { - RGWSI_RADOS::Obj obj; + librados::Rados* rados; + const rgw_raw_obj& raw_obj; std::string key; ceph::real_time timestamp; boost::intrusive_ptr<RGWAioCompletionNotifier> cn; public: - RGWErrorRepoWriteCR(RGWSI_RADOS* rados, const rgw_raw_obj& raw_obj, + RGWErrorRepoWriteCR(librados::Rados* rados, const rgw_raw_obj& raw_obj, const std::string& key, ceph::real_time timestamp) - : RGWSimpleCoroutine(rados->ctx()), - obj(rados->obj(raw_obj)), + : RGWSimpleCoroutine(static_cast<CephContext*>(rados->cct())), + raw_obj(raw_obj), key(key), timestamp(timestamp) {} @@ -137,13 +137,14 @@ class RGWErrorRepoWriteCR : public RGWSimpleCoroutine { if (r < 0) { return r; } - r = obj.open(dpp); + rgw_rados_ref ref; + r = 
rgw_get_rados_ref(dpp, rados, raw_obj, &ref); if (r < 0) { return r; } cn = stack->create_completion_notifier(); - return obj.aio_operate(cn->completion(), &op); + return ref.aio_operate(cn->completion(), &op); } int request_complete() override { @@ -151,7 +152,7 @@ class RGWErrorRepoWriteCR : public RGWSimpleCoroutine { } }; -RGWCoroutine* write_cr(RGWSI_RADOS* rados, +RGWCoroutine* write_cr(librados::Rados* rados, const rgw_raw_obj& obj, const std::string& key, ceph::real_time timestamp) @@ -161,16 +162,17 @@ RGWCoroutine* write_cr(RGWSI_RADOS* rados, class RGWErrorRepoRemoveCR : public RGWSimpleCoroutine { - RGWSI_RADOS::Obj obj; + librados::Rados* rados; + const rgw_raw_obj& raw_obj; std::string key; ceph::real_time timestamp; boost::intrusive_ptr<RGWAioCompletionNotifier> cn; public: - RGWErrorRepoRemoveCR(RGWSI_RADOS* rados, const rgw_raw_obj& raw_obj, + RGWErrorRepoRemoveCR(librados::Rados* rados, const rgw_raw_obj& raw_obj, const std::string& key, ceph::real_time timestamp) - : RGWSimpleCoroutine(rados->ctx()), - obj(rados->obj(raw_obj)), + : RGWSimpleCoroutine(static_cast<CephContext*>(rados->cct())), + raw_obj(raw_obj), key(key), timestamp(timestamp) {} @@ -180,13 +182,14 @@ class RGWErrorRepoRemoveCR : public RGWSimpleCoroutine { if (r < 0) { return r; } - r = obj.open(dpp); + rgw_rados_ref ref; + r = rgw_get_rados_ref(dpp, rados, raw_obj, &ref); if (r < 0) { return r; } cn = stack->create_completion_notifier(); - return obj.aio_operate(cn->completion(), &op); + return ref.aio_operate(cn->completion(), &op); } int request_complete() override { @@ -194,7 +197,7 @@ class RGWErrorRepoRemoveCR : public RGWSimpleCoroutine { } }; -RGWCoroutine* remove_cr(RGWSI_RADOS* rados, +RGWCoroutine* remove_cr(librados::Rados* rados, const rgw_raw_obj& obj, const std::string& key, ceph::real_time timestamp) diff --git a/src/rgw/driver/rados/rgw_sync_error_repo.h b/src/rgw/driver/rados/rgw_sync_error_repo.h index 60525d281f0..7760c9fe198 100644 --- a/src/rgw/driver/rados/rgw_sync_error_repo.h +++ b/src/rgw/driver/rados/rgw_sync_error_repo.h @@ -19,7 +19,6 @@ #include "include/buffer_fwd.h" #include "common/ceph_time.h" -class RGWSI_RADOS; class RGWCoroutine; struct rgw_raw_obj; struct rgw_bucket_shard; @@ -42,7 +41,7 @@ ceph::real_time decode_value(const ceph::bufferlist& bl); int write(librados::ObjectWriteOperation& op, const std::string& key, ceph::real_time timestamp); -RGWCoroutine* write_cr(RGWSI_RADOS* rados, +RGWCoroutine* write_cr(librados::Rados* rados, const rgw_raw_obj& obj, const std::string& key, ceph::real_time timestamp); @@ -51,7 +50,7 @@ RGWCoroutine* write_cr(RGWSI_RADOS* rados, int remove(librados::ObjectWriteOperation& op, const std::string& key, ceph::real_time timestamp); -RGWCoroutine* remove_cr(RGWSI_RADOS* rados, +RGWCoroutine* remove_cr(librados::Rados* rados, const rgw_raw_obj& obj, const std::string& key, ceph::real_time timestamp); diff --git a/src/rgw/driver/rados/rgw_sync_module_aws.cc b/src/rgw/driver/rados/rgw_sync_module_aws.cc index 46b99968a50..3c269a74949 100644 --- a/src/rgw/driver/rados/rgw_sync_module_aws.cc +++ b/src/rgw/driver/rados/rgw_sync_module_aws.cc @@ -705,7 +705,6 @@ static int do_decode_rest_obj(const DoutPrefixProvider *dpp, CephContext *cct, m } } - info->acls.set_ctx(cct); auto aiter = attrs.find(RGW_ATTR_ACL); if (aiter != attrs.end()) { bufferlist& bl = aiter->second; diff --git a/src/rgw/driver/rados/rgw_sync_module_es.cc b/src/rgw/driver/rados/rgw_sync_module_es.cc index 13c2057bf77..e3353dc1fc7 100644 --- 
a/src/rgw/driver/rados/rgw_sync_module_es.cc +++ b/src/rgw/driver/rados/rgw_sync_module_es.cc @@ -501,15 +501,12 @@ struct es_obj_metadata { const RGWAccessControlList& acl = policy.get_acl(); - permissions.insert(policy.get_owner().get_id().to_str()); - for (auto acliter : acl.get_grant_map()) { + permissions.insert(policy.get_owner().id.to_str()); + for (const auto& acliter : acl.get_grant_map()) { const ACLGrant& grant = acliter.second; - if (grant.get_type().get_type() == ACL_TYPE_CANON_USER && - ((uint32_t)grant.get_permission().get_permissions() & RGW_PERM_READ) != 0) { - rgw_user user; - if (grant.get_id(user)) { - permissions.insert(user.to_str()); - } + const auto* user = grant.get_user(); + if (user && (grant.get_permission().get_permissions() & RGW_PERM_READ) != 0) { + permissions.insert(user->id.to_str()); } } } else if (attr_name == RGW_ATTR_TAGS) { diff --git a/src/rgw/driver/rados/rgw_sync_module_es_rest.cc b/src/rgw/driver/rados/rgw_sync_module_es_rest.cc index db9d48adb36..b456f0b4097 100644 --- a/src/rgw/driver/rados/rgw_sync_module_es_rest.cc +++ b/src/rgw/driver/rados/rgw_sync_module_es_rest.cc @@ -347,7 +347,7 @@ public: s->formatter->dump_format("ETag", "\"%s\"", e.meta.etag.c_str()); s->formatter->dump_string("ContentType", e.meta.content_type.c_str()); s->formatter->dump_string("StorageClass", e.meta.storage_class.c_str()); - dump_owner(s, e.owner.get_id(), e.owner.get_display_name()); + dump_owner(s, e.owner.id, e.owner.display_name); s->formatter->open_array_section("CustomMetadata"); for (auto& m : e.meta.custom_str) { s->formatter->open_object_section("Entry"); diff --git a/src/rgw/driver/rados/rgw_tools.cc b/src/rgw/driver/rados/rgw_tools.cc index 635e5ce887d..269c790e401 100644 --- a/src/rgw/driver/rados/rgw_tools.cc +++ b/src/rgw/driver/rados/rgw_tools.cc @@ -1,6 +1,8 @@ // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- // vim: ts=8 sw=2 smarttab ft=cpp +#include "auth/AuthRegistry.h" + #include "common/errno.h" #include "librados/librados_asio.h" @@ -95,6 +97,24 @@ int rgw_init_ioctx(const DoutPrefixProvider *dpp, return 0; } +int rgw_get_rados_ref(const DoutPrefixProvider* dpp, librados::Rados* rados, + rgw_raw_obj obj, rgw_rados_ref* ref) +{ + ref->obj = std::move(obj); + + int r = rgw_init_ioctx(dpp, rados, ref->obj.pool, + ref->ioctx, true, false); + if (r < 0) { + ldpp_dout(dpp, 0) << "ERROR: creating ioctx (pool=" << ref->obj.pool + << "); r=" << r << dendl; + return r; + } + + ref->ioctx.locator_set_key(ref->obj.loc); + return 0; +} + + map<string, bufferlist>* no_change_attrs() { static map<string, bufferlist> no_change; return &no_change; @@ -335,8 +355,7 @@ int RGWDataAccess::Object::put(bufferlist& data, string req_id = driver->zone_unique_id(driver->get_new_req_id()); std::unique_ptr<rgw::sal::Writer> processor; - processor = driver->get_atomic_writer(dpp, y, obj.get(), - owner.get_id(), + processor = driver->get_atomic_writer(dpp, y, obj.get(), owner.id, nullptr, olh_epoch, req_id); int ret = processor->prepare(y); @@ -393,9 +412,10 @@ int RGWDataAccess::Object::put(bufferlist& data, } if (!aclbl) { - RGWAccessControlPolicy_S3 policy(cct); + RGWAccessControlPolicy policy; - policy.create_canned(bucket->policy.get_owner(), bucket->policy.get_owner(), string()); /* default private policy */ + const auto& owner = bucket->policy.get_owner(); + policy.create_default(owner.id, owner.display_name); // default private policy policy.encode(aclbl.emplace()); } @@ -435,3 +455,82 @@ void 
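
As the RGWDataAccess hunk above shows, RGWAccessControlPolicy is now default-constructed without a CephContext and the canned-ACL helper becomes create_default(). A short sketch of building and encoding a default private policy under that API; bucket_owner is an illustrative ACLOwner, not a variable from this diff:

RGWAccessControlPolicy policy;
policy.create_default(bucket_owner.id, bucket_owner.display_name);

bufferlist aclbl;
policy.encode(aclbl);  // ready to store as the RGW_ATTR_ACL xattr
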
rgw_complete_aio_completion(librados::AioCompletion* c, int r) { librados::CB_AioCompleteAndSafe cb(pc); cb(r); } + +bool rgw_check_secure_mon_conn(const DoutPrefixProvider *dpp) +{ + AuthRegistry reg(dpp->get_cct()); + + reg.refresh_config(); + + std::vector<uint32_t> methods; + std::vector<uint32_t> modes; + + reg.get_supported_methods(CEPH_ENTITY_TYPE_MON, &methods, &modes); + ldpp_dout(dpp, 20) << __func__ << "(): auth registy supported: methods=" << methods << " modes=" << modes << dendl; + + for (auto method : methods) { + if (!reg.is_secure_method(method)) { + ldpp_dout(dpp, 20) << __func__ << "(): method " << method << " is insecure" << dendl; + return false; + } + } + + for (auto mode : modes) { + if (!reg.is_secure_mode(mode)) { + ldpp_dout(dpp, 20) << __func__ << "(): mode " << mode << " is insecure" << dendl; + return false; + } + } + + return true; +} + +int rgw_clog_warn(librados::Rados* h, const string& msg) +{ + string cmd = + "{" + "\"prefix\": \"log\", " + "\"level\": \"warn\", " + "\"logtext\": [\"" + msg + "\"]" + "}"; + + bufferlist inbl; + return h->mon_command(cmd, inbl, nullptr, nullptr); +} + +int rgw_list_pool(const DoutPrefixProvider *dpp, + librados::IoCtx& ioctx, + uint32_t max, + const rgw::AccessListFilter& filter, + std::string& marker, + std::vector<string> *oids, + bool *is_truncated) +{ + librados::ObjectCursor oc; + if (!oc.from_str(marker)) { + ldpp_dout(dpp, 10) << "failed to parse cursor: " << marker << dendl; + return -EINVAL; + } + + auto iter = ioctx.nobjects_begin(oc); + /// Pool_iterate + if (iter == ioctx.nobjects_end()) + return -ENOENT; + + for (; oids->size() < max && iter != ioctx.nobjects_end(); ++iter) { + string oid = iter->get_oid(); + ldpp_dout(dpp, 20) << "RGWRados::pool_iterate: got " << oid << dendl; + + // fill it in with initial values; we may correct later + if (filter && !filter(oid, oid)) + continue; + + oids->push_back(oid); + } + + marker = iter.get_cursor().to_str(); + if (is_truncated) + *is_truncated = (iter != ioctx.nobjects_end()); + + return oids->size(); +} diff --git a/src/rgw/driver/rados/rgw_tools.h b/src/rgw/driver/rados/rgw_tools.h index 607b064b177..27a8b424ecc 100644 --- a/src/rgw/driver/rados/rgw_tools.h +++ b/src/rgw/driver/rados/rgw_tools.h @@ -3,7 +3,9 @@ #pragma once +#include <functional> #include <string> +#include <string_view> #include "include/types.h" #include "include/ceph_hash.h" @@ -21,7 +23,6 @@ class optional_yield; struct obj_version; - int rgw_init_ioctx(const DoutPrefixProvider *dpp, librados::Rados *rados, const rgw_pool& pool, librados::IoCtx& ioctx, @@ -104,6 +105,63 @@ int rgw_rados_notify(const DoutPrefixProvider *dpp, librados::IoCtx& ioctx, cons bufferlist& bl, uint64_t timeout_ms, bufferlist* pbl, optional_yield y); +struct rgw_rados_ref { + librados::IoCtx ioctx; + rgw_raw_obj obj; + + + int operate(const DoutPrefixProvider* dpp, librados::ObjectReadOperation* op, + bufferlist* pbl, optional_yield y, int flags = 0) { + return rgw_rados_operate(dpp, ioctx, obj.oid, op, pbl, y, flags); + } + + int operate(const DoutPrefixProvider* dpp, librados::ObjectWriteOperation* op, + optional_yield y, int flags = 0) { + return rgw_rados_operate(dpp, ioctx, obj.oid, op, y, flags); + } + + int aio_operate(librados::AioCompletion* c, + librados::ObjectWriteOperation* op) { + return ioctx.aio_operate(obj.oid, c, op); + } + + int aio_operate(librados::AioCompletion* c, librados::ObjectReadOperation* op, + bufferlist *pbl) { + return ioctx.aio_operate(obj.oid, c, op, pbl); + } + + int watch(uint64_t* 
handle, librados::WatchCtx2* ctx) { + return ioctx.watch2(obj.oid, handle, ctx); + } + + int aio_watch(librados::AioCompletion* c, uint64_t* handle, + librados::WatchCtx2 *ctx) { + return ioctx.aio_watch(obj.oid, c, handle, ctx); + } + + int unwatch(uint64_t handle) { + return ioctx.unwatch2(handle); + } + + int notify(const DoutPrefixProvider* dpp, bufferlist& bl, uint64_t timeout_ms, + bufferlist* pbl, optional_yield y) { + return rgw_rados_notify(dpp, ioctx, obj.oid, bl, timeout_ms, pbl, y); + } + + void notify_ack(uint64_t notify_id, uint64_t cookie, bufferlist& bl) { + ioctx.notify_ack(obj.oid, notify_id, cookie, bl); + } +}; + +inline std::ostream& operator <<(std::ostream& m, const rgw_rados_ref& ref) { + return m << ref.obj; +} + +int rgw_get_rados_ref(const DoutPrefixProvider* dpp, librados::Rados* rados, + rgw_raw_obj obj, rgw_rados_ref* ref); + + + int rgw_tools_init(const DoutPrefixProvider *dpp, CephContext *cct); void rgw_tools_cleanup(); @@ -274,3 +332,14 @@ void rgw_complete_aio_completion(librados::AioCompletion* c, int r); // (Currently providing nullptr will wipe all attributes.) std::map<std::string, ceph::buffer::list>* no_change_attrs(); + +bool rgw_check_secure_mon_conn(const DoutPrefixProvider *dpp); +int rgw_clog_warn(librados::Rados* h, const std::string& msg); + +int rgw_list_pool(const DoutPrefixProvider *dpp, + librados::IoCtx& ioctx, + uint32_t max, + const rgw::AccessListFilter& filter, + std::string& marker, + std::vector<std::string> *oids, + bool *is_truncated); diff --git a/src/rgw/driver/rados/rgw_trim_bilog.cc b/src/rgw/driver/rados/rgw_trim_bilog.cc index 16ced4c2ac5..db998047005 100644 --- a/src/rgw/driver/rados/rgw_trim_bilog.cc +++ b/src/rgw/driver/rados/rgw_trim_bilog.cc @@ -270,18 +270,18 @@ class BucketTrimWatcher : public librados::WatchCtx2 { } // register a watch on the realm's control object - r = ref.pool.ioctx().watch2(ref.obj.oid, &handle, this); + r = ref.ioctx.watch2(ref.obj.oid, &handle, this); if (r == -ENOENT) { constexpr bool exclusive = true; - r = ref.pool.ioctx().create(ref.obj.oid, exclusive); + r = ref.ioctx.create(ref.obj.oid, exclusive); if (r == -EEXIST || r == 0) { - r = ref.pool.ioctx().watch2(ref.obj.oid, &handle, this); + r = ref.ioctx.watch2(ref.obj.oid, &handle, this); } } if (r < 0) { ldpp_dout(dpp, -1) << "Failed to watch " << ref.obj << " with " << cpp_strerror(-r) << dendl; - ref.pool.ioctx().close(); + ref.ioctx.close(); return r; } @@ -290,24 +290,24 @@ class BucketTrimWatcher : public librados::WatchCtx2 { } int restart() { - int r = ref.pool.ioctx().unwatch2(handle); + int r = ref.ioctx.unwatch2(handle); if (r < 0) { lderr(store->ctx()) << "Failed to unwatch on " << ref.obj << " with " << cpp_strerror(-r) << dendl; } - r = ref.pool.ioctx().watch2(ref.obj.oid, &handle, this); + r = ref.ioctx.watch2(ref.obj.oid, &handle, this); if (r < 0) { lderr(store->ctx()) << "Failed to restart watch on " << ref.obj << " with " << cpp_strerror(-r) << dendl; - ref.pool.ioctx().close(); + ref.ioctx.close(); } return r; } void stop() { if (handle) { - ref.pool.ioctx().unwatch2(handle); - ref.pool.ioctx().close(); + ref.ioctx.unwatch2(handle); + ref.ioctx.close(); } } @@ -332,7 +332,7 @@ class BucketTrimWatcher : public librados::WatchCtx2 { } catch (const buffer::error& e) { lderr(store->ctx()) << "Failed to decode notification: " << e.what() << dendl; } - ref.pool.ioctx().notify_ack(ref.obj.oid, notify_id, cookie, reply); + ref.ioctx.notify_ack(ref.obj.oid, notify_id, cookie, reply); } /// reestablish the watch if it gets 
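
Taken together, rgw_get_rados_ref(), rgw_rados_ref and rgw_list_pool() give callers a thin replacement for the old RGWSI_RADOS pool/obj handles. A usage sketch against the declarations above; read_first_chunk(), list_prefixed(), the 4 KiB cap and the "obj_" prefix are illustrative only, and error handling is trimmed:

int read_first_chunk(const DoutPrefixProvider* dpp, librados::Rados* rados,
                     const rgw_raw_obj& raw, bufferlist* out,
                     optional_yield y)
{
  rgw_rados_ref ref;
  int r = rgw_get_rados_ref(dpp, rados, raw, &ref);
  if (r < 0) {
    return r;  // ioctx creation / pool lookup failed
  }
  librados::ObjectReadOperation op;
  op.read(0, 4096, out, nullptr);  // first 4 KiB is enough for a sketch
  return ref.operate(dpp, &op, nullptr, y);
}

int list_prefixed(const DoutPrefixProvider* dpp, librados::IoCtx& ioctx,
                  std::vector<std::string>* oids)
{
  std::string marker = ioctx.object_list_begin().to_str();
  bool truncated = false;
  // collect up to 1000 oids whose names start with "obj_"
  return rgw_list_pool(dpp, ioctx, 1000,
                       rgw::AccessListFilterPrefix("obj_"),
                       marker, oids, &truncated);
}
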
disconnected @@ -617,7 +617,7 @@ int BucketTrimInstanceCR::operate(const DoutPrefixProvider *dpp) get_policy_params.zone = zone_id; get_policy_params.bucket = bucket; - yield call(new RGWBucketGetSyncPolicyHandlerCR(store->svc()->rados->get_async_processor(), + yield call(new RGWBucketGetSyncPolicyHandlerCR(store->svc()->async_processor, store, get_policy_params, source_policy, @@ -728,14 +728,14 @@ int BucketTrimInstanceCR::operate(const DoutPrefixProvider *dpp) } while (clean_info && retries < MAX_RETRIES) { yield call(new RGWPutBucketInstanceInfoCR( - store->svc()->rados->get_async_processor(), + store->svc()->async_processor, store, clean_info->first, false, {}, no_change_attrs(), dpp)); // Raced, try again. if (retcode == -ECANCELED) { yield call(new RGWGetBucketInstanceInfoCR( - store->svc()->rados->get_async_processor(), + store->svc()->async_processor, store, clean_info->first.bucket, &(clean_info->first), nullptr, dpp)); if (retcode < 0) { @@ -1132,7 +1132,7 @@ int BucketTrimCR::operate(const DoutPrefixProvider *dpp) return buckets.size() < config.buckets_per_interval; }; - call(new MetadataListCR(cct, store->svc()->rados->get_async_processor(), + call(new MetadataListCR(cct, store->svc()->async_processor, store->ctl()->meta.mgr, section, status.marker, cb)); } @@ -1219,7 +1219,7 @@ int BucketTrimPollCR::operate(const DoutPrefixProvider *dpp) // prevent others from trimming for our entire wait interval set_status("acquiring trim lock"); - yield call(new RGWSimpleRadosLockCR(store->svc()->rados->get_async_processor(), store, + yield call(new RGWSimpleRadosLockCR(store->svc()->async_processor, store, obj, name, cookie, config.trim_interval_sec)); if (retcode < 0) { @@ -1232,7 +1232,7 @@ int BucketTrimPollCR::operate(const DoutPrefixProvider *dpp) if (retcode < 0) { // on errors, unlock so other gateways can try set_status("unlocking"); - yield call(new RGWSimpleRadosUnlockCR(store->svc()->rados->get_async_processor(), store, + yield call(new RGWSimpleRadosUnlockCR(store->svc()->async_processor, store, obj, name, cookie)); } } diff --git a/src/rgw/driver/rados/rgw_trim_datalog.cc b/src/rgw/driver/rados/rgw_trim_datalog.cc index bac0cda8dd6..5dcddb659e1 100644 --- a/src/rgw/driver/rados/rgw_trim_datalog.cc +++ b/src/rgw/driver/rados/rgw_trim_datalog.cc @@ -224,7 +224,7 @@ int DataLogTrimPollCR::operate(const DoutPrefixProvider *dpp) // prevent other gateways from attempting to trim for the duration set_status("acquiring trim lock"); - yield call(new RGWSimpleRadosLockCR(store->svc()->rados->get_async_processor(), store, + yield call(new RGWSimpleRadosLockCR(store->svc()->async_processor, store, rgw_raw_obj(store->svc()->zone->get_zone_params().log_pool, lock_oid), "data_trim", lock_cookie, // interval is a small number and unlikely to overflow diff --git a/src/rgw/driver/rados/rgw_trim_mdlog.cc b/src/rgw/driver/rados/rgw_trim_mdlog.cc index 13773ae2877..be513b6e74d 100644 --- a/src/rgw/driver/rados/rgw_trim_mdlog.cc +++ b/src/rgw/driver/rados/rgw_trim_mdlog.cc @@ -565,7 +565,7 @@ class MetaPeerTrimShardCollectCR : public RGWShardCollectCR { env(env), mdlog(mdlog), period_id(env.current.get_period().get_id()) { meta_env.init(env.dpp, cct, env.store, env.store->svc()->zone->get_master_conn(), - env.store->svc()->rados->get_async_processor(), env.http, nullptr, + env.store->svc()->async_processor, env.http, nullptr, env.store->getRados()->get_sync_tracer()); } @@ -669,7 +669,7 @@ int MetaTrimPollCR::operate(const DoutPrefixProvider *dpp) // prevent others from trimming for our entire 
wait interval set_status("acquiring trim lock"); - yield call(new RGWSimpleRadosLockCR(store->svc()->rados->get_async_processor(), store, + yield call(new RGWSimpleRadosLockCR(store->svc()->async_processor, store, obj, name, cookie, // interval is a small number and unlikely to overflow // coverity[store_truncates_time_t:SUPPRESS] @@ -685,7 +685,7 @@ int MetaTrimPollCR::operate(const DoutPrefixProvider *dpp) if (retcode < 0) { // on errors, unlock so other gateways can try set_status("unlocking"); - yield call(new RGWSimpleRadosUnlockCR(store->svc()->rados->get_async_processor(), store, + yield call(new RGWSimpleRadosUnlockCR(store->svc()->async_processor, store, obj, name, cookie)); } } diff --git a/src/rgw/driver/rados/rgw_user.cc b/src/rgw/driver/rados/rgw_user.cc index 506ac0acf11..b5569e481c5 100644 --- a/src/rgw/driver/rados/rgw_user.cc +++ b/src/rgw/driver/rados/rgw_user.cc @@ -237,7 +237,7 @@ int RGWAccessKeyPool::init(RGWUserAdminOpState& op_state) } const rgw_user& uid = op_state.get_user_id(); - if (uid.compare(RGW_USER_ANON_ID) == 0) { + if (uid == rgw_user(RGW_USER_ANON_ID)) { keys_allowed = false; return -EINVAL; } @@ -890,7 +890,7 @@ int RGWSubUserPool::init(RGWUserAdminOpState& op_state) } const rgw_user& uid = op_state.get_user_id(); - if (uid.compare(RGW_USER_ANON_ID) == 0) { + if (uid == rgw_user(RGW_USER_ANON_ID)) { subusers_allowed = false; return -EACCES; } @@ -1198,7 +1198,7 @@ int RGWUserCapPool::init(RGWUserAdminOpState& op_state) } const rgw_user& uid = op_state.get_user_id(); - if (uid.compare(RGW_USER_ANON_ID) == 0) { + if (uid == rgw_user(RGW_USER_ANON_ID)) { caps_allowed = false; return -EACCES; } @@ -1373,7 +1373,7 @@ int RGWUser::init(const DoutPrefixProvider *dpp, RGWUserAdminOpState& op_state, } } - if (!user_id.empty() && (user_id.compare(RGW_USER_ANON_ID) != 0)) { + if (!user_id.empty() && user_id != rgw_user(RGW_USER_ANON_ID)) { user = driver->get_user(user_id); found = (user->load_user(dpp, y) >= 0); op_state.found_by_uid = found; @@ -1477,12 +1477,12 @@ int RGWUser::check_op(RGWUserAdminOpState& op_state, std::string *err_msg) int ret = 0; const rgw_user& uid = op_state.get_user_id(); - if (uid.compare(RGW_USER_ANON_ID) == 0) { + if (uid == rgw_user(RGW_USER_ANON_ID)) { set_err_msg(err_msg, "unable to perform operations on the anonymous user"); return -EINVAL; } - if (is_populated() && user_id.compare(uid) != 0) { + if (is_populated() && user_id != uid) { set_err_msg(err_msg, "user id mismatch, operation id: " + uid.to_str() + " does not match: " + user_id.to_str()); @@ -1858,7 +1858,7 @@ int RGWUser::execute_modify(const DoutPrefixProvider *dpp, RGWUserAdminOpState& } // ensure that we can modify the user's attributes - if (user_id.compare(RGW_USER_ANON_ID) == 0) { + if (user_id == rgw_user(RGW_USER_ANON_ID)) { set_err_msg(err_msg, "unable to modify anonymous user's info"); return -EACCES; } @@ -1870,7 +1870,7 @@ int RGWUser::execute_modify(const DoutPrefixProvider *dpp, RGWUserAdminOpState& // make sure we are not adding a duplicate email if (old_email != op_email) { ret = driver->get_user_by_email(dpp, op_email, y, &duplicate_check); - if (ret >= 0 && duplicate_check->get_id().compare(user_id) != 0) { + if (ret >= 0 && duplicate_check->get_id() != user_id) { set_err_msg(err_msg, "cannot add duplicate email"); return -ERR_EMAIL_EXIST; } diff --git a/src/rgw/driver/rados/rgw_user.h b/src/rgw/driver/rados/rgw_user.h index d5eb167daa3..a0cd7ed84fe 100644 --- a/src/rgw/driver/rados/rgw_user.h +++ b/src/rgw/driver/rados/rgw_user.h @@ -30,8 +30,6 @@ class 
RGWUserCtl; class RGWBucketCtl; class RGWUserBuckets; -class RGWGetUserStats_CB; - /** * A string wrapper that includes encode/decode functions * for easily accessing a UID in all forms diff --git a/src/rgw/driver/rados/sync_fairness.cc b/src/rgw/driver/rados/sync_fairness.cc index ded1cf56a71..1ac27f4bacc 100644 --- a/src/rgw/driver/rados/sync_fairness.cc +++ b/src/rgw/driver/rados/sync_fairness.cc @@ -144,18 +144,18 @@ class Watcher : public librados::WatchCtx2 { } // register a watch on the control object - r = ref.pool.ioctx().watch2(ref.obj.oid, &handle, this); + r = ref.ioctx.watch2(ref.obj.oid, &handle, this); if (r == -ENOENT) { constexpr bool exclusive = true; - r = ref.pool.ioctx().create(ref.obj.oid, exclusive); + r = ref.ioctx.create(ref.obj.oid, exclusive); if (r == -EEXIST || r == 0) { - r = ref.pool.ioctx().watch2(ref.obj.oid, &handle, this); + r = ref.ioctx.watch2(ref.obj.oid, &handle, this); } } if (r < 0) { ldpp_dout(dpp, -1) << "Failed to watch " << ref.obj << " with " << cpp_strerror(-r) << dendl; - ref.pool.ioctx().close(); + ref.ioctx.close(); return r; } @@ -165,16 +165,16 @@ class Watcher : public librados::WatchCtx2 { int restart() { - int r = ref.pool.ioctx().unwatch2(handle); + int r = ref.ioctx.unwatch2(handle); if (r < 0) { ldpp_dout(dpp, -1) << "Failed to unwatch on " << ref.obj << " with " << cpp_strerror(-r) << dendl; } - r = ref.pool.ioctx().watch2(ref.obj.oid, &handle, this); + r = ref.ioctx.watch2(ref.obj.oid, &handle, this); if (r < 0) { ldpp_dout(dpp, -1) << "Failed to restart watch on " << ref.obj << " with " << cpp_strerror(-r) << dendl; - ref.pool.ioctx().close(); + ref.ioctx.close(); } return r; } @@ -182,8 +182,8 @@ class Watcher : public librados::WatchCtx2 { void stop() { if (handle) { - ref.pool.ioctx().unwatch2(handle); - ref.pool.ioctx().close(); + ref.ioctx.unwatch2(handle); + ref.ioctx.close(); } } @@ -210,7 +210,7 @@ class Watcher : public librados::WatchCtx2 { bufferlist reply; encode(response, reply); - ref.pool.ioctx().notify_ack(ref.obj.oid, notify_id, cookie, reply); + ref.ioctx.notify_ack(ref.obj.oid, notify_id, cookie, reply); } // reestablish the watch if it gets disconnected @@ -289,7 +289,7 @@ class RadosBidManager : public BidManager, public Server, public DoutPrefix { my_bids = this->my_bids; } - bool is_highest_bidder(std::size_t index) + bool is_highest_bidder(std::size_t index) override { auto lock = std::scoped_lock{mutex}; const bid_value my_bid = my_bids.at(index); // may throw @@ -303,7 +303,7 @@ class RadosBidManager : public BidManager, public Server, public DoutPrefix { return true; } - RGWCoroutine* notify_cr() + RGWCoroutine* notify_cr() override { auto lock = std::scoped_lock{mutex}; return new NotifyCR(store, this, obj, my_bids); diff --git a/src/rgw/rgw_acl.cc b/src/rgw/rgw_acl.cc index f32a73f2652..4369bedfc76 100644 --- a/src/rgw/rgw_acl.cc +++ b/src/rgw/rgw_acl.cc @@ -33,10 +33,7 @@ bool operator!=(const ACLGranteeType& lhs, const ACLGranteeType& rhs) { } bool operator==(const ACLGrant& lhs, const ACLGrant& rhs) { - return lhs.type == rhs.type && lhs.id == rhs.id - && lhs.email == rhs.email && lhs.permission == rhs.permission - && lhs.name == rhs.name && lhs.group == rhs.group - && lhs.url_spec == rhs.url_spec; + return lhs.grantee == rhs.grantee && lhs.permission == rhs.permission; } bool operator!=(const ACLGrant& lhs, const ACLGrant& rhs) { return !(lhs == rhs); @@ -61,13 +58,6 @@ bool operator!=(const RGWAccessControlList& lhs, return !(lhs == rhs); } -bool operator==(const ACLOwner& lhs, const ACLOwner& 
rhs) { - return lhs.id == rhs.id && lhs.display_name == rhs.display_name; -} -bool operator!=(const ACLOwner& lhs, const ACLOwner& rhs) { - return !(lhs == rhs); -} - bool operator==(const RGWAccessControlPolicy& lhs, const RGWAccessControlPolicy& rhs) { return lhs.acl == rhs.acl && lhs.owner == rhs.owner; @@ -77,59 +67,49 @@ bool operator!=(const RGWAccessControlPolicy& lhs, return !(lhs == rhs); } -void RGWAccessControlList::_add_grant(ACLGrant *grant) +void RGWAccessControlList::register_grant(const ACLGrant& grant) { - ACLPermission& perm = grant->get_permission(); - ACLGranteeType& type = grant->get_type(); - switch (type.get_type()) { - case ACL_TYPE_REFERER: - referer_list.emplace_back(grant->get_referer(), perm.get_permissions()); + ACLPermission perm = grant.get_permission(); + + if (const auto* user = grant.get_user(); user) { + acl_user_map[user->id.to_str()] |= perm.get_permissions(); + } else if (const auto* email = grant.get_email(); email) { + acl_user_map[email->address] |= perm.get_permissions(); + } else if (const auto* group = grant.get_group(); group) { + acl_group_map[group->type] |= perm.get_permissions(); + } else if (const auto* referer = grant.get_referer(); referer) { + referer_list.emplace_back(referer->url_spec, perm.get_permissions()); /* We're specially handling the Swift's .r:* as the S3 API has a similar * concept and thus we can have a small portion of compatibility here. */ - if (grant->get_referer() == RGW_REFERER_WILDCARD) { + if (referer->url_spec == RGW_REFERER_WILDCARD) { acl_group_map[ACL_GROUP_ALL_USERS] |= perm.get_permissions(); } - break; - case ACL_TYPE_GROUP: - acl_group_map[grant->get_group()] |= perm.get_permissions(); - break; - default: - { - rgw_user id; - if (!grant->get_id(id)) { - ldout(cct, 0) << "ERROR: grant->get_id() failed" << dendl; - } - acl_user_map[id.to_str()] |= perm.get_permissions(); - } } } -void RGWAccessControlList::add_grant(ACLGrant *grant) +void RGWAccessControlList::add_grant(const ACLGrant& grant) { - rgw_user id; - grant->get_id(id); // not that this will return false for groups, but that's ok, we won't search groups - grant_map.insert(pair<string, ACLGrant>(id.to_str(), *grant)); - _add_grant(grant); + std::string id; + if (const auto* user = grant.get_user(); user) { + id = user->id.to_str(); + } else if (const auto* email = grant.get_email(); email) { + id = email->address; + } // other types share the empty key in the grant multimap + grant_map.emplace(id, grant); + register_grant(grant); } -void RGWAccessControlList::remove_canon_user_grant(rgw_user& user_id) +void RGWAccessControlList::remove_canon_user_grant(const rgw_user& user_id) { - auto multi_map_iter = grant_map.find(user_id.to_str()); - if(multi_map_iter != grant_map.end()) { - auto grants = grant_map.equal_range(user_id.to_str()); - grant_map.erase(grants.first, grants.second); - } - - auto map_iter = acl_user_map.find(user_id.to_str()); - if (map_iter != acl_user_map.end()){ - acl_user_map.erase(map_iter); - } + const std::string& key = user_id.to_str(); + grant_map.erase(key); + acl_user_map.erase(key); } uint32_t RGWAccessControlList::get_perm(const DoutPrefixProvider* dpp, const rgw::auth::Identity& auth_identity, - const uint32_t perm_mask) + const uint32_t perm_mask) const { ldpp_dout(dpp, 5) << "Searching permissions for identity=" << auth_identity << " mask=" << perm_mask << dendl; @@ -156,7 +136,7 @@ uint32_t RGWAccessControlList::get_group_perm(const DoutPrefixProvider *dpp, uint32_t RGWAccessControlList::get_referer_perm(const 
DoutPrefixProvider *dpp, const uint32_t current_perm, const std::string http_referer, - const uint32_t perm_mask) + const uint32_t perm_mask) const { ldpp_dout(dpp, 5) << "Searching permissions for referer=" << http_referer << " mask=" << perm_mask << dendl; @@ -180,14 +160,14 @@ uint32_t RGWAccessControlPolicy::get_perm(const DoutPrefixProvider* dpp, const rgw::auth::Identity& auth_identity, const uint32_t perm_mask, const char * const http_referer, - bool ignore_public_acls) + bool ignore_public_acls) const { ldpp_dout(dpp, 20) << "-- Getting permissions begin with perm_mask=" << perm_mask << dendl; uint32_t perm = acl.get_perm(dpp, auth_identity, perm_mask); - if (auth_identity.is_owner_of(owner.get_id())) { + if (auth_identity.is_owner_of(owner.id)) { perm |= perm_mask & (RGW_PERM_READ_ACP | RGW_PERM_WRITE_ACP); } @@ -211,7 +191,7 @@ uint32_t RGWAccessControlPolicy::get_perm(const DoutPrefixProvider* dpp, } ldpp_dout(dpp, 5) << "-- Getting permissions done for identity=" << auth_identity - << ", owner=" << owner.get_id() + << ", owner=" << owner.id << ", perm=" << perm << dendl; return perm; @@ -222,7 +202,7 @@ bool RGWAccessControlPolicy::verify_permission(const DoutPrefixProvider* dpp, const uint32_t user_perm_mask, const uint32_t perm, const char * const http_referer, - bool ignore_public_acls) + bool ignore_public_acls) const { uint32_t test_perm = perm | RGW_PERM_READ_OBJS | RGW_PERM_WRITE_OBJS; @@ -286,31 +266,36 @@ void ACLGranteeType::dump(Formatter *f) const void ACLGrant::dump(Formatter *f) const { f->open_object_section("type"); - type.dump(f); + get_type().dump(f); f->close_section(); - f->dump_string("id", id.to_str()); - f->dump_string("email", email); + struct dump_visitor { + Formatter* f; - f->open_object_section("permission"); - permission.dump(f); - f->close_section(); + void operator()(const ACLGranteeCanonicalUser& user) { + encode_json("id", user.id, f); + encode_json("name", user.name, f); + } + void operator()(const ACLGranteeEmailUser& email) { + encode_json("email", email.address, f); + } + void operator()(const ACLGranteeGroup& group) { + encode_json("group", static_cast<int>(group.type), f); + } + void operator()(const ACLGranteeUnknown&) {} + void operator()(const ACLGranteeReferer& r) { + encode_json("url_spec", r.url_spec, f); + } + }; + std::visit(dump_visitor{f}, grantee); - f->dump_string("name", name); - f->dump_int("group", (int)group); - f->dump_string("url_spec", url_spec); + encode_json("permission", permission, f); } void ACLGrant::generate_test_instances(list<ACLGrant*>& o) { - rgw_user id("rgw"); - string name, email; - name = "Mr. RGW"; - email = "r@gw"; - ACLGrant *g1 = new ACLGrant; - g1->set_canon(id, name, RGW_PERM_READ); - g1->email = email; + g1->set_canon(rgw_user{"rgw"}, "Mr. 
RGW", RGW_PERM_READ); o.push_back(g1); ACLGrant *g2 = new ACLGrant; @@ -322,28 +307,22 @@ void ACLGrant::generate_test_instances(list<ACLGrant*>& o) void ACLGranteeType::generate_test_instances(list<ACLGranteeType*>& o) { - ACLGranteeType *t = new ACLGranteeType; - t->set(ACL_TYPE_CANON_USER); - o.push_back(t); + o.push_back(new ACLGranteeType(ACL_TYPE_CANON_USER)); o.push_back(new ACLGranteeType); } void RGWAccessControlList::generate_test_instances(list<RGWAccessControlList*>& o) { - RGWAccessControlList *acl = new RGWAccessControlList(NULL); - - list<ACLGrant *> glist; - list<ACLGrant *>::iterator iter; - - ACLGrant::generate_test_instances(glist); - for (iter = glist.begin(); iter != glist.end(); ++iter) { - ACLGrant *grant = *iter; - acl->add_grant(grant); + RGWAccessControlList *acl = new RGWAccessControlList; + list<ACLGrant *> grants; + ACLGrant::generate_test_instances(grants); + for (ACLGrant* grant : grants) { + acl->add_grant(*grant); delete grant; } o.push_back(acl); - o.push_back(new RGWAccessControlList(NULL)); + o.push_back(new RGWAccessControlList); } void ACLOwner::generate_test_instances(list<ACLOwner*>& o) @@ -363,21 +342,19 @@ void RGWAccessControlPolicy::generate_test_instances(list<RGWAccessControlPolicy RGWAccessControlList::generate_test_instances(acl_list); iter = acl_list.begin(); - RGWAccessControlPolicy *p = new RGWAccessControlPolicy(NULL); + RGWAccessControlPolicy *p = new RGWAccessControlPolicy; RGWAccessControlList *l = *iter; p->acl = *l; - string name = "radosgw"; - rgw_user id("rgw"); - p->owner.set_name(name); - p->owner.set_id(id); + p->owner.id.id = "rgw"; + p->owner.display_name = "radosgw"; o.push_back(p); delete l; } - o.push_back(new RGWAccessControlPolicy(NULL)); + o.push_back(new RGWAccessControlPolicy); } void RGWAccessControlList::dump(Formatter *f) const @@ -434,9 +411,9 @@ void RGWAccessControlPolicy::dump(Formatter *f) const encode_json("owner", owner, f); } -ACLGroupTypeEnum ACLGrant::uri_to_group(string& uri) +ACLGroupTypeEnum ACLGrant::uri_to_group(std::string_view uri) { // this is required for backward compatibility - return ACLGrant_S3::uri_to_group(uri); + return rgw::s3::acl_uri_to_group(uri); } diff --git a/src/rgw/rgw_acl.h b/src/rgw/rgw_acl.h index c520501583b..e2887a7049b 100644 --- a/src/rgw/rgw_acl.h +++ b/src/rgw/rgw_acl.h @@ -6,6 +6,7 @@ #include <map> #include <string> #include <string_view> +#include <variant> #include <include/types.h> #include <boost/optional.hpp> @@ -15,118 +16,180 @@ #include "rgw_basic_types.h" //includes rgw_acl_types.h +// acl grantee types +struct ACLGranteeCanonicalUser { + rgw_user id; + std::string name; + + friend auto operator<=>(const ACLGranteeCanonicalUser&, + const ACLGranteeCanonicalUser&) = default; +}; +struct ACLGranteeEmailUser { + std::string address; + + friend auto operator<=>(const ACLGranteeEmailUser&, + const ACLGranteeEmailUser&) = default; +}; +struct ACLGranteeGroup { + ACLGroupTypeEnum type = ACL_GROUP_NONE; + + friend auto operator<=>(const ACLGranteeGroup&, + const ACLGranteeGroup&) = default; +}; +struct ACLGranteeUnknown { + friend auto operator<=>(const ACLGranteeUnknown&, + const ACLGranteeUnknown&) = default; +}; +struct ACLGranteeReferer { + std::string url_spec; + + friend auto operator<=>(const ACLGranteeReferer&, + const ACLGranteeReferer&) = default; +}; + class ACLGrant { protected: - ACLGranteeType type; - rgw_user id; - std::string email; - mutable rgw_user email_id; + // acl grantee variant, where variant index matches ACLGranteeTypeEnum + using ACLGrantee = 
std::variant< + ACLGranteeCanonicalUser, + ACLGranteeEmailUser, + ACLGranteeGroup, + ACLGranteeUnknown, + ACLGranteeReferer>; + + ACLGrantee grantee; ACLPermission permission; - std::string name; - ACLGroupTypeEnum group; - std::string url_spec; public: - ACLGrant() : group(ACL_GROUP_NONE) {} - virtual ~ACLGrant() {} - - /* there's an assumption here that email/uri/id encodings are - different and there can't be any overlap */ - bool get_id(rgw_user& _id) const { - switch(type.get_type()) { - case ACL_TYPE_EMAIL_USER: - _id = email; // implies from_str() that parses the 't:u' syntax - return true; - case ACL_TYPE_GROUP: - case ACL_TYPE_REFERER: - return false; - default: - _id = id; - return true; - } + ACLGranteeType get_type() const { + return static_cast<ACLGranteeTypeEnum>(grantee.index()); } + ACLPermission get_permission() const { return permission; } - const rgw_user* get_id() const { - switch(type.get_type()) { - case ACL_TYPE_EMAIL_USER: - email_id.from_str(email); - return &email_id; - case ACL_TYPE_GROUP: - case ACL_TYPE_REFERER: - return nullptr; - default: - return &id; - } + // return the user grantee, or nullptr + const ACLGranteeCanonicalUser* get_user() const { + return std::get_if<ACLGranteeCanonicalUser>(&grantee); + } + // return the email grantee, or nullptr + const ACLGranteeEmailUser* get_email() const { + return std::get_if<ACLGranteeEmailUser>(&grantee); + } + // return the group grantee, or nullptr + const ACLGranteeGroup* get_group() const { + return std::get_if<ACLGranteeGroup>(&grantee); + } + // return the referer grantee, or nullptr + const ACLGranteeReferer* get_referer() const { + return std::get_if<ACLGranteeReferer>(&grantee); } - - ACLGranteeType& get_type() { return type; } - const ACLGranteeType& get_type() const { return type; } - ACLPermission& get_permission() { return permission; } - const ACLPermission& get_permission() const { return permission; } - ACLGroupTypeEnum get_group() const { return group; } - const std::string& get_referer() const { return url_spec; } void encode(bufferlist& bl) const { ENCODE_START(5, 3, bl); + ACLGranteeType type = get_type(); encode(type, bl); - std::string s; - id.to_str(s); - encode(s, bl); - std::string uri; + + if (const ACLGranteeCanonicalUser* user = get_user(); user) { + encode(user->id.to_str(), bl); + } else { + encode(std::string{}, bl); // encode empty id + } + + std::string uri; // always empty, v2 converted to 'ACLGroupTypeEnum g' below encode(uri, bl); - encode(email, bl); + + if (const ACLGranteeEmailUser* email = get_email(); email) { + encode(email->address, bl); + } else { + encode(std::string{}, bl); // encode empty email address + } encode(permission, bl); - encode(name, bl); - __u32 g = (__u32)group; + if (const ACLGranteeCanonicalUser* user = get_user(); user) { + encode(user->name, bl); + } else { + encode(std::string{}, bl); // encode empty name + } + + __u32 g; + if (const ACLGranteeGroup* group = get_group(); group) { + g = static_cast<__u32>(group->type); + } else { + g = static_cast<__u32>(ACL_GROUP_NONE); + } encode(g, bl); - encode(url_spec, bl); + + if (const ACLGranteeReferer* referer = get_referer(); referer) { + encode(referer->url_spec, bl); + } else { + encode(std::string{}, bl); // encode empty referer + } ENCODE_FINISH(bl); } void decode(bufferlist::const_iterator& bl) { DECODE_START_LEGACY_COMPAT_LEN(5, 3, 3, bl); + ACLGranteeType type; decode(type, bl); + + ACLGranteeCanonicalUser user; std::string s; decode(s, bl); - id.from_str(s); + user.id.from_str(s); + std::string uri; 
decode(uri, bl); - decode(email, bl); + + ACLGranteeEmailUser email; + decode(email.address, bl); + decode(permission, bl); - decode(name, bl); - if (struct_v > 1) { - __u32 g; - decode(g, bl); - group = (ACLGroupTypeEnum)g; - } else { - group = uri_to_group(uri); - } + decode(user.name, bl); + + ACLGranteeGroup group; + __u32 g; + decode(g, bl); + group.type = static_cast<ACLGroupTypeEnum>(g); + + ACLGranteeReferer referer; if (struct_v >= 5) { - decode(url_spec, bl); - } else { - url_spec.clear(); + decode(referer.url_spec, bl); + } + + // construct the grantee type + switch (type) { + case ACL_TYPE_CANON_USER: + grantee = std::move(user); + break; + case ACL_TYPE_EMAIL_USER: + grantee = std::move(email); + break; + case ACL_TYPE_GROUP: + grantee = std::move(group); + break; + case ACL_TYPE_REFERER: + grantee = std::move(referer); + break; + case ACL_TYPE_UNKNOWN: + default: + grantee = ACLGranteeUnknown{}; + break; } DECODE_FINISH(bl); } void dump(Formatter *f) const; static void generate_test_instances(std::list<ACLGrant*>& o); - ACLGroupTypeEnum uri_to_group(std::string& uri); + static ACLGroupTypeEnum uri_to_group(std::string_view uri); - void set_canon(const rgw_user& _id, const std::string& _name, const uint32_t perm) { - type.set(ACL_TYPE_CANON_USER); - id = _id; - name = _name; + void set_canon(const rgw_user& id, const std::string& name, uint32_t perm) { + grantee = ACLGranteeCanonicalUser{id, name}; permission.set_permissions(perm); } - void set_group(ACLGroupTypeEnum _group, const uint32_t perm) { - type.set(ACL_TYPE_GROUP); - group = _group; + void set_group(ACLGroupTypeEnum group, uint32_t perm) { + grantee = ACLGranteeGroup{group}; permission.set_permissions(perm); } - void set_referer(const std::string& _url_spec, const uint32_t perm) { - type.set(ACL_TYPE_REFERER); - url_spec = _url_spec; + void set_referer(const std::string& url_spec, uint32_t perm) { + grantee = ACLGranteeReferer{url_spec}; permission.set_permissions(perm); } @@ -219,31 +282,22 @@ using ACLGrantMap = std::multimap<std::string, ACLGrant>; class RGWAccessControlList { protected: - CephContext *cct; /* FIXME: in the feature we should consider switching to uint32_t also * in data structures. 
*/ std::map<std::string, int> acl_user_map; std::map<uint32_t, int> acl_group_map; std::list<ACLReferer> referer_list; ACLGrantMap grant_map; - void _add_grant(ACLGrant *grant); + // register a grant in the correspoding acl_user/group_map + void register_grant(const ACLGrant& grant); public: - explicit RGWAccessControlList(CephContext *_cct) : cct(_cct) {} - RGWAccessControlList() : cct(NULL) {} - - void set_ctx(CephContext *ctx) { - cct = ctx; - } - - virtual ~RGWAccessControlList() {} - uint32_t get_perm(const DoutPrefixProvider* dpp, const rgw::auth::Identity& auth_identity, - uint32_t perm_mask); + uint32_t perm_mask) const; uint32_t get_group_perm(const DoutPrefixProvider *dpp, ACLGroupTypeEnum group, uint32_t perm_mask) const; uint32_t get_referer_perm(const DoutPrefixProvider *dpp, uint32_t current_perm, std::string http_referer, - uint32_t perm_mask); + uint32_t perm_mask) const; void encode(bufferlist& bl) const { ENCODE_START(4, 3, bl); bool maps_initialized = true; @@ -263,10 +317,9 @@ public: if (struct_v >= 2) { decode(acl_group_map, bl); } else if (!maps_initialized) { - ACLGrantMap::iterator iter; - for (iter = grant_map.begin(); iter != grant_map.end(); ++iter) { - ACLGrant& grant = iter->second; - _add_grant(&grant); + // register everything in the grant_map + for (const auto& [id, grant] : grant_map) { + register_grant(grant); } } if (struct_v >= 4) { @@ -277,20 +330,20 @@ public: void dump(Formatter *f) const; static void generate_test_instances(std::list<RGWAccessControlList*>& o); - void add_grant(ACLGrant *grant); - void remove_canon_user_grant(rgw_user& user_id); + void add_grant(const ACLGrant& grant); + void remove_canon_user_grant(const rgw_user& user_id); ACLGrantMap& get_grant_map() { return grant_map; } const ACLGrantMap& get_grant_map() const { return grant_map; } - void create_default(const rgw_user& id, std::string name) { + void create_default(const rgw_user& id, const std::string& name) { acl_user_map.clear(); acl_group_map.clear(); referer_list.clear(); ACLGrant grant; grant.set_canon(id, name, RGW_PERM_FULL_CONTROL); - add_grant(&grant); + add_grant(grant); } friend bool operator==(const RGWAccessControlList& lhs, const RGWAccessControlList& rhs); @@ -298,15 +351,9 @@ public: }; WRITE_CLASS_ENCODER(RGWAccessControlList) -class ACLOwner -{ -protected: +struct ACLOwner { rgw_user id; std::string display_name; -public: - ACLOwner() {} - ACLOwner(const rgw_user& _id) : id(_id) {} - ~ACLOwner() {} void encode(bufferlist& bl) const { ENCODE_START(3, 2, bl); @@ -327,46 +374,29 @@ public: void dump(Formatter *f) const; void decode_json(JSONObj *obj); static void generate_test_instances(std::list<ACLOwner*>& o); - void set_id(const rgw_user& _id) { id = _id; } - void set_name(const std::string& name) { display_name = name; } - - rgw_user& get_id() { return id; } - const rgw_user& get_id() const { return id; } - std::string& get_display_name() { return display_name; } - const std::string& get_display_name() const { return display_name; } - friend bool operator==(const ACLOwner& lhs, const ACLOwner& rhs); - friend bool operator!=(const ACLOwner& lhs, const ACLOwner& rhs); + + auto operator<=>(const ACLOwner&) const = default; }; WRITE_CLASS_ENCODER(ACLOwner) class RGWAccessControlPolicy { protected: - CephContext *cct; RGWAccessControlList acl; ACLOwner owner; public: - explicit RGWAccessControlPolicy(CephContext *_cct) : cct(_cct), acl(_cct) {} - RGWAccessControlPolicy() : cct(NULL), acl(NULL) {} - virtual ~RGWAccessControlPolicy() {} - - void set_ctx(CephContext 
*ctx) { - cct = ctx; - acl.set_ctx(ctx); - } - uint32_t get_perm(const DoutPrefixProvider* dpp, const rgw::auth::Identity& auth_identity, uint32_t perm_mask, const char * http_referer, - bool ignore_public_acls=false); + bool ignore_public_acls=false) const; bool verify_permission(const DoutPrefixProvider* dpp, const rgw::auth::Identity& auth_identity, uint32_t user_perm_mask, uint32_t perm, const char * http_referer = nullptr, - bool ignore_public_acls=false); + bool ignore_public_acls=false) const; void encode(bufferlist& bl) const { ENCODE_START(2, 2, bl); @@ -388,15 +418,14 @@ public: DECODE_FINISH(bl); } - void set_owner(ACLOwner& o) { owner = o; } - ACLOwner& get_owner() { - return owner; - } + void set_owner(const ACLOwner& o) { owner = o; } + const ACLOwner& get_owner() const { return owner; } + ACLOwner& get_owner() { return owner; } - void create_default(const rgw_user& id, std::string& name) { + void create_default(const rgw_user& id, const std::string& name) { acl.create_default(id, name); - owner.set_id(id); - owner.set_name(name); + owner.id = id; + owner.display_name = name; } RGWAccessControlList& get_acl() { return acl; @@ -405,7 +434,6 @@ public: return acl; } - virtual bool compare_group_name(std::string& id, ACLGroupTypeEnum group) { return false; } bool is_public(const DoutPrefixProvider *dpp) const; friend bool operator==(const RGWAccessControlPolicy& lhs, const RGWAccessControlPolicy& rhs); diff --git a/src/rgw/rgw_acl_s3.cc b/src/rgw/rgw_acl_s3.cc index 9f71e328150..e45fb552aff 100644 --- a/src/rgw/rgw_acl_s3.cc +++ b/src/rgw/rgw_acl_s3.cc @@ -7,6 +7,7 @@ #include <map> #include "include/types.h" +#include "common/split.h" #include "rgw_acl_s3.h" #include "rgw_user.h" @@ -24,8 +25,17 @@ using namespace std; static string rgw_uri_all_users = RGW_URI_ALL_USERS; static string rgw_uri_auth_users = RGW_URI_AUTH_USERS; -void ACLPermission_S3::to_xml(ostream& out) +class ACLPermission_S3 : public XMLObj { +public: + uint32_t flags = 0; + + bool xml_end(const char *el) override; +}; + +void to_xml(ACLPermission perm, std::ostream& out) +{ + const uint32_t flags = perm.get_permissions(); if ((flags & RGW_PERM_FULL_CONTROL) == RGW_PERM_FULL_CONTROL) { out << "<Permission>FULL_CONTROL</Permission>"; } else { @@ -40,8 +50,7 @@ void ACLPermission_S3::to_xml(ostream& out) } } -bool ACLPermission_S3:: -xml_end(const char *el) +bool ACLPermission_S3::xml_end(const char *el) { const char *s = data.c_str(); if (strcasecmp(s, "READ") == 0) { @@ -66,7 +75,7 @@ xml_end(const char *el) class ACLGranteeType_S3 { public: - static const char *to_string(ACLGranteeType& type) { + static const char *to_string(ACLGranteeType type) { switch (type.get_type()) { case ACL_TYPE_CANON_USER: return "CanonicalUser"; @@ -95,6 +104,15 @@ public: } }; +class ACLGrantee_S3 : public XMLObj +{ +public: + ACLGrantee_S3() {} + virtual ~ACLGrantee_S3() override {} + + bool xml_start(const char *el, const char **attr); +}; + class ACLID_S3 : public XMLObj { public: @@ -124,6 +142,15 @@ public: ~ACLDisplayName_S3() override {} }; +class ACLOwner_S3 : public XMLObj +{ +public: + std::string id; + std::string display_name; + + bool xml_end(const char *el) override; +}; + bool ACLOwner_S3::xml_end(const char *el) { ACLID_S3 *acl_id = static_cast<ACLID_S3 *>(find_first("ID")); ACLID_S3 *acl_name = static_cast<ACLID_S3 *>(find_first("DisplayName")); @@ -142,23 +169,37 @@ bool ACLOwner_S3::xml_end(const char *el) { return true; } -void ACLOwner_S3::to_xml(ostream& out) { +void to_xml(const ACLOwner& o, std::ostream& 
out) +{ string s; - id.to_str(s); + o.id.to_str(s); if (s.empty()) return; out << "<Owner>" << "<ID>" << s << "</ID>"; - if (!display_name.empty()) - out << "<DisplayName>" << display_name << "</DisplayName>"; + if (!o.display_name.empty()) + out << "<DisplayName>" << o.display_name << "</DisplayName>"; out << "</Owner>"; } +class ACLGrant_S3 : public XMLObj +{ +public: + ACLGranteeType type; + std::string id; + std::string name; + std::string uri; + std::string email; + ACLPermission_S3* permission = nullptr; + + bool xml_end(const char *el) override; + bool xml_start(const char *el, const char **attr); +}; + bool ACLGrant_S3::xml_end(const char *el) { ACLGrantee_S3 *acl_grantee; ACLID_S3 *acl_id; ACLURI_S3 *acl_uri; ACLEmail_S3 *acl_email; - ACLPermission_S3 *acl_permission; ACLDisplayName_S3 *acl_name; string uri; @@ -168,17 +209,12 @@ bool ACLGrant_S3::xml_end(const char *el) { string type_str; if (!acl_grantee->get_attr("xsi:type", type_str)) return false; - ACLGranteeType_S3::set(type_str.c_str(), type); - - acl_permission = static_cast<ACLPermission_S3 *>(find_first("Permission")); - if (!acl_permission) - return false; - permission = *acl_permission; + ACLGranteeType_S3::set(type_str.c_str(), type); - id.clear(); - name.clear(); - email.clear(); + permission = static_cast<ACLPermission_S3*>(find_first("Permission")); + if (!permission) + return false; switch (type.get_type()) { case ACL_TYPE_CANON_USER: @@ -195,7 +231,6 @@ bool ACLGrant_S3::xml_end(const char *el) { if (!acl_uri) return false; uri = acl_uri->get_data(); - group = uri_to_group(uri); break; case ACL_TYPE_EMAIL_USER: acl_email = static_cast<ACLEmail_S3 *>(acl_grantee->find_first("EmailAddress")); @@ -210,72 +245,51 @@ bool ACLGrant_S3::xml_end(const char *el) { return true; } -void ACLGrant_S3::to_xml(CephContext *cct, ostream& out) { - ACLPermission_S3& perm = static_cast<ACLPermission_S3 &>(permission); +void to_xml(const ACLGrant& grant, ostream& out) +{ + const ACLPermission perm = grant.get_permission(); /* only show s3 compatible permissions */ if (!(perm.get_permissions() & RGW_PERM_ALL_S3)) return; - string uri; + const std::string type = ACLGranteeType_S3::to_string(grant.get_type()); out << "<Grant>" << - "<Grantee xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" xsi:type=\"" << ACLGranteeType_S3::to_string(type) << "\">"; - switch (type.get_type()) { - case ACL_TYPE_CANON_USER: - out << "<ID>" << id << "</ID>"; - if (name.size()) { - out << "<DisplayName>" << name << "</DisplayName>"; - } - break; - case ACL_TYPE_EMAIL_USER: - out << "<EmailAddress>" << email << "</EmailAddress>"; - break; - case ACL_TYPE_GROUP: - if (!group_to_uri(group, uri)) { - ldout(cct, 0) << "ERROR: group_to_uri failed with group=" << (int)group << dendl; - break; + "<Grantee xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" xsi:type=\"" << type << "\">"; + + if (const auto* user = grant.get_user(); user) { + out << "<ID>" << user->id << "</ID>"; + if (user->name.size()) { + out << "<DisplayName>" << user->name << "</DisplayName>"; } + } else if (const auto* email = grant.get_email(); email) { + out << "<EmailAddress>" << email->address << "</EmailAddress>"; + } else if (const auto* group = grant.get_group(); group) { + std::string uri; + rgw::s3::acl_group_to_uri(group->type, uri); out << "<URI>" << uri << "</URI>"; - break; - default: - break; } out << "</Grantee>"; - perm.to_xml(out); + to_xml(perm, out); out << "</Grant>"; } -bool ACLGrant_S3::group_to_uri(ACLGroupTypeEnum group, string& uri) +class 
RGWAccessControlList_S3 : public XMLObj { - switch (group) { - case ACL_GROUP_ALL_USERS: - uri = rgw_uri_all_users; - return true; - case ACL_GROUP_AUTHENTICATED_USERS: - uri = rgw_uri_auth_users; - return true; - default: - return false; - } -} +public: + bool xml_end(const char *el) override; +}; bool RGWAccessControlList_S3::xml_end(const char *el) { - XMLObjIter iter = find("Grant"); - ACLGrant_S3 *grant = static_cast<ACLGrant_S3 *>(iter.get_next()); - while (grant) { - add_grant(grant); - grant = static_cast<ACLGrant_S3 *>(iter.get_next()); - } return true; } -void RGWAccessControlList_S3::to_xml(ostream& out) { - multimap<string, ACLGrant>::iterator iter; +void to_xml(const RGWAccessControlList& acl, std::ostream& out) +{ out << "<AccessControlList>"; - for (iter = grant_map.begin(); iter != grant_map.end(); ++iter) { - ACLGrant_S3& grant = static_cast<ACLGrant_S3 &>(iter->second); - grant.to_xml(cct, out); + for (const auto& p : acl.get_grant_map()) { + to_xml(p.second, out); } out << "</AccessControlList>"; } @@ -285,16 +299,11 @@ struct s3_acl_header { const char *http_header; }; -static const char *get_acl_header(const RGWEnv *env, - const struct s3_acl_header *perm) -{ - const char *header = perm->http_header; - - return env->get(header, NULL); -} - -static int parse_grantee_str(const DoutPrefixProvider *dpp, rgw::sal::Driver* driver, string& grantee_str, - const struct s3_acl_header *perm, ACLGrant& grant) +static int parse_grantee_str(const DoutPrefixProvider* dpp, + rgw::sal::Driver* driver, + const std::string& grantee_str, + const s3_acl_header* perm, + ACLGrant& grant) { string id_type, id_val_quoted; int rgw_perm = perm->rgw_perm; @@ -321,7 +330,7 @@ static int parse_grantee_str(const DoutPrefixProvider *dpp, rgw::sal::Driver* dr grant.set_canon(user->get_id(), user->get_display_name(), rgw_perm); } else if (strcasecmp(id_type.c_str(), "uri") == 0) { - ACLGroupTypeEnum gid = grant.uri_to_group(id_val); + ACLGroupTypeEnum gid = rgw::s3::acl_uri_to_group(id_val); if (gid == ACL_GROUP_NONE) return -EINVAL; @@ -333,71 +342,70 @@ static int parse_grantee_str(const DoutPrefixProvider *dpp, rgw::sal::Driver* dr return 0; } -static int parse_acl_header(const DoutPrefixProvider *dpp, rgw::sal::Driver* driver, - const RGWEnv *env, const struct s3_acl_header *perm, - std::list<ACLGrant>& _grants) +static int parse_acl_header(const DoutPrefixProvider* dpp, rgw::sal::Driver* driver, + const RGWEnv& env, const s3_acl_header* perm, + RGWAccessControlList& acl) { - std::list<string> grantees; - std::string hacl_str; - - const char *hacl = get_acl_header(env, perm); - if (hacl == NULL) + const char* hacl = env.get(perm->http_header, nullptr); + if (hacl == nullptr) { return 0; + } - hacl_str = hacl; - get_str_list(hacl_str, ",", grantees); - - for (list<string>::iterator it = grantees.begin(); it != grantees.end(); ++it) { + for (std::string_view grantee : ceph::split(hacl, ",")) { ACLGrant grant; - int ret = parse_grantee_str(dpp, driver, *it, perm, grant); + int ret = parse_grantee_str(dpp, driver, std::string{grantee}, perm, grant); if (ret < 0) return ret; - _grants.push_back(grant); + acl.add_grant(grant); } return 0; } -int RGWAccessControlList_S3::create_canned(ACLOwner& owner, ACLOwner& bucket_owner, const string& canned_acl) +static int create_canned(const ACLOwner& owner, const ACLOwner& bucket_owner, + const string& canned_acl, RGWAccessControlList& acl) { - acl_user_map.clear(); - grant_map.clear(); - - ACLGrant owner_grant; - - rgw_user bid = bucket_owner.get_id(); - string 
bname = bucket_owner.get_display_name(); + const rgw_user& bid = bucket_owner.id; + const std::string& bname = bucket_owner.display_name; /* owner gets full control */ - owner_grant.set_canon(owner.get_id(), owner.get_display_name(), RGW_PERM_FULL_CONTROL); - add_grant(&owner_grant); + { + ACLGrant grant; + grant.set_canon(owner.id, owner.display_name, RGW_PERM_FULL_CONTROL); + acl.add_grant(grant); + } if (canned_acl.size() == 0 || canned_acl.compare("private") == 0) { return 0; } - ACLGrant bucket_owner_grant; - ACLGrant group_grant; - if (canned_acl.compare("public-read") == 0) { - group_grant.set_group(ACL_GROUP_ALL_USERS, RGW_PERM_READ); - add_grant(&group_grant); - } else if (canned_acl.compare("public-read-write") == 0) { - group_grant.set_group(ACL_GROUP_ALL_USERS, RGW_PERM_READ); - add_grant(&group_grant); - group_grant.set_group(ACL_GROUP_ALL_USERS, RGW_PERM_WRITE); - add_grant(&group_grant); - } else if (canned_acl.compare("authenticated-read") == 0) { - group_grant.set_group(ACL_GROUP_AUTHENTICATED_USERS, RGW_PERM_READ); - add_grant(&group_grant); - } else if (canned_acl.compare("bucket-owner-read") == 0) { - bucket_owner_grant.set_canon(bid, bname, RGW_PERM_READ); - if (bid.compare(owner.get_id()) != 0) - add_grant(&bucket_owner_grant); - } else if (canned_acl.compare("bucket-owner-full-control") == 0) { - bucket_owner_grant.set_canon(bid, bname, RGW_PERM_FULL_CONTROL); - if (bid.compare(owner.get_id()) != 0) - add_grant(&bucket_owner_grant); + if (canned_acl == "public-read") { + ACLGrant grant; + grant.set_group(ACL_GROUP_ALL_USERS, RGW_PERM_READ); + acl.add_grant(grant); + } else if (canned_acl == "public-read-write") { + ACLGrant grant; + grant.set_group(ACL_GROUP_ALL_USERS, RGW_PERM_READ); + acl.add_grant(grant); + grant.set_group(ACL_GROUP_ALL_USERS, RGW_PERM_WRITE); + acl.add_grant(grant); + } else if (canned_acl == "authenticated-read") { + ACLGrant grant; + grant.set_group(ACL_GROUP_AUTHENTICATED_USERS, RGW_PERM_READ); + acl.add_grant(grant); + } else if (canned_acl == "bucket-owner-read") { + if (bid != owner.id) { + ACLGrant grant; + grant.set_canon(bid, bname, RGW_PERM_READ); + acl.add_grant(grant); + } + } else if (canned_acl == "bucket-owner-full-control") { + if (bid != owner.id) { + ACLGrant grant; + grant.set_canon(bid, bname, RGW_PERM_FULL_CONTROL); + acl.add_grant(grant); + } } else { return -EINVAL; } @@ -405,21 +413,11 @@ int RGWAccessControlList_S3::create_canned(ACLOwner& owner, ACLOwner& bucket_own return 0; } -int RGWAccessControlList_S3::create_from_grants(std::list<ACLGrant>& grants) +class RGWAccessControlPolicy_S3 : public XMLObj { - if (grants.empty()) - return -EINVAL; - - acl_user_map.clear(); - grant_map.clear(); - - for (std::list<ACLGrant>::iterator it = grants.begin(); it != grants.end(); ++it) { - ACLGrant g = *it; - add_grant(&g); - } - - return 0; -} +public: + bool xml_end(const char *el) override; +}; bool RGWAccessControlPolicy_S3::xml_end(const char *el) { RGWAccessControlList_S3 *s3acl = @@ -427,21 +425,17 @@ bool RGWAccessControlPolicy_S3::xml_end(const char *el) { if (!s3acl) return false; - acl = *s3acl; - - ACLOwner *owner_p = static_cast<ACLOwner_S3 *>(find_first("Owner")); + ACLOwner_S3 *owner_p = static_cast<ACLOwner_S3 *>(find_first("Owner")); if (!owner_p) return false; - owner = *owner_p; return true; } -void RGWAccessControlPolicy_S3::to_xml(ostream& out) { +void to_xml(const RGWAccessControlPolicy& p, std::ostream& out) +{ out << "<AccessControlPolicy xmlns=\"" << XMLNS_AWS_S3 << "\">"; - ACLOwner_S3& _owner = 
static_cast<ACLOwner_S3 &>(owner); - RGWAccessControlList_S3& _acl = static_cast<RGWAccessControlList_S3 &>(acl); - _owner.to_xml(out); - _acl.to_xml(out); + to_xml(p.get_owner(), out); + to_xml(p.get_acl(), out); out << "</AccessControlPolicy>"; } @@ -454,164 +448,77 @@ static const s3_acl_header acl_header_perms[] = { {0, NULL} }; -int RGWAccessControlPolicy_S3::create_from_headers(const DoutPrefixProvider *dpp, - rgw::sal::Driver* driver, - const RGWEnv *env, ACLOwner& _owner) +static int resolve_grant(const DoutPrefixProvider* dpp, optional_yield y, + rgw::sal::Driver* driver, ACLGrant_S3& xml_grant, + ACLGrant& grant, std::string& err_msg) { - std::list<ACLGrant> grants; - int r = 0; + const uint32_t perm = xml_grant.permission->flags; - for (const struct s3_acl_header *p = acl_header_perms; p->rgw_perm; p++) { - r = parse_acl_header(dpp, driver, env, p, grants); - if (r < 0) { - return r; + std::unique_ptr<rgw::sal::User> user; + switch (xml_grant.type.get_type()) { + case ACL_TYPE_EMAIL_USER: + if (xml_grant.email.empty()) { + return -EINVAL; } - } - - RGWAccessControlList_S3& _acl = static_cast<RGWAccessControlList_S3 &>(acl); - r = _acl.create_from_grants(grants); - - owner = _owner; - - return r; -} + if (driver->get_user_by_email(dpp, xml_grant.email, y, &user) < 0) { + ldpp_dout(dpp, 10) << "grant user email not found or other error" << dendl; + err_msg = "The e-mail address you provided does not match any account on record."; + return -ERR_UNRESOLVABLE_EMAIL; + } + grant.set_canon(user->get_id(), user->get_display_name(), perm); + return 0; -/* - can only be called on object that was parsed - */ -int RGWAccessControlPolicy_S3::rebuild(const DoutPrefixProvider *dpp, - rgw::sal::Driver* driver, ACLOwner *owner, - RGWAccessControlPolicy& dest, std::string &err_msg) -{ - if (!owner) - return -EINVAL; + case ACL_TYPE_CANON_USER: + user = driver->get_user(rgw_user{xml_grant.id}); + if (user->load_user(dpp, y) < 0) { + ldpp_dout(dpp, 10) << "grant user does not exist: " << xml_grant.id << dendl; + err_msg = "Invalid CanonicalUser id"; + return -EINVAL; + } + grant.set_canon(user->get_id(), user->get_display_name(), perm); + return 0; - ACLOwner *requested_owner = static_cast<ACLOwner_S3 *>(find_first("Owner")); - if (requested_owner) { - rgw_user& requested_id = requested_owner->get_id(); - if (!requested_id.empty() && requested_id.compare(owner->get_id()) != 0) - return -EPERM; - } + case ACL_TYPE_GROUP: + if (const auto group = rgw::s3::acl_uri_to_group(xml_grant.uri); + group != ACL_GROUP_NONE) { + grant.set_group(group, perm); + return 0; + } else { + ldpp_dout(dpp, 10) << "bad grant group: " << xml_grant.uri << dendl; + err_msg = "Invalid group uri"; + return -EINVAL; + } - std::unique_ptr<rgw::sal::User> user = driver->get_user(owner->get_id()); - if (user->load_user(dpp, null_yield) < 0) { - ldpp_dout(dpp, 10) << "owner info does not exist" << dendl; - err_msg = "Invalid id"; + case ACL_TYPE_REFERER: + case ACL_TYPE_UNKNOWN: + default: + err_msg = "Invalid Grantee type"; return -EINVAL; } - ACLOwner& dest_owner = dest.get_owner(); - dest_owner.set_id(owner->get_id()); - dest_owner.set_name(user->get_display_name()); - - ldpp_dout(dpp, 20) << "owner id=" << owner->get_id() << dendl; - ldpp_dout(dpp, 20) << "dest owner id=" << dest.get_owner().get_id() << dendl; - - RGWAccessControlList& dst_acl = dest.get_acl(); - - multimap<string, ACLGrant>& grant_map = acl.get_grant_map(); - multimap<string, ACLGrant>::iterator iter; - for (iter = grant_map.begin(); iter != grant_map.end(); 
++iter) { - ACLGrant& src_grant = iter->second; - ACLGranteeType& type = src_grant.get_type(); - ACLGrant new_grant; - bool grant_ok = false; - rgw_user uid; - RGWUserInfo grant_user; - switch (type.get_type()) { - case ACL_TYPE_EMAIL_USER: - { - string email; - rgw_user u; - if (!src_grant.get_id(u)) { - ldpp_dout(dpp, 0) << "ERROR: src_grant.get_id() failed" << dendl; - return -EINVAL; - } - email = u.id; - ldpp_dout(dpp, 10) << "grant user email=" << email << dendl; - if (driver->get_user_by_email(dpp, email, null_yield, &user) < 0) { - ldpp_dout(dpp, 10) << "grant user email not found or other error" << dendl; - err_msg = "The e-mail address you provided does not match any account on record."; - return -ERR_UNRESOLVABLE_EMAIL; - } - grant_user = user->get_info(); - uid = grant_user.user_id; - } - case ACL_TYPE_CANON_USER: - { - if (type.get_type() == ACL_TYPE_CANON_USER) { - if (!src_grant.get_id(uid)) { - ldpp_dout(dpp, 0) << "ERROR: src_grant.get_id() failed" << dendl; - err_msg = "Invalid id"; - return -EINVAL; - } - } - - if (grant_user.user_id.empty()) { - user = driver->get_user(uid); - if (user->load_user(dpp, null_yield) < 0) { - ldpp_dout(dpp, 10) << "grant user does not exist:" << uid << dendl; - err_msg = "Invalid id"; - return -EINVAL; - } else { - grant_user = user->get_info(); - } - } - ACLPermission& perm = src_grant.get_permission(); - new_grant.set_canon(uid, grant_user.display_name, perm.get_permissions()); - grant_ok = true; - rgw_user new_id; - new_grant.get_id(new_id); - ldpp_dout(dpp, 10) << "new grant: " << new_id << ":" << grant_user.display_name << dendl; - } - break; - case ACL_TYPE_GROUP: - { - string uri; - if (ACLGrant_S3::group_to_uri(src_grant.get_group(), uri)) { - new_grant = src_grant; - grant_ok = true; - ldpp_dout(dpp, 10) << "new grant: " << uri << dendl; - } else { - ldpp_dout(dpp, 10) << "bad grant group:" << (int)src_grant.get_group() << dendl; - err_msg = "Invalid group uri"; - return -EINVAL; - } - } - default: - break; - } - if (grant_ok) { - dst_acl.add_grant(&new_grant); - } - } - - return 0; } -bool RGWAccessControlPolicy_S3::compare_group_name(string& id, ACLGroupTypeEnum group) +/** + * Interfaces with the webserver's XML handling code + * to parse it in a way that makes sense for the rgw. 
+ */ +class RGWACLXMLParser_S3 : public RGWXMLParser { - switch (group) { - case ACL_GROUP_ALL_USERS: - return (id.compare(RGW_USER_ANON_ID) == 0); - case ACL_GROUP_AUTHENTICATED_USERS: - return (id.compare(rgw_uri_auth_users) == 0); - default: - return id.empty(); - } + CephContext *cct; - // shouldn't get here - return false; -} + XMLObj *alloc_obj(const char *el) override; +public: + explicit RGWACLXMLParser_S3(CephContext *_cct) : cct(_cct) {} +}; XMLObj *RGWACLXMLParser_S3::alloc_obj(const char *el) { XMLObj * obj = NULL; if (strcmp(el, "AccessControlPolicy") == 0) { - obj = new RGWAccessControlPolicy_S3(cct); + obj = new RGWAccessControlPolicy_S3(); } else if (strcmp(el, "Owner") == 0) { obj = new ACLOwner_S3(); } else if (strcmp(el, "AccessControlList") == 0) { - obj = new RGWAccessControlList_S3(cct); + obj = new RGWAccessControlList_S3(); } else if (strcmp(el, "ID") == 0) { obj = new ACLID_S3(); } else if (strcmp(el, "DisplayName") == 0) { @@ -631,13 +538,134 @@ XMLObj *RGWACLXMLParser_S3::alloc_obj(const char *el) return obj; } -ACLGroupTypeEnum ACLGrant_S3::uri_to_group(string& uri) +namespace rgw::s3 { + +ACLGroupTypeEnum acl_uri_to_group(std::string_view uri) { - if (uri.compare(rgw_uri_all_users) == 0) + if (uri == rgw_uri_all_users) return ACL_GROUP_ALL_USERS; - else if (uri.compare(rgw_uri_auth_users) == 0) + else if (uri == rgw_uri_auth_users) return ACL_GROUP_AUTHENTICATED_USERS; return ACL_GROUP_NONE; } +bool acl_group_to_uri(ACLGroupTypeEnum group, std::string& uri) +{ + switch (group) { + case ACL_GROUP_ALL_USERS: + uri = rgw_uri_all_users; + return true; + case ACL_GROUP_AUTHENTICATED_USERS: + uri = rgw_uri_auth_users; + return true; + default: + return false; + } +} + +int parse_policy(const DoutPrefixProvider* dpp, optional_yield y, + rgw::sal::Driver* driver, std::string_view document, + RGWAccessControlPolicy& policy, std::string& err_msg) +{ + RGWACLXMLParser_S3 parser(dpp->get_cct()); + if (!parser.init()) { + return -EINVAL; + } + if (!parser.parse(document.data(), document.size(), 1)) { + return -EINVAL; + } + + const auto xml_root = static_cast<RGWAccessControlPolicy_S3*>( + parser.find_first("AccessControlPolicy")); + if (!xml_root) { + err_msg = "Missing element AccessControlPolicy"; + return -EINVAL; + } + + const auto xml_owner = static_cast<ACLOwner_S3*>( + xml_root->find_first("Owner")); + if (!xml_owner) { + err_msg = "Missing element Owner"; + return -EINVAL; + } + + // owner must exist + std::unique_ptr<rgw::sal::User> user = + driver->get_user(rgw_user{xml_owner->id}); + if (user->load_user(dpp, y) < 0) { + ldpp_dout(dpp, 10) << "acl owner does not exist" << dendl; + err_msg = "Invalid Owner ID"; + return -EINVAL; + } + + ACLOwner& owner = policy.get_owner(); + owner.id = xml_owner->id; + if (!xml_owner->display_name.empty()) { + owner.display_name = xml_owner->display_name; + } else { + owner.display_name = user->get_display_name(); + } + + const auto xml_acl = static_cast<ACLOwner_S3*>( + xml_root->find_first("AccessControlList")); + if (!xml_acl) { + err_msg = "Missing element AccessControlList"; + return -EINVAL; + } + + // iterate parsed grants + XMLObjIter iter = xml_acl->find("Grant"); + ACLGrant_S3* xml_grant = static_cast<ACLGrant_S3*>(iter.get_next()); + while (xml_grant) { + ACLGrant grant; + int r = resolve_grant(dpp, y, driver, *xml_grant, grant, err_msg); + if (r < 0) { + return r; + } + policy.get_acl().add_grant(grant); + xml_grant = static_cast<ACLGrant_S3*>(iter.get_next()); + } + + return 0; +} + +void write_policy_xml(const 
RGWAccessControlPolicy& policy, + std::ostream& out) +{ + to_xml(policy, out); +} + +int create_canned_acl(const ACLOwner& owner, + const ACLOwner& bucket_owner, + const std::string& canned_acl, + RGWAccessControlPolicy& policy) +{ + if (owner.id == rgw_user("anonymous")) { + policy.set_owner(bucket_owner); + } else { + policy.set_owner(owner); + } + return create_canned(owner, bucket_owner, canned_acl, policy.get_acl()); +} + +int create_policy_from_headers(const DoutPrefixProvider* dpp, + rgw::sal::Driver* driver, + const ACLOwner& owner, + const RGWEnv& env, + RGWAccessControlPolicy& policy) +{ + policy.set_owner(owner); + auto& acl = policy.get_acl(); + + for (const s3_acl_header* p = acl_header_perms; p->rgw_perm; p++) { + int r = parse_acl_header(dpp, driver, env, p, acl); + if (r < 0) { + return r; + } + } + + return 0; +} + +} // namespace rgw::s3 diff --git a/src/rgw/rgw_acl_s3.h b/src/rgw/rgw_acl_s3.h index c234d722b99..2341461783f 100644 --- a/src/rgw/rgw_acl_s3.h +++ b/src/rgw/rgw_acl_s3.h @@ -8,108 +8,40 @@ #include <iosfwd> #include <include/types.h> -#include "include/str_list.h" +#include "common/async/yield_context.h" #include "rgw_xml.h" #include "rgw_acl.h" #include "rgw_sal_fwd.h" -class RGWUserCtl; - -class ACLPermission_S3 : public ACLPermission, public XMLObj -{ -public: - ACLPermission_S3() {} - virtual ~ACLPermission_S3() override {} - - bool xml_end(const char *el) override; - void to_xml(std::ostream& out); -}; - -class ACLGrantee_S3 : public ACLGrantee, public XMLObj -{ -public: - ACLGrantee_S3() {} - virtual ~ACLGrantee_S3() override {} - - bool xml_start(const char *el, const char **attr); -}; - - -class ACLGrant_S3 : public ACLGrant, public XMLObj -{ -public: - ACLGrant_S3() {} - virtual ~ACLGrant_S3() override {} - - void to_xml(CephContext *cct, std::ostream& out); - bool xml_end(const char *el) override; - bool xml_start(const char *el, const char **attr); - - static ACLGroupTypeEnum uri_to_group(std::string& uri); - static bool group_to_uri(ACLGroupTypeEnum group, std::string& uri); -}; - -class RGWAccessControlList_S3 : public RGWAccessControlList, public XMLObj -{ -public: - explicit RGWAccessControlList_S3(CephContext *_cct) : RGWAccessControlList(_cct) {} - virtual ~RGWAccessControlList_S3() override {} - - bool xml_end(const char *el) override; - void to_xml(std::ostream& out); - - int create_canned(ACLOwner& owner, ACLOwner& bucket_owner, const std::string& canned_acl); - int create_from_grants(std::list<ACLGrant>& grants); -}; - -class ACLOwner_S3 : public ACLOwner, public XMLObj -{ -public: - ACLOwner_S3() {} - virtual ~ACLOwner_S3() override {} - - bool xml_end(const char *el) override; - void to_xml(std::ostream& out); -}; - class RGWEnv; -class RGWAccessControlPolicy_S3 : public RGWAccessControlPolicy, public XMLObj -{ -public: - explicit RGWAccessControlPolicy_S3(CephContext *_cct) : RGWAccessControlPolicy(_cct) {} - virtual ~RGWAccessControlPolicy_S3() override {} +namespace rgw::s3 { + +ACLGroupTypeEnum acl_uri_to_group(std::string_view uri); +bool acl_group_to_uri(ACLGroupTypeEnum group, std::string& uri); - bool xml_end(const char *el) override; +/// Construct a policy from an AccessControlPolicy xml document. Email grantees +/// are looked up and converted to a corresponding CanonicalUser grant. All user +/// ids are verified to exist. 
+int parse_policy(const DoutPrefixProvider* dpp, optional_yield y, + rgw::sal::Driver* driver, std::string_view document, + RGWAccessControlPolicy& policy, std::string& err_msg); - void to_xml(std::ostream& out); - int rebuild(const DoutPrefixProvider *dpp, rgw::sal::Driver* driver, ACLOwner *owner, - RGWAccessControlPolicy& dest, std::string &err_msg); - bool compare_group_name(std::string& id, ACLGroupTypeEnum group) override; +/// Write an AccessControlPolicy xml document for the given policy. +void write_policy_xml(const RGWAccessControlPolicy& policy, + std::ostream& out); - virtual int create_canned(ACLOwner& _owner, ACLOwner& bucket_owner, const std::string& canned_acl) { - RGWAccessControlList_S3& _acl = static_cast<RGWAccessControlList_S3 &>(acl); - if (_owner.get_id() == rgw_user("anonymous")) { - owner = bucket_owner; - } else { - owner = _owner; - } - int ret = _acl.create_canned(owner, bucket_owner, canned_acl); - return ret; - } - int create_from_headers(const DoutPrefixProvider *dpp, rgw::sal::Driver* driver, - const RGWEnv *env, ACLOwner& _owner); -}; +/// Construct a policy from a s3 canned acl string. +int create_canned_acl(const ACLOwner& owner, + const ACLOwner& bucket_owner, + const std::string& canned_acl, + RGWAccessControlPolicy& policy); -/** - * Interfaces with the webserver's XML handling code - * to parse it in a way that makes sense for the rgw. - */ -class RGWACLXMLParser_S3 : public RGWXMLParser -{ - CephContext *cct; +/// Construct a policy from x-amz-grant-* request headers. +int create_policy_from_headers(const DoutPrefixProvider* dpp, + rgw::sal::Driver* driver, + const ACLOwner& owner, + const RGWEnv& env, + RGWAccessControlPolicy& policy); - XMLObj *alloc_obj(const char *el) override; -public: - explicit RGWACLXMLParser_S3(CephContext *_cct) : cct(_cct) {} -}; +} // namespace rgw::s3 diff --git a/src/rgw/rgw_acl_swift.cc b/src/rgw/rgw_acl_swift.cc index c6f06bbd37c..260f4530d41 100644 --- a/src/rgw/rgw_acl_swift.cc +++ b/src/rgw/rgw_acl_swift.cc @@ -3,11 +3,13 @@ #include <string.h> +#include <optional> #include <vector> #include <boost/algorithm/string/predicate.hpp> #include "common/ceph_json.h" +#include "common/split.h" #include "rgw_common.h" #include "rgw_user.h" #include "rgw_acl_swift.h" @@ -26,27 +28,6 @@ using namespace std; -static int parse_list(const char* uid_list, - std::vector<std::string>& uids) /* out */ -{ - char *s = strdup(uid_list); - if (!s) { - return -ENOMEM; - } - - char *tokctx; - const char *p = strtok_r(s, " ,", &tokctx); - while (p) { - if (*p) { - string acl = p; - uids.push_back(acl); - } - p = strtok_r(NULL, " ,", &tokctx); - } - free(s); - return 0; -} - static bool is_referrer(const std::string& designator) { return designator.compare(".r") == 0 || @@ -73,8 +54,8 @@ static bool uid_is_public(const string& uid) return is_referrer(sub); } -static boost::optional<ACLGrant> referrer_to_grant(std::string url_spec, - const uint32_t perm) +static std::optional<ACLGrant> referrer_to_grant(std::string url_spec, + const uint32_t perm) { /* This function takes url_spec as non-ref std::string because of the trim * operation that is essential to preserve compliance with Swift. 
It can't @@ -99,7 +80,7 @@ static boost::optional<ACLGrant> referrer_to_grant(std::string url_spec, } if (url_spec.empty() || url_spec == ".") { - return boost::none; + return std::nullopt; } } else { /* Please be aware we're specially handling the .r:* in _add_grant() @@ -110,21 +91,18 @@ static boost::optional<ACLGrant> referrer_to_grant(std::string url_spec, grant.set_referer(url_spec, is_negative ? 0 : perm); return grant; } catch (const std::out_of_range&) { - return boost::none; + return std::nullopt; } } static ACLGrant user_to_grant(const DoutPrefixProvider *dpp, - CephContext* const cct, rgw::sal::Driver* driver, const std::string& uid, const uint32_t perm) { - RGWUserInfo grant_user; ACLGrant grant; - std::unique_ptr<rgw::sal::User> user; - user = driver->get_user(rgw_user(uid)); + std::unique_ptr<rgw::sal::User> user = driver->get_user(rgw_user(uid)); if (user->load_user(dpp, null_yield) < 0) { ldpp_dout(dpp, 10) << "grant user does not exist: " << uid << dendl; /* skipping silently */ @@ -136,101 +114,102 @@ static ACLGrant user_to_grant(const DoutPrefixProvider *dpp, return grant; } -int RGWAccessControlPolicy_SWIFT::add_grants(const DoutPrefixProvider *dpp, - rgw::sal::Driver* driver, - const std::vector<std::string>& uids, - const uint32_t perm) +// parse a container acl grant in 'V1' format +// https://docs.openstack.org/swift/latest/overview_acl.html#container-acls +static auto parse_grant(const DoutPrefixProvider* dpp, + rgw::sal::Driver* driver, + const std::string& uid, + const uint32_t perm) + -> std::optional<ACLGrant> { - for (const auto& uid : uids) { - boost::optional<ACLGrant> grant; - ldpp_dout(dpp, 20) << "trying to add grant for ACL uid=" << uid << dendl; - - /* Let's check whether the item has a separator potentially indicating - * a special meaning (like an HTTP referral-based grant). */ - const size_t pos = uid.find(':'); - if (std::string::npos == pos) { - /* No, it don't have -- we've got just a regular user identifier. */ - grant = user_to_grant(dpp, cct, driver, uid, perm); - } else { - /* Yes, *potentially* an HTTP referral. */ - auto designator = uid.substr(0, pos); - auto designatee = uid.substr(pos + 1); - - /* Swift strips whitespaces at both beginning and end. */ - boost::algorithm::trim(designator); - boost::algorithm::trim(designatee); - - if (! boost::algorithm::starts_with(designator, ".")) { - grant = user_to_grant(dpp, cct, driver, uid, perm); - } else if ((perm & SWIFT_PERM_WRITE) == 0 && is_referrer(designator)) { - /* HTTP referrer-based ACLs aren't acceptable for writes. */ - grant = referrer_to_grant(designatee, perm); - } - } + ldpp_dout(dpp, 20) << "trying to add grant for ACL uid=" << uid << dendl; + + /* Let's check whether the item has a separator potentially indicating + * a special meaning (like an HTTP referral-based grant). */ + const size_t pos = uid.find(':'); + if (std::string::npos == pos) { + /* No, it don't have -- we've got just a regular user identifier. */ + return user_to_grant(dpp, driver, uid, perm); + } - if (grant) { - acl.add_grant(&*grant); - } else { - return -EINVAL; - } + /* Yes, *potentially* an HTTP referral. */ + auto designator = uid.substr(0, pos); + auto designatee = uid.substr(pos + 1); + + /* Swift strips whitespaces at both beginning and end. */ + boost::algorithm::trim(designator); + boost::algorithm::trim(designatee); + + if (! 
boost::algorithm::starts_with(designator, ".")) { + return user_to_grant(dpp, driver, uid, perm); + } + if ((perm & SWIFT_PERM_WRITE) == 0 && is_referrer(designator)) { + /* HTTP referrer-based ACLs aren't acceptable for writes. */ + return referrer_to_grant(designatee, perm); } - return 0; + return std::nullopt; } +static void add_grants(const DoutPrefixProvider* dpp, + rgw::sal::Driver* driver, + const std::vector<std::string>& uids, + uint32_t perm, RGWAccessControlList& acl) +{ + for (const auto& uid : uids) { + ACLGrant grant; + if (uid_is_public(uid)) { + grant.set_group(ACL_GROUP_ALL_USERS, perm); + } else { + grant = user_to_grant(dpp, driver, uid, perm); + } + acl.add_grant(grant); + } +} + +namespace rgw::swift { -int RGWAccessControlPolicy_SWIFT::create(const DoutPrefixProvider *dpp, - rgw::sal::Driver* driver, - const rgw_user& id, - const std::string& name, - const char* read_list, - const char* write_list, - uint32_t& rw_mask) +int create_container_policy(const DoutPrefixProvider *dpp, + rgw::sal::Driver* driver, + const rgw_user& id, + const std::string& name, + const char* read_list, + const char* write_list, + uint32_t& rw_mask, + RGWAccessControlPolicy& policy) { - acl.create_default(id, name); - owner.set_id(id); - owner.set_name(name); - rw_mask = 0; + policy.create_default(id, name); + auto& acl = policy.get_acl(); if (read_list) { - std::vector<std::string> uids; - int r = parse_list(read_list, uids); - if (r < 0) { - ldpp_dout(dpp, 0) << "ERROR: parse_list for read returned r=" - << r << dendl; - return r; - } - - r = add_grants(dpp, driver, uids, SWIFT_PERM_READ); - if (r < 0) { - ldpp_dout(dpp, 0) << "ERROR: add_grants for read returned r=" - << r << dendl; - return r; + for (std::string_view uid : ceph::split(read_list, " ,")) { + auto grant = parse_grant(dpp, driver, std::string{uid}, SWIFT_PERM_READ); + if (!grant) { + ldpp_dout(dpp, 4) << "ERROR: failed to parse read acl grant " + << uid << dendl; + return -EINVAL; + } + acl.add_grant(*grant); } rw_mask |= SWIFT_PERM_READ; } if (write_list) { - std::vector<std::string> uids; - int r = parse_list(write_list, uids); - if (r < 0) { - ldpp_dout(dpp, 0) << "ERROR: parse_list for write returned r=" - << r << dendl; - return r; - } - - r = add_grants(dpp, driver, uids, SWIFT_PERM_WRITE); - if (r < 0) { - ldpp_dout(dpp, 0) << "ERROR: add_grants for write returned r=" - << r << dendl; - return r; + for (std::string_view uid : ceph::split(write_list, " ,")) { + auto grant = parse_grant(dpp, driver, std::string{uid}, SWIFT_PERM_WRITE); + if (!grant) { + ldpp_dout(dpp, 4) << "ERROR: failed to parse write acl grant " + << uid << dendl; + return -EINVAL; + } + acl.add_grant(*grant); } rw_mask |= SWIFT_PERM_WRITE; } return 0; } -void RGWAccessControlPolicy_SWIFT::filter_merge(uint32_t rw_mask, - RGWAccessControlPolicy_SWIFT *old) +void merge_policy(uint32_t rw_mask, const RGWAccessControlPolicy& src, + RGWAccessControlPolicy& dest) { /* rw_mask&SWIFT_PERM_READ => setting read acl, * rw_mask&SWIFT_PERM_WRITE => setting write acl @@ -240,112 +219,78 @@ void RGWAccessControlPolicy_SWIFT::filter_merge(uint32_t rw_mask, return; } rw_mask ^= (SWIFT_PERM_READ|SWIFT_PERM_WRITE); - for (auto &iter: old->acl.get_grant_map()) { - ACLGrant& grant = iter.second; + for (const auto &iter: src.get_acl().get_grant_map()) { + const ACLGrant& grant = iter.second; uint32_t perm = grant.get_permission().get_permissions(); - rgw_user id; - string url_spec; - if (!grant.get_id(id)) { - if (grant.get_group() != ACL_GROUP_ALL_USERS) { - url_spec = 
grant.get_referer(); - if (url_spec.empty()) { - continue; - } - if (perm == 0) { - /* We need to carry also negative, HTTP referrer-based ACLs. */ - perm = SWIFT_PERM_READ; - } + if (const auto* referer = grant.get_referer(); referer) { + if (referer->url_spec.empty()) { + continue; + } + if (perm == 0) { + /* We need to carry also negative, HTTP referrer-based ACLs. */ + perm = SWIFT_PERM_READ; } } if (perm & rw_mask) { - acl.add_grant(&grant); + dest.get_acl().add_grant(grant); } } } -void RGWAccessControlPolicy_SWIFT::to_str(string& read, string& write) +void format_container_acls(const RGWAccessControlPolicy& policy, + std::string& read, std::string& write) { - multimap<string, ACLGrant>& m = acl.get_grant_map(); - multimap<string, ACLGrant>::iterator iter; - - for (iter = m.begin(); iter != m.end(); ++iter) { - ACLGrant& grant = iter->second; + for (const auto& [k, grant] : policy.get_acl().get_grant_map()) { const uint32_t perm = grant.get_permission().get_permissions(); - rgw_user id; - string url_spec; - if (!grant.get_id(id)) { - if (grant.get_group() == ACL_GROUP_ALL_USERS) { + std::string id; + std::string url_spec; + if (const auto user = grant.get_user(); user) { + id = user->id.to_str(); + } else if (const auto group = grant.get_group(); group) { + if (group->type == ACL_GROUP_ALL_USERS) { id = SWIFT_GROUP_ALL_USERS; - } else { - url_spec = grant.get_referer(); - if (url_spec.empty()) { - continue; - } - id = (perm != 0) ? ".r:" + url_spec : ".r:-" + url_spec; } + } else if (const auto referer = grant.get_referer(); referer) { + url_spec = referer->url_spec; + if (url_spec.empty()) { + continue; + } + id = (perm != 0) ? ".r:" + url_spec : ".r:-" + url_spec; } if (perm & SWIFT_PERM_READ) { if (!read.empty()) { read.append(","); } - read.append(id.to_str()); + read.append(id); } else if (perm & SWIFT_PERM_WRITE) { if (!write.empty()) { write.append(","); } - write.append(id.to_str()); + write.append(id); } else if (perm == 0 && !url_spec.empty()) { /* only X-Container-Read headers support referers */ if (!read.empty()) { read.append(","); } - read.append(id.to_str()); - } - } -} - -void RGWAccessControlPolicy_SWIFTAcct::add_grants(const DoutPrefixProvider *dpp, - rgw::sal::Driver* driver, - const std::vector<std::string>& uids, - const uint32_t perm) -{ - for (const auto& uid : uids) { - ACLGrant grant; - - if (uid_is_public(uid)) { - grant.set_group(ACL_GROUP_ALL_USERS, perm); - acl.add_grant(&grant); - } else { - std::unique_ptr<rgw::sal::User> user = driver->get_user(rgw_user(uid)); - - if (user->load_user(dpp, null_yield) < 0) { - ldpp_dout(dpp, 10) << "grant user does not exist:" << uid << dendl; - /* skipping silently */ - grant.set_canon(user->get_id(), std::string(), perm); - acl.add_grant(&grant); - } else { - grant.set_canon(user->get_id(), user->get_display_name(), perm); - acl.add_grant(&grant); - } + read.append(id); } } } -bool RGWAccessControlPolicy_SWIFTAcct::create(const DoutPrefixProvider *dpp, - rgw::sal::Driver* driver, - const rgw_user& id, - const std::string& name, - const std::string& acl_str) +int create_account_policy(const DoutPrefixProvider* dpp, + rgw::sal::Driver* driver, + const rgw_user& id, + const std::string& name, + const std::string& acl_str, + RGWAccessControlPolicy& policy) { - acl.create_default(id, name); - owner.set_id(id); - owner.set_name(name); + policy.create_default(id, name); + auto& acl = policy.get_acl(); JSONParser parser; - if (!parser.parse(acl_str.c_str(), acl_str.length())) { ldpp_dout(dpp, 0) << "ERROR: JSONParser::parse 
returned error=" << dendl; - return false; + return -EINVAL; } JSONObjIter iter = parser.find_first("admin"); @@ -354,7 +299,7 @@ bool RGWAccessControlPolicy_SWIFTAcct::create(const DoutPrefixProvider *dpp, decode_json_obj(admin, *iter); ldpp_dout(dpp, 0) << "admins: " << admin << dendl; - add_grants(dpp, driver, admin, SWIFT_PERM_ADMIN); + add_grants(dpp, driver, admin, SWIFT_PERM_ADMIN, acl); } iter = parser.find_first("read-write"); @@ -363,7 +308,7 @@ bool RGWAccessControlPolicy_SWIFTAcct::create(const DoutPrefixProvider *dpp, decode_json_obj(readwrite, *iter); ldpp_dout(dpp, 0) << "read-write: " << readwrite << dendl; - add_grants(dpp, driver, readwrite, SWIFT_PERM_RWRT); + add_grants(dpp, driver, readwrite, SWIFT_PERM_RWRT, acl); } iter = parser.find_first("read-only"); @@ -372,39 +317,47 @@ bool RGWAccessControlPolicy_SWIFTAcct::create(const DoutPrefixProvider *dpp, decode_json_obj(readonly, *iter); ldpp_dout(dpp, 0) << "read-only: " << readonly << dendl; - add_grants(dpp, driver, readonly, SWIFT_PERM_READ); + add_grants(dpp, driver, readonly, SWIFT_PERM_READ, acl); } - return true; + return 0; } -boost::optional<std::string> RGWAccessControlPolicy_SWIFTAcct::to_str() const +auto format_account_acl(const RGWAccessControlPolicy& policy) + -> std::optional<std::string> { + const ACLOwner& owner = policy.get_owner(); + std::vector<std::string> admin; std::vector<std::string> readwrite; std::vector<std::string> readonly; /* Partition the grant map into three not-overlapping groups. */ - for (const auto& item : get_acl().get_grant_map()) { + for (const auto& item : policy.get_acl().get_grant_map()) { const ACLGrant& grant = item.second; const uint32_t perm = grant.get_permission().get_permissions(); - rgw_user id; - if (!grant.get_id(id)) { - if (grant.get_group() != ACL_GROUP_ALL_USERS) { + std::string id; + if (const auto user = grant.get_user(); user) { + if (owner.id == user->id) { + continue; + } + id = user->id.to_str(); + } else if (const auto group = grant.get_group(); group) { + if (group->type != ACL_GROUP_ALL_USERS) { continue; } id = SWIFT_GROUP_ALL_USERS; - } else if (owner.get_id() == id) { + } else { continue; } if (SWIFT_PERM_ADMIN == (perm & SWIFT_PERM_ADMIN)) { - admin.insert(admin.end(), id.to_str()); + admin.insert(admin.end(), id); } else if (SWIFT_PERM_RWRT == (perm & SWIFT_PERM_RWRT)) { - readwrite.insert(readwrite.end(), id.to_str()); + readwrite.insert(readwrite.end(), id); } else if (SWIFT_PERM_READ == (perm & SWIFT_PERM_READ)) { - readonly.insert(readonly.end(), id.to_str()); + readonly.insert(readonly.end(), id); } else { // FIXME: print a warning } @@ -413,7 +366,7 @@ boost::optional<std::string> RGWAccessControlPolicy_SWIFTAcct::to_str() const /* If there is no grant to serialize, let's exit earlier to not return * an empty JSON object which brakes the functional tests of Swift. */ if (admin.empty() && readwrite.empty() && readonly.empty()) { - return boost::none; + return std::nullopt; } /* Serialize the groups. 
*/ @@ -436,3 +389,5 @@ boost::optional<std::string> RGWAccessControlPolicy_SWIFTAcct::to_str() const return oss.str(); } + +} // namespace rgw::swift diff --git a/src/rgw/rgw_acl_swift.h b/src/rgw/rgw_acl_swift.h index 4cb1e4b8f8f..a16bea894db 100644 --- a/src/rgw/rgw_acl_swift.h +++ b/src/rgw/rgw_acl_swift.h @@ -3,56 +3,46 @@ #pragma once -#include <map> -#include <vector> #include <string> -#include <include/types.h> - -#include <boost/optional.hpp> - -#include "rgw_acl.h" - -class RGWUserCtl; - -class RGWAccessControlPolicy_SWIFT : public RGWAccessControlPolicy -{ - int add_grants(const DoutPrefixProvider *dpp, rgw::sal::Driver* driver, - const std::vector<std::string>& uids, - uint32_t perm); - -public: - explicit RGWAccessControlPolicy_SWIFT(CephContext* const cct) - : RGWAccessControlPolicy(cct) { - } - ~RGWAccessControlPolicy_SWIFT() override = default; - - int create(const DoutPrefixProvider *dpp, - rgw::sal::Driver* driver, - const rgw_user& id, - const std::string& name, - const char* read_list, - const char* write_list, - uint32_t& rw_mask); - void filter_merge(uint32_t mask, RGWAccessControlPolicy_SWIFT *policy); - void to_str(std::string& read, std::string& write); -}; - -class RGWAccessControlPolicy_SWIFTAcct : public RGWAccessControlPolicy -{ -public: - explicit RGWAccessControlPolicy_SWIFTAcct(CephContext * const cct) - : RGWAccessControlPolicy(cct) { - } - ~RGWAccessControlPolicy_SWIFTAcct() override {} - - void add_grants(const DoutPrefixProvider *dpp, - rgw::sal::Driver* driver, - const std::vector<std::string>& uids, - uint32_t perm); - bool create(const DoutPrefixProvider *dpp, - rgw::sal::Driver* driver, - const rgw_user& id, - const std::string& name, - const std::string& acl_str); - boost::optional<std::string> to_str() const; -}; +#include "rgw_sal_fwd.h" +#include "rgw_user_types.h" + +class DoutPrefixProvider; +class RGWAccessControlPolicy; + +namespace rgw::swift { + +/// Create a policy based on swift container acl headers +/// X-Container-Read/X-Container-Write. +int create_container_policy(const DoutPrefixProvider *dpp, + rgw::sal::Driver* driver, + const rgw_user& id, + const std::string& name, + const char* read_list, + const char* write_list, + uint32_t& rw_mask, + RGWAccessControlPolicy& policy); + +/// Copy grants matching the permission mask (SWIFT_PERM_READ/WRITE) from +/// one policy to another. +void merge_policy(uint32_t rw_mask, const RGWAccessControlPolicy& src, + RGWAccessControlPolicy& dest); + +/// Format the policy in terms of X-Container-Read/X-Container-Write strings. +void format_container_acls(const RGWAccessControlPolicy& policy, + std::string& read, std::string& write); + +/// Create a policy based on swift account acl header X-Account-Access-Control. +int create_account_policy(const DoutPrefixProvider* dpp, + rgw::sal::Driver* driver, + const rgw_user& id, + const std::string& name, + const std::string& acl_str, + RGWAccessControlPolicy& policy); + +/// Format the policy in terms of the X-Account-Access-Control string. Returns +/// std::nullopt if there are no admin/read-write/read-only entries. 
+auto format_account_acl(const RGWAccessControlPolicy& policy) + -> std::optional<std::string>; + +} // namespace rgw::swift diff --git a/src/rgw/rgw_acl_types.h b/src/rgw/rgw_acl_types.h index c76d085e0b5..b9866e9b289 100644 --- a/src/rgw/rgw_acl_types.h +++ b/src/rgw/rgw_acl_types.h @@ -181,12 +181,14 @@ class ACLGranteeType protected: __u32 type; public: - ACLGranteeType() : type(ACL_TYPE_UNKNOWN) {} - virtual ~ACLGranteeType() {} -// virtual const char *to_string() = 0; + ACLGranteeType(ACLGranteeTypeEnum t = ACL_TYPE_UNKNOWN) : type(t) {} + ACLGranteeTypeEnum get_type() const { return (ACLGranteeTypeEnum)type; } + operator ACLGranteeTypeEnum() const { return get_type(); } + void set(ACLGranteeTypeEnum t) { type = t; } -// virtual void set(const char *s) = 0; + ACLGranteeType& operator=(ACLGranteeTypeEnum t) { set(t); return *this; } + void encode(bufferlist& bl) const { ENCODE_START(2, 2, bl); encode(type, bl); @@ -204,10 +206,3 @@ public: friend bool operator!=(const ACLGranteeType& lhs, const ACLGranteeType& rhs); }; WRITE_CLASS_ENCODER(ACLGranteeType) - -class ACLGrantee -{ -public: - ACLGrantee() {} - ~ACLGrantee() {} -}; diff --git a/src/rgw/rgw_admin.cc b/src/rgw/rgw_admin.cc index c19d44c9e53..f4a2d1480bd 100644 --- a/src/rgw/rgw_admin.cc +++ b/src/rgw/rgw_admin.cc @@ -2098,7 +2098,7 @@ stringstream& push_ss(stringstream& ss, list<string>& l, int tab = 0) static void get_md_sync_status(list<string>& status) { - RGWMetaSyncStatusManager sync(static_cast<rgw::sal::RadosStore*>(driver), static_cast<rgw::sal::RadosStore*>(driver)->svc()->rados->get_async_processor()); + RGWMetaSyncStatusManager sync(static_cast<rgw::sal::RadosStore*>(driver), static_cast<rgw::sal::RadosStore*>(driver)->svc()->async_processor); int ret = sync.init(dpp()); if (ret < 0) { @@ -2254,7 +2254,7 @@ static void get_data_sync_status(const rgw_zone_id& source_zone, list<string>& s flush_ss(ss, status); return; } - RGWDataSyncStatusManager sync(static_cast<rgw::sal::RadosStore*>(driver), static_cast<rgw::sal::RadosStore*>(driver)->svc()->rados->get_async_processor(), source_zone, nullptr); + RGWDataSyncStatusManager sync(static_cast<rgw::sal::RadosStore*>(driver), static_cast<rgw::sal::RadosStore*>(driver)->svc()->async_processor, source_zone, nullptr); int ret = sync.init(dpp()); if (ret < 0) { @@ -8995,7 +8995,7 @@ next: } if (opt_cmd == OPT::METADATA_SYNC_STATUS) { - RGWMetaSyncStatusManager sync(static_cast<rgw::sal::RadosStore*>(driver), static_cast<rgw::sal::RadosStore*>(driver)->svc()->rados->get_async_processor()); + RGWMetaSyncStatusManager sync(static_cast<rgw::sal::RadosStore*>(driver), static_cast<rgw::sal::RadosStore*>(driver)->svc()->async_processor); int ret = sync.init(dpp()); if (ret < 0) { @@ -9039,7 +9039,7 @@ next: } if (opt_cmd == OPT::METADATA_SYNC_INIT) { - RGWMetaSyncStatusManager sync(static_cast<rgw::sal::RadosStore*>(driver), static_cast<rgw::sal::RadosStore*>(driver)->svc()->rados->get_async_processor()); + RGWMetaSyncStatusManager sync(static_cast<rgw::sal::RadosStore*>(driver), static_cast<rgw::sal::RadosStore*>(driver)->svc()->async_processor); int ret = sync.init(dpp()); if (ret < 0) { @@ -9055,7 +9055,7 @@ next: if (opt_cmd == OPT::METADATA_SYNC_RUN) { - RGWMetaSyncStatusManager sync(static_cast<rgw::sal::RadosStore*>(driver), static_cast<rgw::sal::RadosStore*>(driver)->svc()->rados->get_async_processor()); + RGWMetaSyncStatusManager sync(static_cast<rgw::sal::RadosStore*>(driver), static_cast<rgw::sal::RadosStore*>(driver)->svc()->async_processor); int ret = sync.init(dpp()); 
if (ret < 0) { @@ -9075,7 +9075,7 @@ next: cerr << "ERROR: source zone not specified" << std::endl; return EINVAL; } - RGWDataSyncStatusManager sync(static_cast<rgw::sal::RadosStore*>(driver), static_cast<rgw::sal::RadosStore*>(driver)->svc()->rados->get_async_processor(), source_zone, nullptr); + RGWDataSyncStatusManager sync(static_cast<rgw::sal::RadosStore*>(driver), static_cast<rgw::sal::RadosStore*>(driver)->svc()->async_processor, source_zone, nullptr); int ret = sync.init(dpp()); if (ret < 0) { @@ -9145,7 +9145,7 @@ next: return EINVAL; } - RGWDataSyncStatusManager sync(static_cast<rgw::sal::RadosStore*>(driver), static_cast<rgw::sal::RadosStore*>(driver)->svc()->rados->get_async_processor(), source_zone, nullptr); + RGWDataSyncStatusManager sync(static_cast<rgw::sal::RadosStore*>(driver), static_cast<rgw::sal::RadosStore*>(driver)->svc()->async_processor, source_zone, nullptr); int ret = sync.init(dpp()); if (ret < 0) { @@ -9174,7 +9174,7 @@ next: return ret; } - RGWDataSyncStatusManager sync(static_cast<rgw::sal::RadosStore*>(driver), static_cast<rgw::sal::RadosStore*>(driver)->svc()->rados->get_async_processor(), source_zone, nullptr, sync_module); + RGWDataSyncStatusManager sync(static_cast<rgw::sal::RadosStore*>(driver), static_cast<rgw::sal::RadosStore*>(driver)->svc()->async_processor, source_zone, nullptr, sync_module); ret = sync.init(dpp()); if (ret < 0) { diff --git a/src/rgw/rgw_common.cc b/src/rgw/rgw_common.cc index 6c07e323243..b16d9d97fa8 100644 --- a/src/rgw/rgw_common.cc +++ b/src/rgw/rgw_common.cc @@ -639,7 +639,7 @@ bool parse_iso8601(const char *s, struct tm *t, uint32_t *pns, bool extended_for return true; } -int parse_key_value(string& in_str, const char *delim, string& key, string& val) +int parse_key_value(const string& in_str, const char *delim, string& key, string& val) { if (delim == NULL) return -EINVAL; @@ -654,7 +654,7 @@ int parse_key_value(string& in_str, const char *delim, string& key, string& val) return 0; } -int parse_key_value(string& in_str, string& key, string& val) +int parse_key_value(const string& in_str, string& key, string& val) { return parse_key_value(in_str, "=", key,val); } @@ -1135,7 +1135,7 @@ Effect eval_identity_or_session_policies(const DoutPrefixProvider* dpp, bool verify_user_permission(const DoutPrefixProvider* dpp, perm_state_base * const s, - RGWAccessControlPolicy * const user_acl, + const RGWAccessControlPolicy& user_acl, const vector<rgw::IAM::Policy>& user_policies, const vector<rgw::IAM::Policy>& session_policies, const rgw::ARN& res, @@ -1176,20 +1176,20 @@ bool verify_user_permission(const DoutPrefixProvider* dpp, bool verify_user_permission_no_policy(const DoutPrefixProvider* dpp, struct perm_state_base * const s, - RGWAccessControlPolicy * const user_acl, + const RGWAccessControlPolicy& user_acl, const int perm) { if (s->identity->get_identity_type() == TYPE_ROLE) return false; - /* S3 doesn't support account ACLs. */ - if (!user_acl) + /* S3 doesn't support account ACLs, so user_acl will be uninitialized. 
*/ + if (user_acl.get_owner().id.empty()) return true; if ((perm & (int)s->perm_mask) != perm) return false; - return user_acl->verify_permission(dpp, *s->identity, perm, perm); + return user_acl.verify_permission(dpp, *s->identity, perm, perm); } bool verify_user_permission(const DoutPrefixProvider* dpp, @@ -1199,7 +1199,7 @@ bool verify_user_permission(const DoutPrefixProvider* dpp, bool mandatory_policy) { perm_state_from_req_state ps(s); - return verify_user_permission(dpp, &ps, s->user_acl.get(), s->iam_user_policies, s->session_policies, res, op, mandatory_policy); + return verify_user_permission(dpp, &ps, s->user_acl, s->iam_user_policies, s->session_policies, res, op, mandatory_policy); } bool verify_user_permission_no_policy(const DoutPrefixProvider* dpp, @@ -1207,7 +1207,7 @@ bool verify_user_permission_no_policy(const DoutPrefixProvider* dpp, const int perm) { perm_state_from_req_state ps(s); - return verify_user_permission_no_policy(dpp, &ps, s->user_acl.get(), perm); + return verify_user_permission_no_policy(dpp, &ps, s->user_acl, perm); } bool verify_requester_payer_permission(struct perm_state_base *s) @@ -1233,8 +1233,8 @@ bool verify_requester_payer_permission(struct perm_state_base *s) bool verify_bucket_permission(const DoutPrefixProvider* dpp, struct perm_state_base * const s, const rgw_bucket& bucket, - RGWAccessControlPolicy * const user_acl, - RGWAccessControlPolicy * const bucket_acl, + const RGWAccessControlPolicy& user_acl, + const RGWAccessControlPolicy& bucket_acl, const boost::optional<Policy>& bucket_policy, const vector<Policy>& identity_policies, const vector<Policy>& session_policies, @@ -1292,8 +1292,8 @@ bool verify_bucket_permission(const DoutPrefixProvider* dpp, bool verify_bucket_permission(const DoutPrefixProvider* dpp, req_state * const s, const rgw_bucket& bucket, - RGWAccessControlPolicy * const user_acl, - RGWAccessControlPolicy * const bucket_acl, + const RGWAccessControlPolicy& user_acl, + const RGWAccessControlPolicy& bucket_acl, const boost::optional<Policy>& bucket_policy, const vector<Policy>& user_policies, const vector<Policy>& session_policies, @@ -1307,31 +1307,25 @@ bool verify_bucket_permission(const DoutPrefixProvider* dpp, } bool verify_bucket_permission_no_policy(const DoutPrefixProvider* dpp, struct perm_state_base * const s, - RGWAccessControlPolicy * const user_acl, - RGWAccessControlPolicy * const bucket_acl, + const RGWAccessControlPolicy& user_acl, + const RGWAccessControlPolicy& bucket_acl, const int perm) { - if (!bucket_acl) - return false; - if ((perm & (int)s->perm_mask) != perm) return false; - if (bucket_acl->verify_permission(dpp, *s->identity, perm, perm, - s->get_referer(), - s->bucket_access_conf && - s->bucket_access_conf->ignore_public_acls())) + if (bucket_acl.verify_permission(dpp, *s->identity, perm, perm, + s->get_referer(), + s->bucket_access_conf && + s->bucket_access_conf->ignore_public_acls())) return true; - if (!user_acl) - return false; - - return user_acl->verify_permission(dpp, *s->identity, perm, perm); + return user_acl.verify_permission(dpp, *s->identity, perm, perm); } bool verify_bucket_permission_no_policy(const DoutPrefixProvider* dpp, req_state * const s, - RGWAccessControlPolicy * const user_acl, - RGWAccessControlPolicy * const bucket_acl, + const RGWAccessControlPolicy& user_acl, + const RGWAccessControlPolicy& bucket_acl, const int perm) { perm_state_from_req_state ps(s); @@ -1351,8 +1345,8 @@ bool verify_bucket_permission_no_policy(const DoutPrefixProvider* dpp, req_state return 
verify_bucket_permission_no_policy(dpp, &ps, - s->user_acl.get(), - s->bucket_acl.get(), + s->user_acl, + s->bucket_acl, perm); } @@ -1368,8 +1362,8 @@ bool verify_bucket_permission(const DoutPrefixProvider* dpp, req_state * const s return verify_bucket_permission(dpp, &ps, s->bucket->get_key(), - s->user_acl.get(), - s->bucket_acl.get(), + s->user_acl, + s->bucket_acl, s->iam_policy, s->iam_user_policies, s->session_policies, @@ -1421,7 +1415,7 @@ int verify_bucket_owner_or_policy(req_state* const s, identity_policy_res == Effect::Allow || (e == Effect::Pass && identity_policy_res == Effect::Pass && - s->auth.identity->is_owner_of(s->bucket_owner.get_id()))) { + s->auth.identity->is_owner_of(s->bucket_owner.id))) { return 0; } else { return -EACCES; @@ -1432,8 +1426,8 @@ int verify_bucket_owner_or_policy(req_state* const s, static inline bool check_deferred_bucket_perms(const DoutPrefixProvider* dpp, struct perm_state_base * const s, const rgw_bucket& bucket, - RGWAccessControlPolicy * const user_acl, - RGWAccessControlPolicy * const bucket_acl, + const RGWAccessControlPolicy& user_acl, + const RGWAccessControlPolicy& bucket_acl, const boost::optional<Policy>& bucket_policy, const vector<Policy>& identity_policies, const vector<Policy>& session_policies, @@ -1446,8 +1440,8 @@ static inline bool check_deferred_bucket_perms(const DoutPrefixProvider* dpp, static inline bool check_deferred_bucket_only_acl(const DoutPrefixProvider* dpp, struct perm_state_base * const s, - RGWAccessControlPolicy * const user_acl, - RGWAccessControlPolicy * const bucket_acl, + const RGWAccessControlPolicy& user_acl, + const RGWAccessControlPolicy& bucket_acl, const uint8_t deferred_check, const int perm) { @@ -1457,9 +1451,9 @@ static inline bool check_deferred_bucket_only_acl(const DoutPrefixProvider* dpp, bool verify_object_permission(const DoutPrefixProvider* dpp, struct perm_state_base * const s, const rgw_obj& obj, - RGWAccessControlPolicy * const user_acl, - RGWAccessControlPolicy * const bucket_acl, - RGWAccessControlPolicy * const object_acl, + const RGWAccessControlPolicy& user_acl, + const RGWAccessControlPolicy& bucket_acl, + const RGWAccessControlPolicy& object_acl, const boost::optional<Policy>& bucket_policy, const vector<Policy>& identity_policies, const vector<Policy>& session_policies, @@ -1512,14 +1506,10 @@ bool verify_object_permission(const DoutPrefixProvider* dpp, struct perm_state_b return true; } - if (!object_acl) { - return false; - } - - bool ret = object_acl->verify_permission(dpp, *s->identity, s->perm_mask, perm, - nullptr, /* http_referrer */ - s->bucket_access_conf && - s->bucket_access_conf->ignore_public_acls()); + bool ret = object_acl.verify_permission(dpp, *s->identity, s->perm_mask, perm, + nullptr, /* http_referrer */ + s->bucket_access_conf && + s->bucket_access_conf->ignore_public_acls()); if (ret) { return true; } @@ -1541,21 +1531,18 @@ bool verify_object_permission(const DoutPrefixProvider* dpp, struct perm_state_b /* we already verified the user mask above, so we pass swift_perm as the mask here, otherwise the mask might not cover the swift permissions bits */ - if (bucket_acl->verify_permission(dpp, *s->identity, swift_perm, swift_perm, - s->get_referer())) + if (bucket_acl.verify_permission(dpp, *s->identity, swift_perm, swift_perm, + s->get_referer())) return true; - if (!user_acl) - return false; - - return user_acl->verify_permission(dpp, *s->identity, swift_perm, swift_perm); + return user_acl.verify_permission(dpp, *s->identity, swift_perm, swift_perm); } bool 
verify_object_permission(const DoutPrefixProvider* dpp, req_state * const s, const rgw_obj& obj, - RGWAccessControlPolicy * const user_acl, - RGWAccessControlPolicy * const bucket_acl, - RGWAccessControlPolicy * const object_acl, + const RGWAccessControlPolicy& user_acl, + const RGWAccessControlPolicy& bucket_acl, + const RGWAccessControlPolicy& object_acl, const boost::optional<Policy>& bucket_policy, const vector<Policy>& identity_policies, const vector<Policy>& session_policies, @@ -1570,9 +1557,9 @@ bool verify_object_permission(const DoutPrefixProvider* dpp, req_state * const s bool verify_object_permission_no_policy(const DoutPrefixProvider* dpp, struct perm_state_base * const s, - RGWAccessControlPolicy * const user_acl, - RGWAccessControlPolicy * const bucket_acl, - RGWAccessControlPolicy * const object_acl, + const RGWAccessControlPolicy& user_acl, + const RGWAccessControlPolicy& bucket_acl, + const RGWAccessControlPolicy& object_acl, const int perm) { if (check_deferred_bucket_only_acl(dpp, s, user_acl, bucket_acl, RGW_DEFER_TO_BUCKET_ACLS_RECURSE, perm) || @@ -1580,14 +1567,10 @@ bool verify_object_permission_no_policy(const DoutPrefixProvider* dpp, return true; } - if (!object_acl) { - return false; - } - - bool ret = object_acl->verify_permission(dpp, *s->identity, s->perm_mask, perm, - nullptr, /* http referrer */ - s->bucket_access_conf && - s->bucket_access_conf->ignore_public_acls()); + bool ret = object_acl.verify_permission(dpp, *s->identity, s->perm_mask, perm, + nullptr, /* http referrer */ + s->bucket_access_conf && + s->bucket_access_conf->ignore_public_acls()); if (ret) { return true; } @@ -1609,14 +1592,11 @@ bool verify_object_permission_no_policy(const DoutPrefixProvider* dpp, /* we already verified the user mask above, so we pass swift_perm as the mask here, otherwise the mask might not cover the swift permissions bits */ - if (bucket_acl->verify_permission(dpp, *s->identity, swift_perm, swift_perm, - s->get_referer())) + if (bucket_acl.verify_permission(dpp, *s->identity, swift_perm, swift_perm, + s->get_referer())) return true; - if (!user_acl) - return false; - - return user_acl->verify_permission(dpp, *s->identity, swift_perm, swift_perm); + return user_acl.verify_permission(dpp, *s->identity, swift_perm, swift_perm); } bool verify_object_permission_no_policy(const DoutPrefixProvider* dpp, req_state *s, int perm) @@ -1628,9 +1608,9 @@ bool verify_object_permission_no_policy(const DoutPrefixProvider* dpp, req_state return verify_object_permission_no_policy(dpp, &ps, - s->user_acl.get(), - s->bucket_acl.get(), - s->object_acl.get(), + s->user_acl, + s->bucket_acl, + s->object_acl, perm); } @@ -1641,9 +1621,9 @@ bool verify_object_permission(const DoutPrefixProvider* dpp, req_state *s, uint6 return verify_object_permission(dpp, &ps, rgw_obj(s->bucket->get_key(), s->object->get_key()), - s->user_acl.get(), - s->bucket_acl.get(), - s->object_acl.get(), + s->user_acl, + s->bucket_acl, + s->object_acl, s->iam_policy, s->iam_user_policies, s->session_policies, diff --git a/src/rgw/rgw_common.h b/src/rgw/rgw_common.h index 85d5ab2c155..e6973766929 100644 --- a/src/rgw/rgw_common.h +++ b/src/rgw/rgw_common.h @@ -1180,9 +1180,9 @@ struct req_state : DoutPrefixProvider { } s3_postobj_creds; } auth; - std::unique_ptr<RGWAccessControlPolicy> user_acl; - std::unique_ptr<RGWAccessControlPolicy> bucket_acl; - std::unique_ptr<RGWAccessControlPolicy> object_acl; + RGWAccessControlPolicy user_acl; + RGWAccessControlPolicy bucket_acl; + RGWAccessControlPolicy object_acl; 
rgw::IAM::Environment env; boost::optional<rgw::IAM::Policy> iam_policy; @@ -1490,8 +1490,8 @@ bool rgw_set_amz_meta_header( extern std::string rgw_string_unquote(const std::string& s); extern void parse_csv_string(const std::string& ival, std::vector<std::string>& ovals); -extern int parse_key_value(std::string& in_str, std::string& key, std::string& val); -extern int parse_key_value(std::string& in_str, const char *delim, std::string& key, std::string& val); +extern int parse_key_value(const std::string& in_str, std::string& key, std::string& val); +extern int parse_key_value(const std::string& in_str, const char *delim, std::string& key, std::string& val); extern boost::optional<std::pair<std::string_view,std::string_view>> parse_key_value(const std::string_view& in_str, @@ -1584,20 +1584,20 @@ struct perm_state : public perm_state_base { bool verify_bucket_permission_no_policy( const DoutPrefixProvider* dpp, struct perm_state_base * const s, - RGWAccessControlPolicy * const user_acl, - RGWAccessControlPolicy * const bucket_acl, + const RGWAccessControlPolicy& user_acl, + const RGWAccessControlPolicy& bucket_acl, const int perm); bool verify_user_permission_no_policy(const DoutPrefixProvider* dpp, struct perm_state_base * const s, - RGWAccessControlPolicy * const user_acl, + const RGWAccessControlPolicy& user_acl, const int perm); bool verify_object_permission_no_policy(const DoutPrefixProvider* dpp, struct perm_state_base * const s, - RGWAccessControlPolicy * const user_acl, - RGWAccessControlPolicy * const bucket_acl, - RGWAccessControlPolicy * const object_acl, + const RGWAccessControlPolicy& user_acl, + const RGWAccessControlPolicy& bucket_acl, + const RGWAccessControlPolicy& object_acl, const int perm); /** Check if the req_state's user has the necessary permissions @@ -1609,7 +1609,7 @@ rgw::IAM::Effect eval_identity_or_session_policies(const DoutPrefixProvider* dpp const rgw::ARN& arn); bool verify_user_permission(const DoutPrefixProvider* dpp, req_state * const s, - RGWAccessControlPolicy * const user_acl, + const RGWAccessControlPolicy& user_acl, const std::vector<rgw::IAM::Policy>& user_policies, const std::vector<rgw::IAM::Policy>& session_policies, const rgw::ARN& res, @@ -1617,7 +1617,7 @@ bool verify_user_permission(const DoutPrefixProvider* dpp, bool mandatory_policy=true); bool verify_user_permission_no_policy(const DoutPrefixProvider* dpp, req_state * const s, - RGWAccessControlPolicy * const user_acl, + const RGWAccessControlPolicy& user_acl, const int perm); bool verify_user_permission(const DoutPrefixProvider* dpp, req_state * const s, @@ -1631,8 +1631,8 @@ bool verify_bucket_permission( const DoutPrefixProvider* dpp, req_state * const s, const rgw_bucket& bucket, - RGWAccessControlPolicy * const user_acl, - RGWAccessControlPolicy * const bucket_acl, + const RGWAccessControlPolicy& user_acl, + const RGWAccessControlPolicy& bucket_acl, const boost::optional<rgw::IAM::Policy>& bucket_policy, const std::vector<rgw::IAM::Policy>& identity_policies, const std::vector<rgw::IAM::Policy>& session_policies, @@ -1641,8 +1641,8 @@ bool verify_bucket_permission(const DoutPrefixProvider* dpp, req_state * const s bool verify_bucket_permission_no_policy( const DoutPrefixProvider* dpp, req_state * const s, - RGWAccessControlPolicy * const user_acl, - RGWAccessControlPolicy * const bucket_acl, + const RGWAccessControlPolicy& user_acl, + const RGWAccessControlPolicy& bucket_acl, const int perm); bool verify_bucket_permission_no_policy(const DoutPrefixProvider* dpp, req_state * const 
s, @@ -1653,9 +1653,9 @@ extern bool verify_object_permission( const DoutPrefixProvider* dpp, req_state * const s, const rgw_obj& obj, - RGWAccessControlPolicy * const user_acl, - RGWAccessControlPolicy * const bucket_acl, - RGWAccessControlPolicy * const object_acl, + const RGWAccessControlPolicy& user_acl, + const RGWAccessControlPolicy& bucket_acl, + const RGWAccessControlPolicy& object_acl, const boost::optional<rgw::IAM::Policy>& bucket_policy, const std::vector<rgw::IAM::Policy>& identity_policies, const std::vector<rgw::IAM::Policy>& session_policies, @@ -1664,9 +1664,9 @@ extern bool verify_object_permission(const DoutPrefixProvider* dpp, req_state *s extern bool verify_object_permission_no_policy( const DoutPrefixProvider* dpp, req_state * const s, - RGWAccessControlPolicy * const user_acl, - RGWAccessControlPolicy * const bucket_acl, - RGWAccessControlPolicy * const object_acl, + const RGWAccessControlPolicy& user_acl, + const RGWAccessControlPolicy& bucket_acl, + const RGWAccessControlPolicy& object_acl, int perm); extern bool verify_object_permission_no_policy(const DoutPrefixProvider* dpp, req_state *s, int perm); diff --git a/src/rgw/rgw_file.cc b/src/rgw/rgw_file.cc index e1217173fed..f0100e37c2b 100644 --- a/src/rgw/rgw_file.cc +++ b/src/rgw/rgw_file.cc @@ -1872,7 +1872,7 @@ namespace rgw { } } processor = get_driver()->get_atomic_writer(this, state->yield, state->object.get(), - state->bucket_owner.get_id(), + state->bucket_owner.id, &state->dest_placement, 0, state->req_id); op_ret = processor->prepare(state->yield); diff --git a/src/rgw/rgw_file_int.h b/src/rgw/rgw_file_int.h index 91c858e5b3b..5658003adb8 100644 --- a/src/rgw/rgw_file_int.h +++ b/src/rgw/rgw_file_int.h @@ -1914,11 +1914,9 @@ public: int get_params(optional_yield) override { req_state* state = get_state(); - RGWAccessControlPolicy_S3 s3policy(state->cct); - /* we don't have (any) headers, so just create canned ACLs */ - int ret = s3policy.create_canned(state->owner, state->bucket_owner, state->canned_acl); - policy = s3policy; - return ret; + /* we don't have (any) headers, so just create default ACLs */ + policy.create_default(state->owner.id, state->owner.display_name); + return 0; } void send_response() override { @@ -2030,11 +2028,9 @@ public: int get_params(optional_yield) override { req_state* state = get_state(); - RGWAccessControlPolicy_S3 s3policy(state->cct); - /* we don't have (any) headers, so just create canned ACLs */ - int ret = s3policy.create_canned(state->owner, state->bucket_owner, state->canned_acl); - policy = s3policy; - return ret; + /* we don't have (any) headers, so just create default ACLs */ + policy.create_default(state->owner.id, state->owner.display_name); + return 0; } int get_data(buffer::list& _bl) override { @@ -2534,11 +2530,9 @@ public: int get_params(optional_yield) override { req_state* state = get_state(); - RGWAccessControlPolicy_S3 s3policy(state->cct); - /* we don't have (any) headers, so just create canned ACLs */ - int ret = s3policy.create_canned(state->owner, state->bucket_owner, state->canned_acl); - policy = s3policy; - return ret; + /* we don't have (any) headers, so just create default ACLs */ + policy.create_default(state->owner.id, state->owner.display_name); + return 0; } int get_data(buffer::list& _bl) override { @@ -2641,15 +2635,13 @@ public: int get_params(optional_yield) override { req_state* s = get_state(); - RGWAccessControlPolicy_S3 s3policy(s->cct); - /* we don't have (any) headers, so just create canned ACLs */ - int ret = 
s3policy.create_canned(s->owner, s->bucket_owner, s->canned_acl); - dest_policy = s3policy; + /* we don't have (any) headers, so just create default ACLs */ + dest_policy.create_default(s->owner.id, s->owner.display_name); /* src_object required before RGWCopyObj::verify_permissions() */ rgw_obj_key k = rgw_obj_key(src_name); s->src_object = s->bucket->get_object(k); s->object = s->src_object->clone(); // needed to avoid trap at rgw_op.cc:5150 - return ret; + return 0; } void send_response() override {} diff --git a/src/rgw/rgw_kafka.cc b/src/rgw/rgw_kafka.cc index eb6b85071a7..4593a4de67b 100644 --- a/src/rgw/rgw_kafka.cc +++ b/src/rgw/rgw_kafka.cc @@ -100,8 +100,9 @@ struct connection_t { // fire all remaining callbacks (if not fired by rd_kafka_flush) std::for_each(callbacks.begin(), callbacks.end(), [this](auto& cb_tag) { cb_tag.cb(status); - ldout(cct, 20) << "Kafka destroy: invoking callback with tag=" << cb_tag.tag << - " for: " << broker << dendl; + ldout(cct, 20) << "Kafka destroy: invoking callback with tag=" + << cb_tag.tag << " for: " << broker + << " with status: " << status << dendl; }); callbacks.clear(); delivery_tag = 1; @@ -418,7 +419,9 @@ private: if (tag) { auto const q_len = conn->callbacks.size(); if (q_len < max_inflight) { - ldout(conn->cct, 20) << "Kafka publish (with callback, tag=" << *tag << "): OK. Queue has: " << q_len << " callbacks" << dendl; + ldout(conn->cct, 20) + << "Kafka publish (with callback, tag=" << *tag + << "): OK. Queue has: " << q_len + 1 << " callbacks" << dendl; conn->callbacks.emplace_back(*tag, message->cb); } else { // immediately invoke callback with error - this is not a connection error @@ -463,6 +466,7 @@ private: if(conn->timestamp.sec() + max_idle_time < ceph_clock_now()) { ldout(conn->cct, 20) << "kafka run: deleting a connection due to idle behaviour: " << ceph_clock_now() << dendl; std::lock_guard lock(connections_lock); + conn->status = STATUS_CONNECTION_IDLE; conn_it = connections.erase(conn_it); --connection_count; \ continue; diff --git a/src/rgw/rgw_lc.cc b/src/rgw/rgw_lc.cc index 79b81270457..b857cc7a82f 100644 --- a/src/rgw/rgw_lc.cc +++ b/src/rgw/rgw_lc.cc @@ -41,6 +41,9 @@ #define dout_context g_ceph_context #define dout_subsys ceph_subsys_rgw +constexpr int32_t hours_in_a_day = 24; +constexpr int32_t secs_in_a_day = hours_in_a_day * 60 * 60; + using namespace std; const char* LC_STATUS[] = { @@ -289,7 +292,7 @@ static bool obj_has_expired(const DoutPrefixProvider *dpp, CephContext *cct, cep utime_t base_time; if (cct->_conf->rgw_lc_debug_interval <= 0) { /* Normal case, run properly */ - cmp = double(days)*24*60*60; + cmp = double(days) * secs_in_a_day; base_time = ceph_clock_now().round_to_day(); } else { /* We're in debug mode; Treat each rgw_lc_debug_interval seconds as a day */ @@ -546,9 +549,9 @@ static int remove_expired_obj( = obj->get_delete_op(); del_op->params.versioning_status = obj->get_bucket()->get_info().versioning_status(); - del_op->params.obj_owner.set_id(rgw_user {meta.owner}); - del_op->params.obj_owner.set_name(meta.owner_display_name); - del_op->params.bucket_owner.set_id(bucket_info.owner); + del_op->params.obj_owner.id = rgw_user{meta.owner}; + del_op->params.obj_owner.display_name = meta.owner_display_name; + del_op->params.bucket_owner.id = bucket_info.owner; del_op->params.unmod_since = meta.mtime; // notification supported only for RADOS driver for now @@ -814,7 +817,6 @@ int RGWLC::handle_multipart_expiration(rgw::sal::Bucket* target, const multimap<string, lc_op>& prefix_map, LCWorker* 
worker, time_t stop_at, bool once) { - MultipartMetaFilter mp_filter; int ret; rgw::sal::Bucket::ListParams params; rgw::sal::Bucket::ListResults results; @@ -825,7 +827,7 @@ int RGWLC::handle_multipart_expiration(rgw::sal::Bucket* target, * operating on one shard at a time */ params.allow_unordered = true; params.ns = RGW_OBJ_NS_MULTIPART; - params.access_list_filter = &mp_filter; + params.access_list_filter = MultipartMetaFilter; const auto event_type = rgw::notify::ObjectExpirationAbortMPU; @@ -1108,7 +1110,7 @@ public: return is_expired; } - int process(lc_op_ctx& oc) { + int process(lc_op_ctx& oc) override { auto& o = oc.o; int r; if (o.is_delete_marker()) { @@ -1172,7 +1174,7 @@ public: pass_object_lock_check(oc.driver, oc.obj.get(), dpp); } - int process(lc_op_ctx& oc) { + int process(lc_op_ctx& oc) override { auto& o = oc.o; int r = remove_expired_obj(oc.dpp, oc, true, rgw::notify::ObjectExpirationNoncurrent); @@ -1217,7 +1219,7 @@ public: return true; } - int process(lc_op_ctx& oc) { + int process(lc_op_ctx& oc) override { auto& o = oc.o; int r = remove_expired_obj(oc.dpp, oc, true, rgw::notify::ObjectExpirationDeleteMarker); @@ -1385,7 +1387,7 @@ public: return 0; } - int process(lc_op_ctx& oc) { + int process(lc_op_ctx& oc) override { auto& o = oc.o; int r; @@ -1460,7 +1462,7 @@ protected: public: LCOpAction_CurrentTransition(const transition_action& _transition) : LCOpAction_Transition(_transition) {} - int process(lc_op_ctx& oc) { + int process(lc_op_ctx& oc) override { int r = LCOpAction_Transition::process(oc); if (r == 0) { if (perfcounter) { @@ -1485,7 +1487,7 @@ public: const transition_action& _transition) : LCOpAction_Transition(_transition) {} - int process(lc_op_ctx& oc) { + int process(lc_op_ctx& oc) override { int r = LCOpAction_Transition::process(oc); if (r == 0) { if (perfcounter) { @@ -1944,8 +1946,7 @@ bool RGWLC::expired_session(time_t started) } time_t interval = (cct->_conf->rgw_lc_debug_interval > 0) - ? cct->_conf->rgw_lc_debug_interval - : 24*60*60; + ? cct->_conf->rgw_lc_debug_interval : secs_in_a_day; auto now = time(nullptr); @@ -1961,8 +1962,7 @@ bool RGWLC::expired_session(time_t started) time_t RGWLC::thread_stop_at() { uint64_t interval = (cct->_conf->rgw_lc_debug_interval > 0) - ? cct->_conf->rgw_lc_debug_interval - : 24*60*60; + ? cct->_conf->rgw_lc_debug_interval : secs_in_a_day; return time(nullptr) + interval; } @@ -2053,7 +2053,7 @@ static inline bool allow_shard_rollover(CephContext* cct, time_t now, time_t sha * - the current shard has not rolled over in the last 24 hours */ if (((shard_rollover_date < now) && - (now - shard_rollover_date > 24*60*60)) || + (now - shard_rollover_date > secs_in_a_day)) || (! shard_rollover_date /* no rollover date stored */) || (cct->_conf->rgw_lc_debug_interval > 0 /* defaults to -1 == disabled */)) { return true; @@ -2079,7 +2079,7 @@ static inline bool already_run_today(CephContext* cct, time_t start_date) bdt.tm_min = 0; bdt.tm_sec = 0; begin_of_day = mktime(&bdt); - if (now - begin_of_day < 24*60*60) + if (now - begin_of_day < secs_in_a_day) return true; else return false; @@ -2425,6 +2425,12 @@ bool RGWLC::LCWorker::should_work(utime_t& now) time_t tt = now.sec(); localtime_r(&tt, &bdt); + // next-day adjustment if the configured end_hour is less than start_hour + if (end_hour < start_hour) { + bdt.tm_hour = bdt.tm_hour > end_hour ? 
bdt.tm_hour : bdt.tm_hour + hours_in_a_day; + end_hour += hours_in_a_day; + } + if (cct->_conf->rgw_lc_debug_interval > 0) { /* We're debugging, so say we can run */ return true; @@ -2465,7 +2471,7 @@ int RGWLC::LCWorker::schedule_next_start_time(utime_t &start, utime_t& now) nt = mktime(&bdt); secs = nt - tt; - return secs>0 ? secs : secs+24*60*60; + return secs > 0 ? secs : secs + secs_in_a_day; } RGWLC::LCWorker::~LCWorker() @@ -2756,7 +2762,7 @@ std::string s3_expiration_header( if (rule_expiration.has_days()) { rule_expiration_date = boost::optional<ceph::real_time>( - mtime + make_timespan(double(rule_expiration.get_days())*24*60*60 - ceph::real_clock::to_time_t(mtime)%(24*60*60) + 24*60*60)); + mtime + make_timespan(double(rule_expiration.get_days()) * secs_in_a_day - ceph::real_clock::to_time_t(mtime)%(secs_in_a_day) + secs_in_a_day)); } } @@ -2835,7 +2841,7 @@ bool s3_multipart_abort_header( std::optional<ceph::real_time> rule_abort_date; if (mp_expiration.has_days()) { rule_abort_date = std::optional<ceph::real_time>( - mtime + make_timespan(mp_expiration.get_days()*24*60*60 - ceph::real_clock::to_time_t(mtime)%(24*60*60) + 24*60*60)); + mtime + make_timespan(mp_expiration.get_days() * secs_in_a_day - ceph::real_clock::to_time_t(mtime)%(secs_in_a_day) + secs_in_a_day)); } // update earliest abort date diff --git a/src/rgw/rgw_lib.cc b/src/rgw/rgw_lib.cc index 5b0a3c72849..3168abadb0b 100644 --- a/src/rgw/rgw_lib.cc +++ b/src/rgw/rgw_lib.cc @@ -468,6 +468,7 @@ namespace rgw { int RGWLib::init(vector<const char*>& args) { + int r{0}; /* alternative default for module */ map<std::string,std::string> defaults = { { "debug_rgw", "1/5" }, @@ -524,7 +525,13 @@ namespace rgw { register_async_signal_handler(SIGUSR1, rgw::signal::handle_sigterm); main.init_tracepoints(); - main.init_frontends2(this /* rgwlib */); + r = main.init_frontends2(this /* rgwlib */); + if (r != 0) { + derr << "ERROR: unable to initialize frontend, r = " << r << dendl; + main.shutdown(); + return r; + } + main.init_notification_endpoints(); main.init_lua(); @@ -601,8 +608,8 @@ namespace rgw { s->perm_mask = RGW_PERM_FULL_CONTROL; // populate the owner info - s->owner.set_id(s->user->get_id()); - s->owner.set_name(s->user->get_display_name()); + s->owner.id = s->user->get_id(); + s->owner.display_name = s->user->get_display_name(); return 0; } /* RGWHandler_Lib::authorize */ diff --git a/src/rgw/rgw_log.cc b/src/rgw/rgw_log.cc index 9bc27bbe9ff..cc7e085ab72 100644 --- a/src/rgw/rgw_log.cc +++ b/src/rgw/rgw_log.cc @@ -207,14 +207,14 @@ static void log_usage(req_state *s, const string& op_name) bucket_name = s->bucket_name; if (!bucket_name.empty()) { - bucket_name = s->bucket_name; - user = s->bucket_owner.get_id(); + bucket_name = s->bucket_name; + user = s->bucket_owner.id; if (!rgw::sal::Bucket::empty(s->bucket.get()) && s->bucket->get_info().requester_pays) { payer = s->user->get_id(); } } else { - user = s->user->get_id(); + user = s->user->get_id(); } bool error = s->err.is_err(); @@ -647,9 +647,8 @@ int rgw_log_op(RGWREST* const rest, req_state *s, const RGWOp* op, OpsLogSink *o } entry.user = s->user->get_id().to_str(); - if (s->object_acl) - entry.object_owner = s->object_acl->get_owner().get_id(); - entry.bucket_owner = s->bucket_owner.get_id(); + entry.object_owner = s->object_acl.get_owner().id; + entry.bucket_owner = s->bucket_owner.id; uint64_t bytes_sent = ACCOUNTING_IO(s)->get_bytes_sent(); uint64_t bytes_received = ACCOUNTING_IO(s)->get_bytes_received(); diff --git a/src/rgw/rgw_lua_request.cc 
b/src/rgw/rgw_lua_request.cc index cb819deff14..a36aad666f4 100644 --- a/src/rgw/rgw_lua_request.cc +++ b/src/rgw/rgw_lua_request.cc @@ -260,10 +260,9 @@ struct OwnerMetaTable : public EmptyMetaTable { const char* index = luaL_checkstring(L, 2); if (strcasecmp(index, "DisplayName") == 0) { - pushstring(L, owner->get_display_name()); + pushstring(L, owner->display_name); } else if (strcasecmp(index, "User") == 0) { - create_metatable<UserMetaTable>(L, name, index, false, - &(owner->get_id())); + create_metatable<UserMetaTable>(L, name, index, false, &owner->id); } else { return error_unknown_field(L, index, name); } @@ -365,19 +364,26 @@ struct GrantMetaTable : public EmptyMetaTable { if (strcasecmp(index, "Type") == 0) { lua_pushinteger(L, grant->get_type().get_type()); } else if (strcasecmp(index, "User") == 0) { - const auto id_ptr = grant->get_id(); - if (id_ptr) { + if (const auto user = grant->get_user(); user) { create_metatable<UserMetaTable>(L, name, index, false, - const_cast<rgw_user*>(id_ptr)); + const_cast<rgw_user*>(&user->id)); } else { lua_pushnil(L); } } else if (strcasecmp(index, "Permission") == 0) { lua_pushinteger(L, grant->get_permission().get_permissions()); } else if (strcasecmp(index, "GroupType") == 0) { - lua_pushinteger(L, grant->get_group()); + if (const auto group = grant->get_group(); group) { + lua_pushinteger(L, group->type); + } else { + lua_pushnil(L); + } } else if (strcasecmp(index, "Referer") == 0) { - pushstring(L, grant->get_referer()); + if (const auto referer = grant->get_referer(); referer) { + pushstring(L, referer->url_spec); + } else { + lua_pushnil(L); + } } else { return error_unknown_field(L, index, name); } @@ -712,11 +718,11 @@ struct RequestMetaTable : public EmptyMetaTable { } else if (strcasecmp(index, "ZoneGroup") == 0) { create_metatable<ZoneGroupMetaTable>(L, name, index, false, s); } else if (strcasecmp(index, "UserACL") == 0) { - create_metatable<ACLMetaTable>(L, name, index, false, s->user_acl); + create_metatable<ACLMetaTable>(L, name, index, false, &s->user_acl); } else if (strcasecmp(index, "BucketACL") == 0) { - create_metatable<ACLMetaTable>(L, name, index, false, s->bucket_acl); + create_metatable<ACLMetaTable>(L, name, index, false, &s->bucket_acl); } else if (strcasecmp(index, "ObjectACL") == 0) { - create_metatable<ACLMetaTable>(L, name, index, false, s->object_acl); + create_metatable<ACLMetaTable>(L, name, index, false, &s->object_acl); } else if (strcasecmp(index, "Environment") == 0) { create_metatable<StringMapMetaTable<rgw::IAM::Environment>>(L, name, index, false, &(s->env)); } else if (strcasecmp(index, "Policy") == 0) { diff --git a/src/rgw/rgw_main.cc b/src/rgw/rgw_main.cc index 27b02f84195..57407b43481 100644 --- a/src/rgw/rgw_main.cc +++ b/src/rgw/rgw_main.cc @@ -157,7 +157,12 @@ int main(int argc, char *argv[]) main.init_opslog(); main.init_tracepoints(); main.init_lua(); - main.init_frontends2(nullptr /* RGWLib */); + r = main.init_frontends2(nullptr /* RGWLib */); + if (r != 0) { + derr << "ERROR: initialize frontend fail, r = " << r << dendl; + main.shutdown(); + return r; + } main.init_notification_endpoints(); #if defined(HAVE_SYS_PRCTL_H) diff --git a/src/rgw/rgw_mdlog.h b/src/rgw/rgw_mdlog.h index 152126890b7..6c78fd38408 100644 --- a/src/rgw/rgw_mdlog.h +++ b/src/rgw/rgw_mdlog.h @@ -16,12 +16,13 @@ #pragma once +#include "include/rados/librados.hpp" + #include "common/RWLock.h" #include "rgw_metadata.h" #include "rgw_mdlog_types.h" - -#include "services/svc_rados.h" +#include "rgw_tools.h" #define 
META_LOG_OBJ_PREFIX "meta.log." @@ -40,7 +41,7 @@ class RGWMetadataLogInfoCompletion : public RefCountedObject { using info_callback_t = std::function<void(int, const cls_log_header&)>; private: cls_log_header header; - RGWSI_RADOS::Obj io_obj; + rgw_rados_ref io_obj; librados::AioCompletion *completion; std::mutex mutex; //< protects callback between cancel/complete boost::optional<info_callback_t> callback; //< cleared on cancel @@ -48,7 +49,7 @@ class RGWMetadataLogInfoCompletion : public RefCountedObject { explicit RGWMetadataLogInfoCompletion(info_callback_t callback); ~RGWMetadataLogInfoCompletion() override; - RGWSI_RADOS::Obj& get_io_obj() { return io_obj; } + rgw_rados_ref& get_io_obj() { return io_obj; } cls_log_header& get_header() { return header; } librados::AioCompletion* get_completion() { return completion; } diff --git a/src/rgw/rgw_multipart_meta_filter.cc b/src/rgw/rgw_multipart_meta_filter.cc index c616cd480f7..aeefc731f73 100644 --- a/src/rgw/rgw_multipart_meta_filter.cc +++ b/src/rgw/rgw_multipart_meta_filter.cc @@ -3,11 +3,9 @@ #include "svc_tier_rados.h" -using namespace std; - const std::string MP_META_SUFFIX = ".meta"; -bool MultipartMetaFilter::filter(const string& name, string& key) { +bool MultipartMetaFilter(const std::string& name, std::string& key) { // the length of the suffix so we can skip past it static const size_t MP_META_SUFFIX_LEN = MP_META_SUFFIX.length(); @@ -19,11 +17,11 @@ bool MultipartMetaFilter::filter(const string& name, string& key) { return false; size_t pos = name.find(MP_META_SUFFIX, len - MP_META_SUFFIX_LEN); - if (pos == string::npos) + if (pos == std::string::npos) return false; pos = name.rfind('.', pos - 1); - if (pos == string::npos) + if (pos == std::string::npos) return false; key = name.substr(0, pos); diff --git a/src/rgw/rgw_op.cc b/src/rgw/rgw_op.cc index 7363db728a4..750ad7cb773 100644 --- a/src/rgw/rgw_op.cc +++ b/src/rgw/rgw_op.cc @@ -102,8 +102,6 @@ static string shadow_ns = RGW_OBJ_NS_SHADOW; static void forward_req_info(const DoutPrefixProvider *dpp, CephContext *cct, req_info& info, const std::string& bucket_name); -static MultipartMetaFilter mp_filter; - // this probably should belong in the rgw_iam_policy_keywords, I'll get it to it // at some point static constexpr auto S3_EXISTING_OBJTAG = "s3:ExistingObjectTag"; @@ -221,19 +219,18 @@ done: static int decode_policy(const DoutPrefixProvider *dpp, CephContext *cct, bufferlist& bl, - RGWAccessControlPolicy *policy) + RGWAccessControlPolicy& policy) { auto iter = bl.cbegin(); try { - policy->decode(iter); + policy.decode(iter); } catch (buffer::error& err) { ldpp_dout(dpp, 0) << "ERROR: could not decode policy, caught buffer::error" << dendl; return -EIO; } if (cct->_conf->subsys.should_gather<ceph_subsys_rgw, 15>()) { ldpp_dout(dpp, 15) << __func__ << " Read AccessControlPolicy"; - RGWAccessControlPolicy_S3 *s3policy = static_cast<RGWAccessControlPolicy_S3 *>(policy); - s3policy->to_xml(*_dout); + rgw::s3::write_policy_xml(policy, *_dout); *_dout << dendl; } return 0; @@ -245,17 +242,11 @@ static int get_user_policy_from_attr(const DoutPrefixProvider *dpp, map<string, bufferlist>& attrs, RGWAccessControlPolicy& policy /* out */) { - auto aiter = attrs.find(RGW_ATTR_ACL); - if (aiter != attrs.end()) { - int ret = decode_policy(dpp, cct, aiter->second, &policy); - if (ret < 0) { - return ret; - } - } else { + auto i = attrs.find(RGW_ATTR_ACL); + if (i == attrs.end()) { return -ENOENT; } - - return 0; + return decode_policy(dpp, cct, i->second, policy); } /** @@ -270,7 
+261,7 @@ int rgw_op_get_bucket_policy_from_attr(const DoutPrefixProvider *dpp, rgw::sal::Driver* driver, const rgw_user& bucket_owner, map<string, bufferlist>& bucket_attrs, - RGWAccessControlPolicy *policy, + RGWAccessControlPolicy& policy, optional_yield y) { map<string, bufferlist>::iterator aiter = bucket_attrs.find(RGW_ATTR_ACL); @@ -287,7 +278,7 @@ int rgw_op_get_bucket_policy_from_attr(const DoutPrefixProvider *dpp, if (r < 0) return r; - policy->create_default(user->get_id(), user->get_display_name()); + policy.create_default(user->get_id(), user->get_display_name()); } return 0; } @@ -297,7 +288,7 @@ static int get_obj_policy_from_attr(const DoutPrefixProvider *dpp, rgw::sal::Driver* driver, RGWBucketInfo& bucket_info, map<string, bufferlist>& bucket_attrs, - RGWAccessControlPolicy *policy, + RGWAccessControlPolicy& policy, string *storage_class, rgw::sal::Object* obj, optional_yield y) @@ -320,7 +311,7 @@ static int get_obj_policy_from_attr(const DoutPrefixProvider *dpp, if (ret < 0) return ret; - policy->create_default(bucket_info.owner, user->get_display_name()); + policy.create_default(bucket_info.owner, user->get_display_name()); } if (storage_class) { @@ -387,7 +378,7 @@ static int read_bucket_policy(const DoutPrefixProvider *dpp, req_state *s, RGWBucketInfo& bucket_info, map<string, bufferlist>& bucket_attrs, - RGWAccessControlPolicy *policy, + RGWAccessControlPolicy& policy, rgw_bucket& bucket, optional_yield y) { @@ -404,7 +395,7 @@ static int read_bucket_policy(const DoutPrefixProvider *dpp, int ret = rgw_op_get_bucket_policy_from_attr(dpp, s->cct, driver, bucket_info.owner, bucket_attrs, policy, y); if (ret == -ENOENT) { - ret = -ERR_NO_SUCH_BUCKET; + ret = -ERR_NO_SUCH_BUCKET; } return ret; @@ -415,7 +406,7 @@ static int read_obj_policy(const DoutPrefixProvider *dpp, req_state *s, RGWBucketInfo& bucket_info, map<string, bufferlist>& bucket_attrs, - RGWAccessControlPolicy* acl, + RGWAccessControlPolicy& acl, string *storage_class, boost::optional<Policy>& policy, rgw::sal::Bucket* bucket, @@ -452,14 +443,14 @@ static int read_obj_policy(const DoutPrefixProvider *dpp, if (ret == -ENOENT) { /* object does not exist checking the bucket's ACL to make sure that we send a proper error code */ - RGWAccessControlPolicy bucket_policy(s->cct); + RGWAccessControlPolicy bucket_policy; ret = rgw_op_get_bucket_policy_from_attr(dpp, s->cct, driver, bucket_info.owner, - bucket_attrs, &bucket_policy, y); + bucket_attrs, bucket_policy, y); if (ret < 0) { return ret; } - const rgw_user& bucket_owner = bucket_policy.get_owner().get_id(); - if (bucket_owner.compare(s->user->get_id()) != 0 && + const rgw_user& bucket_owner = bucket_policy.get_owner().id; + if (bucket_owner != s->user->get_id() && ! s->auth.identity->is_admin_of(bucket_owner)) { auto r = eval_identity_or_session_policies(dpp, s->iam_user_policies, s->env, rgw::IAM::s3ListBucket, ARN(bucket->get_key())); @@ -514,20 +505,6 @@ int rgw_build_bucket_policies(const DoutPrefixProvider *dpp, rgw::sal::Driver* d } } - if(s->dialect.compare("s3") == 0) { - s->bucket_acl = std::make_unique<RGWAccessControlPolicy_S3>(s->cct); - } else if(s->dialect.compare("swift") == 0) { - /* We aren't allocating the account policy for those operations using - * the Swift's infrastructure that don't really need req_state::user. - * Typical example here is the implementation of /info. 
*/ - if (!s->user->get_id().empty()) { - s->user_acl = std::make_unique<RGWAccessControlPolicy_SWIFTAcct>(s->cct); - } - s->bucket_acl = std::make_unique<RGWAccessControlPolicy_SWIFT>(s->cct); - } else { - s->bucket_acl = std::make_unique<RGWAccessControlPolicy>(s->cct); - } - const RGWZoneGroup& zonegroup = s->penv.site->get_zonegroup(); /* check if copy source is within the current domain */ @@ -576,13 +553,13 @@ int rgw_build_bucket_policies(const DoutPrefixProvider *dpp, rgw::sal::Driver* d s->bucket_attrs = s->bucket->get_attrs(); ret = read_bucket_policy(dpp, driver, s, s->bucket->get_info(), s->bucket->get_attrs(), - s->bucket_acl.get(), s->bucket->get_key(), y); + s->bucket_acl, s->bucket->get_key(), y); acct_acl_user = { s->bucket->get_info().owner, - s->bucket_acl->get_owner().get_display_name(), + s->bucket_acl.get_owner().display_name, }; - s->bucket_owner = s->bucket_acl->get_owner(); + s->bucket_owner = s->bucket_acl.get_owner(); s->zonegroup_endpoint = rgw::get_zonegroup_endpoint(zonegroup); s->zonegroup_name = zonegroup.get_name(); @@ -618,12 +595,12 @@ int rgw_build_bucket_policies(const DoutPrefixProvider *dpp, rgw::sal::Driver* d } /* handle user ACL only for those APIs which support it */ - if (s->user_acl) { + if (s->dialect == "swift" && !s->user->get_id().empty()) { std::unique_ptr<rgw::sal::User> acl_user = driver->get_user(acct_acl_user.uid); ret = acl_user->read_attrs(dpp, y); if (!ret) { - ret = get_user_policy_from_attr(dpp, s->cct, acl_user->get_attrs(), *s->user_acl); + ret = get_user_policy_from_attr(dpp, s->cct, acl_user->get_attrs(), s->user_acl); } if (-ENOENT == ret) { /* In already existing clusters users won't have ACL. In such case @@ -633,8 +610,8 @@ int rgw_build_bucket_policies(const DoutPrefixProvider *dpp, rgw::sal::Driver* d * 1. if we try to reach an existing bucket, its owner is considered * as account owner. * 2. otherwise account owner is identity stored in s->user->user_id. */ - s->user_acl->create_default(acct_acl_user.uid, - acct_acl_user.display_name); + s->user_acl.create_default(acct_acl_user.uid, + acct_acl_user.display_name); ret = 0; } else if (ret < 0) { ldpp_dout(dpp, 0) << "NOTICE: couldn't get user attrs for handling ACL " @@ -661,18 +638,29 @@ int rgw_build_bucket_policies(const DoutPrefixProvider *dpp, rgw::sal::Driver* d } } catch (const std::exception& e) { ldpp_dout(dpp, -1) << "Error reading IAM User Policy: " << e.what() << dendl; - ret = -EACCES; + if (!s->system_request) { + ret = -EACCES; + } } } try { s->iam_policy = get_iam_policy_from_attr(s->cct, s->bucket_attrs, s->bucket_tenant); } catch (const std::exception& e) { - // Really this is a can't happen condition. We parse the policy - // when it's given to us, so perhaps we should abort or otherwise - // raise bloody murder. ldpp_dout(dpp, 0) << "Error reading IAM Policy: " << e.what() << dendl; - ret = -EACCES; + + // This really shouldn't happen. We parse the policy when it's given to us, + // so a parsing failure here means we broke backward compatibility. The only + // sensible thing to do in this case is to deny access, because the policy + // may have. + // + // However, the only way for an administrator to repair such a bucket is to + // send a PutBucketPolicy or DeleteBucketPolicy request as an admin/system + // user. We can allow such requests, because even if the policy denied + // access, admin/system users override that error from verify_permission(). 
+ if (!s->system_request) { + ret = -EACCES; + } } bool success = driver->get_zone()->get_redirect_endpoint(&s->redirect_zone_endpoint); @@ -692,24 +680,21 @@ int rgw_build_bucket_policies(const DoutPrefixProvider *dpp, rgw::sal::Driver* d int rgw_build_object_policies(const DoutPrefixProvider *dpp, rgw::sal::Driver* driver, req_state *s, bool prefetch_data, optional_yield y) { - int ret = 0; - - if (!rgw::sal::Object::empty(s->object.get())) { - if (!s->bucket_exists) { - return -ERR_NO_SUCH_BUCKET; - } - s->object_acl = std::make_unique<RGWAccessControlPolicy>(s->cct); + if (rgw::sal::Object::empty(s->object)) { + return 0; + } + if (!s->bucket_exists) { + return -ERR_NO_SUCH_BUCKET; + } - s->object->set_atomic(); - if (prefetch_data) { - s->object->set_prefetch_data(); - } - ret = read_obj_policy(dpp, driver, s, s->bucket->get_info(), s->bucket_attrs, - s->object_acl.get(), nullptr, s->iam_policy, s->bucket.get(), - s->object.get(), y); + s->object->set_atomic(); + if (prefetch_data) { + s->object->set_prefetch_data(); } - return ret; + return read_obj_policy(dpp, driver, s, s->bucket->get_info(), s->bucket_attrs, + s->object_acl, nullptr, s->iam_policy, s->bucket.get(), + s->object.get(), y); } static int rgw_iam_remove_objtags(const DoutPrefixProvider *dpp, req_state* s, rgw::sal::Object* object, bool has_existing_obj_tag, bool has_resource_tag) { @@ -1451,7 +1436,7 @@ int RGWOp::init_quota() driver->get_user(s->bucket->get_info().owner); rgw::sal::User* user; - if (s->user->get_id() == s->bucket_owner.get_id()) { + if (s->user->get_id() == s->bucket_owner.id) { user = s->user.get(); } else { int r = owner_user->load_user(this, s->yield); @@ -1642,9 +1627,8 @@ int rgw_policy_from_attrset(const DoutPrefixProvider *dpp, CephContext *cct, map return -EIO; } if (cct->_conf->subsys.should_gather<ceph_subsys_rgw, 15>()) { - RGWAccessControlPolicy_S3 *s3policy = static_cast<RGWAccessControlPolicy_S3 *>(policy); ldpp_dout(dpp, 15) << __func__ << " Read AccessControlPolicy"; - s3policy->to_xml(*_dout); + rgw::s3::write_policy_xml(*policy, *_dout); *_dout << dendl; } return 0; @@ -1652,7 +1636,7 @@ int rgw_policy_from_attrset(const DoutPrefixProvider *dpp, CephContext *cct, map int RGWGetObj::read_user_manifest_part(rgw::sal::Bucket* bucket, const rgw_bucket_dir_entry& ent, - RGWAccessControlPolicy * const bucket_acl, + const RGWAccessControlPolicy& bucket_acl, const boost::optional<Policy>& bucket_policy, const off_t start_ofs, const off_t end_ofs, @@ -1669,7 +1653,7 @@ int RGWGetObj::read_user_manifest_part(rgw::sal::Bucket* bucket, std::unique_ptr<rgw::sal::Object> part = bucket->get_object(ent.key); - RGWAccessControlPolicy obj_policy(s->cct); + RGWAccessControlPolicy obj_policy; ldpp_dout(this, 20) << "reading obj=" << part << " ofs=" << cur_ofs << " end=" << cur_end << dendl; @@ -1728,8 +1712,8 @@ int RGWGetObj::read_user_manifest_part(rgw::sal::Bucket* bucket, ldpp_dout(this, 2) << "overriding permissions due to system operation" << dendl; } else if (s->auth.identity->is_admin_of(s->user->get_id())) { ldpp_dout(this, 2) << "overriding permissions due to admin operation" << dendl; - } else if (!verify_object_permission(this, s, part->get_obj(), s->user_acl.get(), - bucket_acl, &obj_policy, bucket_policy, + } else if (!verify_object_permission(this, s, part->get_obj(), s->user_acl, + bucket_acl, obj_policy, bucket_policy, s->iam_user_policies, s->session_policies, action)) { return -EPERM; } @@ -1753,14 +1737,14 @@ static int iterate_user_manifest_parts(const DoutPrefixProvider *dpp, const 
off_t end, rgw::sal::Bucket* bucket, const string& obj_prefix, - RGWAccessControlPolicy * const bucket_acl, + const RGWAccessControlPolicy& bucket_acl, const boost::optional<Policy>& bucket_policy, uint64_t * const ptotal_len, uint64_t * const pobj_size, string * const pobj_sum, int (*cb)(rgw::sal::Bucket* bucket, const rgw_bucket_dir_entry& ent, - RGWAccessControlPolicy * const bucket_acl, + const RGWAccessControlPolicy& bucket_acl, const boost::optional<Policy>& bucket_policy, off_t start_ofs, off_t end_ofs, @@ -1862,7 +1846,7 @@ static int iterate_slo_parts(const DoutPrefixProvider *dpp, map<uint64_t, rgw_slo_part>& slo_parts, int (*cb)(rgw::sal::Bucket* bucket, const rgw_bucket_dir_entry& ent, - RGWAccessControlPolicy *bucket_acl, + const RGWAccessControlPolicy& bucket_acl, const boost::optional<Policy>& bucket_policy, off_t start_ofs, off_t end_ofs, @@ -1921,7 +1905,7 @@ static int iterate_slo_parts(const DoutPrefixProvider *dpp, << dendl; // SLO is a Swift thing, and Swift has no knowledge of S3 Policies. - int r = cb(part.bucket, ent, part.bucket_acl, + int r = cb(part.bucket, ent, *part.bucket_acl, (part.bucket_policy ? boost::optional<Policy>(*part.bucket_policy) : none), start_ofs, end_ofs, cb_param, true /* swift_slo */); @@ -1938,7 +1922,7 @@ static int iterate_slo_parts(const DoutPrefixProvider *dpp, static int get_obj_user_manifest_iterate_cb(rgw::sal::Bucket* bucket, const rgw_bucket_dir_entry& ent, - RGWAccessControlPolicy * const bucket_acl, + const RGWAccessControlPolicy& bucket_acl, const boost::optional<Policy>& bucket_policy, const off_t start_ofs, const off_t end_ofs, @@ -1964,7 +1948,7 @@ int RGWGetObj::handle_user_manifest(const char *prefix, optional_yield y) const std::string bucket_name = url_decode(prefix_view.substr(0, pos)); const std::string obj_prefix = url_decode(prefix_view.substr(pos + 1)); - RGWAccessControlPolicy _bucket_acl(s->cct); + RGWAccessControlPolicy _bucket_acl; RGWAccessControlPolicy *bucket_acl; boost::optional<Policy> _bucket_policy; boost::optional<Policy>* bucket_policy; @@ -1983,7 +1967,7 @@ int RGWGetObj::handle_user_manifest(const char *prefix, optional_yield y) return r; } bucket_acl = &_bucket_acl; - r = read_bucket_policy(this, driver, s, ubucket->get_info(), bucket_attrs, bucket_acl, ubucket->get_key(), y); + r = read_bucket_policy(this, driver, s, ubucket->get_info(), bucket_attrs, *bucket_acl, ubucket->get_key(), y); if (r < 0) { ldpp_dout(this, 0) << "failed to read bucket policy" << dendl; return r; @@ -1993,7 +1977,7 @@ int RGWGetObj::handle_user_manifest(const char *prefix, optional_yield y) pbucket = ubucket.get(); } else { pbucket = s->bucket.get(); - bucket_acl = s->bucket_acl.get(); + bucket_acl = &s->bucket_acl; bucket_policy = &s->iam_policy; } @@ -2002,7 +1986,7 @@ int RGWGetObj::handle_user_manifest(const char *prefix, optional_yield y) * - overall DLO's content size, * - md5 sum of overall DLO's content (for etag of Swift API). 
*/ r = iterate_user_manifest_parts(this, s->cct, driver, ofs, end, - pbucket, obj_prefix, bucket_acl, *bucket_policy, + pbucket, obj_prefix, *bucket_acl, *bucket_policy, nullptr, &s->obj_size, &lo_etag, nullptr /* cb */, nullptr /* cb arg */, y); if (r < 0) { @@ -2016,7 +2000,7 @@ int RGWGetObj::handle_user_manifest(const char *prefix, optional_yield y) } r = iterate_user_manifest_parts(this, s->cct, driver, ofs, end, - pbucket, obj_prefix, bucket_acl, *bucket_policy, + pbucket, obj_prefix, *bucket_acl, *bucket_policy, &total_len, nullptr, nullptr, nullptr, nullptr, y); if (r < 0) { @@ -2030,7 +2014,7 @@ int RGWGetObj::handle_user_manifest(const char *prefix, optional_yield y) } r = iterate_user_manifest_parts(this, s->cct, driver, ofs, end, - pbucket, obj_prefix, bucket_acl, *bucket_policy, + pbucket, obj_prefix, *bucket_acl, *bucket_policy, nullptr, nullptr, nullptr, get_obj_user_manifest_iterate_cb, (void *)this, y); if (r < 0) { @@ -2102,8 +2086,7 @@ int RGWGetObj::handle_slo_manifest(bufferlist& bl, optional_yield y) bucket_policy = piter->second.second.get_ptr(); bucket = buckets[bucket_name].get(); } else { - allocated_acls.push_back(RGWAccessControlPolicy(s->cct)); - RGWAccessControlPolicy& _bucket_acl = allocated_acls.back(); + RGWAccessControlPolicy& _bucket_acl = allocated_acls.emplace_back(); std::unique_ptr<rgw::sal::Bucket> tmp_bucket; int r = driver->load_bucket(this, rgw_bucket(s->user->get_tenant(), @@ -2116,7 +2099,7 @@ int RGWGetObj::handle_slo_manifest(bufferlist& bl, optional_yield y) } bucket = tmp_bucket.get(); bucket_acl = &_bucket_acl; - r = read_bucket_policy(this, driver, s, tmp_bucket->get_info(), tmp_bucket->get_attrs(), bucket_acl, + r = read_bucket_policy(this, driver, s, tmp_bucket->get_info(), tmp_bucket->get_attrs(), *bucket_acl, tmp_bucket->get_key(), y); if (r < 0) { ldpp_dout(this, 0) << "failed to read bucket ACL for bucket " @@ -2131,7 +2114,7 @@ int RGWGetObj::handle_slo_manifest(bufferlist& bl, optional_yield y) } } else { bucket = s->bucket.get(); - bucket_acl = s->bucket_acl.get(); + bucket_acl = &s->bucket_acl; bucket_policy = s->iam_policy.get_ptr(); } @@ -3518,10 +3501,10 @@ void RGWCreateBucket::execute(optional_yield y) } // don't allow changes to the acl policy - RGWAccessControlPolicy old_policy(get_cct()); + RGWAccessControlPolicy old_policy; int r = rgw_op_get_bucket_policy_from_attr(this, s->cct, driver, info.owner, s->bucket->get_attrs(), - &old_policy, y); + old_policy, y); if (r >= 0 && old_policy != policy) { s->err.message = "Cannot modify existing access control policy"; op_ret = -EEXIST; @@ -3529,8 +3512,8 @@ void RGWCreateBucket::execute(optional_yield y) } } - s->bucket_owner.set_id(s->user->get_id()); - s->bucket_owner.set_name(s->user->get_display_name()); + s->bucket_owner.id = s->user->get_id(); + s->bucket_owner.display_name = s->user->get_display_name(); createparams.owner = s->user->get_id(); buffer::list aclbl; @@ -3845,7 +3828,7 @@ int RGWPutObj::verify_permission(optional_yield y) { if (! 
copy_source.empty()) { - RGWAccessControlPolicy cs_acl(s->cct); + RGWAccessControlPolicy cs_acl; boost::optional<Policy> policy; map<string, bufferlist> cs_attrs; auto cs_bucket = driver->get_bucket(copy_source_bucket_info); @@ -3855,14 +3838,14 @@ int RGWPutObj::verify_permission(optional_yield y) cs_object->set_prefetch_data(); /* check source object permissions */ - int ret = read_obj_policy(this, driver, s, copy_source_bucket_info, cs_attrs, &cs_acl, nullptr, + int ret = read_obj_policy(this, driver, s, copy_source_bucket_info, cs_attrs, cs_acl, nullptr, policy, cs_bucket.get(), cs_object.get(), y, true); if (ret < 0) { return ret; } /* admin request overrides permission checks */ - if (! s->auth.identity->is_admin_of(cs_acl.get_owner().get_id())) { + if (! s->auth.identity->is_admin_of(cs_acl.get_owner().id)) { if (policy || ! s->iam_user_policies.empty() || !s->session_policies.empty()) { //add source object tags for permission evaluation auto [has_s3_existing_tag, has_s3_resource_tag] = rgw_check_policy_condition(this, policy, s->iam_user_policies, s->session_policies); @@ -4271,7 +4254,7 @@ void RGWPutObj::execute(optional_yield y) return; } processor = driver->get_append_writer(this, s->yield, s->object.get(), - s->bucket_owner.get_id(), + s->bucket_owner.id, pdest_placement, s->req_id, position, &cur_accounted_size); } else { @@ -4284,7 +4267,7 @@ void RGWPutObj::execute(optional_yield y) } } processor = driver->get_atomic_writer(this, s->yield, s->object.get(), - s->bucket_owner.get_id(), + s->bucket_owner.id, pdest_placement, olh_epoch, s->req_id); } @@ -4690,7 +4673,7 @@ void RGWPostObj::execute(optional_yield y) std::unique_ptr<rgw::sal::Writer> processor; processor = driver->get_atomic_writer(this, s->yield, obj.get(), - s->bucket_owner.get_id(), + s->bucket_owner.id, &s->dest_placement, 0, s->req_id); op_ret = processor->prepare(s->yield); if (op_ret < 0) { @@ -4991,11 +4974,7 @@ void RGWPutMetadataBucket::execute(optional_yield y) * contain such keys yet. */ if (has_policy) { if (s->dialect.compare("swift") == 0) { - auto old_policy = \ - static_cast<RGWAccessControlPolicy_SWIFT*>(s->bucket_acl.get()); - auto new_policy = static_cast<RGWAccessControlPolicy_SWIFT*>(&policy); - new_policy->filter_merge(policy_rw_mask, old_policy); - policy = *new_policy; + rgw::swift::merge_policy(policy_rw_mask, s->bucket_acl, policy); } buffer::list bl; policy.encode(bl); @@ -5486,7 +5465,7 @@ int RGWCopyObj::init_processing(optional_yield y) int RGWCopyObj::verify_permission(optional_yield y) { - RGWAccessControlPolicy src_acl(s->cct); + RGWAccessControlPolicy src_acl; boost::optional<Policy> src_policy; /* get buckets info (source and dest) */ @@ -5497,7 +5476,7 @@ int RGWCopyObj::verify_permission(optional_yield y) rgw_placement_rule src_placement; /* check source object permissions */ - op_ret = read_obj_policy(this, driver, s, src_bucket->get_info(), src_bucket->get_attrs(), &src_acl, &src_placement.storage_class, + op_ret = read_obj_policy(this, driver, s, src_bucket->get_info(), src_bucket->get_attrs(), src_acl, &src_placement.storage_class, src_policy, src_bucket.get(), s->src_object.get(), y); if (op_ret < 0) { return op_ret; @@ -5514,7 +5493,7 @@ int RGWCopyObj::verify_permission(optional_yield y) } /* admin request overrides permission checks */ - if (!s->auth.identity->is_admin_of(src_acl.get_owner().get_id())) { + if (!s->auth.identity->is_admin_of(src_acl.get_owner().id)) { if (src_policy || ! 
s->iam_user_policies.empty() || !s->session_policies.empty()) { auto [has_s3_existing_tag, has_s3_resource_tag] = rgw_check_policy_condition(this, src_policy, s->iam_user_policies, s->session_policies); if (has_s3_existing_tag || has_s3_resource_tag) @@ -5585,20 +5564,20 @@ int RGWCopyObj::verify_permission(optional_yield y) } } - RGWAccessControlPolicy dest_bucket_policy(s->cct); + RGWAccessControlPolicy dest_bucket_policy; s->object->set_atomic(); /* check dest bucket permissions */ op_ret = read_bucket_policy(this, driver, s, s->bucket->get_info(), s->bucket->get_attrs(), - &dest_bucket_policy, s->bucket->get_key(), y); + dest_bucket_policy, s->bucket->get_key(), y); if (op_ret < 0) { return op_ret; } auto dest_iam_policy = get_iam_policy_from_attr(s->cct, s->bucket->get_attrs(), s->bucket->get_tenant()); /* admin request overrides permission checks */ - if (! s->auth.identity->is_admin_of(dest_policy.get_owner().get_id())){ + if (! s->auth.identity->is_admin_of(dest_policy.get_owner().id)){ if (dest_iam_policy != boost::none || ! s->iam_user_policies.empty() || !s->session_policies.empty()) { //Add destination bucket tags for authorization auto [has_s3_existing_tag, has_s3_resource_tag] = rgw_check_policy_condition(this, dest_iam_policy, s->iam_user_policies, s->session_policies); @@ -5896,11 +5875,11 @@ void RGWGetACLs::pre_exec() void RGWGetACLs::execute(optional_yield y) { stringstream ss; - RGWAccessControlPolicy* const acl = \ - (!rgw::sal::Object::empty(s->object.get()) ? s->object_acl.get() : s->bucket_acl.get()); - RGWAccessControlPolicy_S3* const s3policy = \ - static_cast<RGWAccessControlPolicy_S3*>(acl); - s3policy->to_xml(ss); + if (rgw::sal::Object::empty(s->object.get())) { + rgw::s3::write_policy_xml(s->bucket_acl, ss); + } else { + rgw::s3::write_policy_xml(s->object_acl, ss); + } acls = ss.str(); } @@ -5991,25 +5970,10 @@ void RGWDeleteLC::pre_exec() void RGWPutACLs::execute(optional_yield y) { - bufferlist bl; + const RGWAccessControlPolicy& existing_policy = \ + (rgw::sal::Object::empty(s->object.get()) ? s->bucket_acl : s->object_acl); - RGWAccessControlPolicy_S3 *policy = NULL; - RGWACLXMLParser_S3 parser(s->cct); - RGWAccessControlPolicy_S3 new_policy(s->cct); - stringstream ss; - - op_ret = 0; /* XXX redundant? */ - - if (!parser.init()) { - op_ret = -EINVAL; - return; - } - - - RGWAccessControlPolicy* const existing_policy = \ - (rgw::sal::Object::empty(s->object.get()) ? 
s->bucket_acl.get() : s->object_acl.get()); - - owner = existing_policy->get_owner(); + const ACLOwner& existing_owner = existing_policy.get_owner(); op_ret = get_params(y); if (op_ret < 0) { @@ -6032,26 +5996,24 @@ void RGWPutACLs::execute(optional_yield y) return; } + RGWAccessControlPolicy new_policy; if (!s->canned_acl.empty() || s->has_acl_header) { - op_ret = get_policy_from_state(driver, s, ss); - if (op_ret < 0) - return; - - data.clear(); - data.append(ss.str()); + op_ret = get_policy_from_state(existing_owner, new_policy); + } else { + op_ret = rgw::s3::parse_policy(this, y, driver, {data.c_str(), data.length()}, + new_policy, s->err.message); } - - if (!parser.parse(data.c_str(), data.length(), 1)) { - op_ret = -EINVAL; + if (op_ret < 0) return; - } - policy = static_cast<RGWAccessControlPolicy_S3 *>(parser.find_first("AccessControlPolicy")); - if (!policy) { - op_ret = -EINVAL; + + if (!existing_owner.id.empty() && + existing_owner.id != new_policy.get_owner().id) { + s->err.message = "Cannot modify ACL Owner"; + op_ret = -EPERM; return; } - const RGWAccessControlList& req_acl = policy->get_acl(); + const RGWAccessControlList& req_acl = new_policy.get_acl(); const multimap<string, ACLGrant>& req_grant_map = req_acl.get_grant_map(); #define ACL_GRANTS_MAX_NUM 100 int max_num = s->cct->_conf->rgw_acl_grants_max_num; @@ -6072,13 +6034,8 @@ void RGWPutACLs::execute(optional_yield y) // forward bucket acl requests to meta master zone if ((rgw::sal::Object::empty(s->object.get()))) { - bufferlist in_data; - // include acl data unless it was generated from a canned_acl - if (s->canned_acl.empty()) { - in_data.append(data); - } op_ret = rgw_forward_request_to_master(this, *s->penv.site, s->user->get_id(), - &in_data, nullptr, s->info, y); + &data, nullptr, s->info, y); if (op_ret < 0) { ldpp_dout(this, 0) << "forward_request_to_master returned ret=" << op_ret << dendl; return; @@ -6087,17 +6044,11 @@ void RGWPutACLs::execute(optional_yield y) if (s->cct->_conf->subsys.should_gather<ceph_subsys_rgw, 15>()) { ldpp_dout(this, 15) << "Old AccessControlPolicy"; - policy->to_xml(*_dout); + rgw::s3::write_policy_xml(existing_policy, *_dout); *_dout << dendl; - } - - op_ret = policy->rebuild(this, driver, &owner, new_policy, s->err.message); - if (op_ret < 0) - return; - if (s->cct->_conf->subsys.should_gather<ceph_subsys_rgw, 15>()) { ldpp_dout(this, 15) << "New AccessControlPolicy:"; - new_policy.to_xml(*_dout); + rgw::s3::write_policy_xml(new_policy, *_dout); *_dout << dendl; } @@ -6107,6 +6058,8 @@ void RGWPutACLs::execute(optional_yield y) op_ret = -EACCES; return; } + + bufferlist bl; new_policy.encode(bl); map<string, bufferlist> attrs; @@ -7410,8 +7363,8 @@ bool RGWBulkDelete::Deleter::verify_permission(RGWBucketInfo& binfo, ACLOwner& bucket_owner /* out */, optional_yield y) { - RGWAccessControlPolicy bacl(driver->ctx()); - int ret = read_bucket_policy(dpp, driver, s, binfo, battrs, &bacl, binfo.bucket, y); + RGWAccessControlPolicy bacl; + int ret = read_bucket_policy(dpp, driver, s, binfo, battrs, bacl, binfo.bucket, y); if (ret < 0) { return false; } @@ -7422,8 +7375,8 @@ bool RGWBulkDelete::Deleter::verify_permission(RGWBucketInfo& binfo, /* We can use global user_acl because each BulkDelete request is allowed * to work on entities from a single account only. 
*/ - return verify_bucket_permission(dpp, s, binfo.bucket, s->user_acl.get(), - &bacl, policy, s->iam_user_policies, s->session_policies, rgw::IAM::s3DeleteBucket); + return verify_bucket_permission(dpp, s, binfo.bucket, s->user_acl, + bacl, policy, s->iam_user_policies, s->session_policies, rgw::IAM::s3DeleteBucket); } bool RGWBulkDelete::Deleter::delete_single(const acct_path_t& path, optional_yield y) @@ -7447,7 +7400,7 @@ bool RGWBulkDelete::Deleter::delete_single(const acct_path_t& path, optional_yie if (!path.obj_key.empty()) { // object deletion ACLOwner bucket_owner; - bucket_owner.set_id(bucket->get_info().owner); + bucket_owner.id = bucket->get_info().owner; std::unique_ptr<rgw::sal::Object> obj = bucket->get_object(path.obj_key); obj->set_atomic(); @@ -7749,8 +7702,8 @@ bool RGWBulkUploadOp::handle_file_verify_permission(RGWBucketInfo& binfo, ACLOwner& bucket_owner /* out */, optional_yield y) { - RGWAccessControlPolicy bacl(driver->ctx()); - op_ret = read_bucket_policy(this, driver, s, binfo, battrs, &bacl, binfo.bucket, y); + RGWAccessControlPolicy bacl; + op_ret = read_bucket_policy(this, driver, s, binfo, battrs, bacl, binfo.bucket, y); if (op_ret < 0) { ldpp_dout(this, 20) << "cannot read_policy() for bucket" << dendl; return false; @@ -7803,8 +7756,8 @@ bool RGWBulkUploadOp::handle_file_verify_permission(RGWBucketInfo& binfo, } } - return verify_bucket_permission_no_policy(this, s, s->user_acl.get(), - &bacl, RGW_PERM_WRITE); + return verify_bucket_permission_no_policy(this, s, s->user_acl, + bacl, RGW_PERM_WRITE); } int RGWBulkUploadOp::handle_file(const std::string_view path, @@ -7860,7 +7813,7 @@ int RGWBulkUploadOp::handle_file(const std::string_view path, std::unique_ptr<rgw::sal::Writer> processor; processor = driver->get_atomic_writer(this, s->yield, obj.get(), - bowner.get_id(), + bowner.id, &s->dest_placement, 0, s->req_id); op_ret = processor->prepare(s->yield); if (op_ret < 0) { @@ -8245,7 +8198,7 @@ void RGWGetObjLayout::execute(optional_yield y) int RGWConfigBucketMetaSearch::verify_permission(optional_yield y) { - if (!s->auth.identity->is_owner_of(s->bucket_owner.get_id())) { + if (!s->auth.identity->is_owner_of(s->bucket_owner.id)) { return -EACCES; } @@ -8278,7 +8231,7 @@ void RGWConfigBucketMetaSearch::execute(optional_yield y) int RGWGetBucketMetaSearch::verify_permission(optional_yield y) { - if (!s->auth.identity->is_owner_of(s->bucket_owner.get_id())) { + if (!s->auth.identity->is_owner_of(s->bucket_owner.id)) { return -EACCES; } @@ -8292,7 +8245,7 @@ void RGWGetBucketMetaSearch::pre_exec() int RGWDelBucketMetaSearch::verify_permission(optional_yield y) { - if (!s->auth.identity->is_owner_of(s->bucket_owner.get_id())) { + if (!s->auth.identity->is_owner_of(s->bucket_owner.id)) { return -EACCES; } @@ -8915,7 +8868,7 @@ int RGWGetBucketPolicyStatus::verify_permission(optional_yield y) void RGWGetBucketPolicyStatus::execute(optional_yield y) { - isPublic = (s->iam_policy && rgw::IAM::is_public(*s->iam_policy)) || s->bucket_acl->is_public(this); + isPublic = (s->iam_policy && rgw::IAM::is_public(*s->iam_policy)) || s->bucket_acl.is_public(this); } int RGWPutBucketPublicAccessBlock::verify_permission(optional_yield y) diff --git a/src/rgw/rgw_op.h b/src/rgw/rgw_op.h index be7232ae392..9314d454c79 100644 --- a/src/rgw/rgw_op.h +++ b/src/rgw/rgw_op.h @@ -79,7 +79,7 @@ int rgw_op_get_bucket_policy_from_attr(const DoutPrefixProvider *dpp, rgw::sal::Driver* driver, const rgw_user& bucket_owner, std::map<std::string, bufferlist>& bucket_attrs, - 
RGWAccessControlPolicy *policy, + RGWAccessControlPolicy& policy, optional_yield y); class RGWHandler { @@ -420,7 +420,7 @@ public: int read_user_manifest_part( rgw::sal::Bucket* bucket, const rgw_bucket_dir_entry& ent, - RGWAccessControlPolicy * const bucket_acl, + const RGWAccessControlPolicy& bucket_acl, const boost::optional<rgw::IAM::Policy>& bucket_policy, const off_t start_ofs, const off_t end_ofs, @@ -1098,7 +1098,6 @@ class RGWCreateBucket : public RGWOp { void execute(optional_yield y) override; void init(rgw::sal::Driver* driver, req_state *s, RGWHandler *h) override { RGWOp::init(driver, s, h); - policy.set_ctx(s->cct); relaxed_region_enforcement = s->cct->_conf.get_val<bool>("rgw_relaxed_region_enforcement"); } @@ -1249,11 +1248,6 @@ public: delete obj_legal_hold; } - void init(rgw::sal::Driver* driver, req_state *s, RGWHandler *h) override { - RGWOp::init(driver, s, h); - policy.set_ctx(s->cct); - } - virtual int init_processing(optional_yield y) override; void emplace_attr(std::string&& key, buffer::list&& bl) { @@ -1328,11 +1322,6 @@ public: attrs.emplace(std::move(key), std::move(bl)); /* key and bl are r-value refs */ } - void init(rgw::sal::Driver* driver, req_state *s, RGWHandler *h) override { - RGWOp::init(driver, s, h); - policy.set_ctx(s->cct); - } - int verify_permission(optional_yield y) override; void pre_exec() override; void execute(optional_yield y) override; @@ -1367,10 +1356,6 @@ public: has_policy(false) { } - void init(rgw::sal::Driver* driver, req_state *s, RGWHandler *h) override { - RGWOp::init(driver, s, h); - policy.set_ctx(s->cct); - } int init_processing(optional_yield y) override; int verify_permission(optional_yield y) override; void pre_exec() override { } @@ -1406,11 +1391,6 @@ public: attrs.emplace(std::move(key), std::move(bl)); /* key and bl are r-value refs */ } - void init(rgw::sal::Driver* driver, req_state *s, RGWHandler *h) override { - RGWOp::init(driver, s, h); - policy.set_ctx(s->cct); - } - int verify_permission(optional_yield y) override; void pre_exec() override; void execute(optional_yield y) override; @@ -1433,10 +1413,6 @@ public: : dlo_manifest(NULL) {} - void init(rgw::sal::Driver* driver, req_state *s, RGWHandler *h) override { - RGWOp::init(driver, s, h); - policy.set_ctx(s->cct); - } int verify_permission(optional_yield y) override; void pre_exec() override; void execute(optional_yield y) override; @@ -1560,10 +1536,6 @@ public: attrs.emplace(std::move(key), std::move(bl)); } - void init(rgw::sal::Driver* driver, req_state *s, RGWHandler *h) override { - RGWOp::init(driver, s, h); - dest_policy.set_ctx(s->cct); - } int init_processing(optional_yield y) override; int verify_permission(optional_yield y) override; void pre_exec() override; @@ -1604,7 +1576,6 @@ public: class RGWPutACLs : public RGWOp { protected: bufferlist data; - ACLOwner owner; public: RGWPutACLs() {} @@ -1614,7 +1585,8 @@ public: void pre_exec() override; void execute(optional_yield y) override; - virtual int get_policy_from_state(rgw::sal::Driver* driver, req_state *s, std::stringstream& ss) { return 0; } + virtual int get_policy_from_state(const ACLOwner& owner, + RGWAccessControlPolicy& p) { return 0; } virtual int get_params(optional_yield y) = 0; void send_response() override = 0; const char* name() const override { return "put_acls"; } @@ -1664,7 +1636,6 @@ public: void pre_exec() override; void execute(optional_yield y) override; -// virtual int get_policy_from_state(RGWRados* driver, req_state *s, std::stringstream& ss) { return 0; } virtual int 
get_params(optional_yield y) = 0; void send_response() override = 0; const char* name() const override { return "put_lifecycle"; } @@ -1844,10 +1815,6 @@ protected: public: RGWInitMultipart() {} - void init(rgw::sal::Driver* driver, req_state *s, RGWHandler *h) override { - RGWOp::init(driver, s, h); - policy.set_ctx(s->cct); - } int verify_permission(optional_yield y) override; void pre_exec() override; void execute(optional_yield y) override; @@ -1919,10 +1886,6 @@ public: truncated = false; } - void init(rgw::sal::Driver* driver, req_state *s, RGWHandler *h) override { - RGWOp::init(driver, s, h); - policy = RGWAccessControlPolicy(s->cct); - } int verify_permission(optional_yield y) override; void pre_exec() override; void execute(optional_yield y) override; diff --git a/src/rgw/rgw_quota.cc b/src/rgw/rgw_quota.cc index c2f300b1fdb..836086b5be2 100644 --- a/src/rgw/rgw_quota.cc +++ b/src/rgw/rgw_quota.cc @@ -86,24 +86,15 @@ public: const DoutPrefixProvider* dpp); void adjust_stats(const rgw_user& user, rgw_bucket& bucket, int objs_delta, uint64_t added_bytes, uint64_t removed_bytes); - void set_stats(const rgw_user& user, const rgw_bucket& bucket, RGWQuotaCacheStats& qs, RGWStorageStats& stats); + void set_stats(const rgw_user& user, const rgw_bucket& bucket, RGWQuotaCacheStats& qs, const RGWStorageStats& stats); int async_refresh(const rgw_user& user, const rgw_bucket& bucket, RGWQuotaCacheStats& qs); - void async_refresh_response(const rgw_user& user, rgw_bucket& bucket, RGWStorageStats& stats); + void async_refresh_response(const rgw_user& user, rgw_bucket& bucket, const RGWStorageStats& stats); void async_refresh_fail(const rgw_user& user, rgw_bucket& bucket); - class AsyncRefreshHandler { - protected: - rgw::sal::Driver* driver; - RGWQuotaCache<T> *cache; - public: - AsyncRefreshHandler(rgw::sal::Driver* _driver, RGWQuotaCache<T> *_cache) : driver(_driver), cache(_cache) {} - virtual ~AsyncRefreshHandler() {} - - virtual int init_fetch() = 0; - virtual void drop_reference() = 0; - }; - - virtual AsyncRefreshHandler *allocate_refresh_handler(const rgw_user& user, const rgw_bucket& bucket) = 0; + /// start an async refresh that will eventually call async_refresh_response or + /// async_refresh_fail. 
hold a reference to the waiter until completion + virtual int init_refresh(const rgw_user& user, const rgw_bucket& bucket, + boost::intrusive_ptr<RefCountedWaitObject> waiter) = 0; }; template<class T> @@ -116,31 +107,17 @@ int RGWQuotaCache<T>::async_refresh(const rgw_user& user, const rgw_bucket& buck return 0; } - async_refcount->get(); - - - AsyncRefreshHandler *handler = allocate_refresh_handler(user, bucket); - - int ret = handler->init_fetch(); - if (ret < 0) { - async_refcount->put(); - handler->drop_reference(); - return ret; - } - - return 0; + return init_refresh(user, bucket, async_refcount); } template<class T> void RGWQuotaCache<T>::async_refresh_fail(const rgw_user& user, rgw_bucket& bucket) { ldout(driver->ctx(), 20) << "async stats refresh response for bucket=" << bucket << dendl; - - async_refcount->put(); } template<class T> -void RGWQuotaCache<T>::async_refresh_response(const rgw_user& user, rgw_bucket& bucket, RGWStorageStats& stats) +void RGWQuotaCache<T>::async_refresh_response(const rgw_user& user, rgw_bucket& bucket, const RGWStorageStats& stats) { ldout(driver->ctx(), 20) << "async stats refresh response for bucket=" << bucket << dendl; @@ -149,12 +126,10 @@ void RGWQuotaCache<T>::async_refresh_response(const rgw_user& user, rgw_bucket& map_find(user, bucket, qs); set_stats(user, bucket, qs, stats); - - async_refcount->put(); } template<class T> -void RGWQuotaCache<T>::set_stats(const rgw_user& user, const rgw_bucket& bucket, RGWQuotaCacheStats& qs, RGWStorageStats& stats) +void RGWQuotaCache<T>::set_stats(const rgw_user& user, const rgw_bucket& bucket, RGWQuotaCacheStats& qs, const RGWStorageStats& stats) { qs.stats = stats; qs.expiration = ceph_clock_now(); @@ -246,70 +221,6 @@ void RGWQuotaCache<T>::adjust_stats(const rgw_user& user, rgw_bucket& bucket, in data_modified(user, bucket); } -class BucketAsyncRefreshHandler : public RGWQuotaCache<rgw_bucket>::AsyncRefreshHandler, - public RGWGetBucketStats_CB { - rgw_user user; -public: - BucketAsyncRefreshHandler(rgw::sal::Driver* _driver, RGWQuotaCache<rgw_bucket> *_cache, - const rgw_user& _user, const rgw_bucket& _bucket) : - RGWQuotaCache<rgw_bucket>::AsyncRefreshHandler(_driver, _cache), - RGWGetBucketStats_CB(_bucket), user(_user) {} - - void drop_reference() override { put(); } - void handle_response(int r) override; - int init_fetch() override; -}; - -int BucketAsyncRefreshHandler::init_fetch() -{ - std::unique_ptr<rgw::sal::Bucket> rbucket; - - const DoutPrefix dp(driver->ctx(), dout_subsys, "rgw bucket async refresh handler: "); - int r = driver->load_bucket(&dp, bucket, &rbucket, null_yield); - if (r < 0) { - ldpp_dout(&dp, 0) << "could not get bucket info for bucket=" << bucket << " r=" << r << dendl; - return r; - } - - ldpp_dout(&dp, 20) << "initiating async quota refresh for bucket=" << bucket << dendl; - - const auto& index = rbucket->get_info().get_current_index(); - if (is_layout_indexless(index)) { - return 0; - } - - r = rbucket->read_stats_async(&dp, index, RGW_NO_SHARD, this); - if (r < 0) { - ldpp_dout(&dp, 0) << "could not get bucket info for bucket=" << bucket.name << dendl; - - /* read_stats_async() dropped our reference already */ - return r; - } - - return 0; -} - -void BucketAsyncRefreshHandler::handle_response(const int r) -{ - if (r < 0) { - ldout(driver->ctx(), 20) << "AsyncRefreshHandler::handle_response() r=" << r << dendl; - cache->async_refresh_fail(user, bucket); - return; - } - - RGWStorageStats bs; - - for (const auto& pair : *stats) { - const RGWStorageStats& s = pair.second; 
- - bs.size += s.size; - bs.size_rounded += s.size_rounded; - bs.num_objects += s.num_objects; - } - - cache->async_refresh_response(user, bucket, bs); -} - class RGWBucketStatsCache : public RGWQuotaCache<rgw_bucket> { protected: bool map_find(const rgw_user& user, const rgw_bucket& bucket, RGWQuotaCacheStats& qs) override { @@ -330,9 +241,8 @@ public: explicit RGWBucketStatsCache(rgw::sal::Driver* _driver) : RGWQuotaCache<rgw_bucket>(_driver, _driver->ctx()->_conf->rgw_bucket_quota_cache_size) { } - AsyncRefreshHandler *allocate_refresh_handler(const rgw_user& user, const rgw_bucket& bucket) override { - return new BucketAsyncRefreshHandler(driver, this, user, bucket); - } + int init_refresh(const rgw_user& user, const rgw_bucket& bucket, + boost::intrusive_ptr<RefCountedWaitObject> waiter) override; }; int RGWBucketStatsCache::fetch_stats_from_storage(const rgw_user& _u, const rgw_bucket& _b, RGWStorageStats& stats, optional_yield y, const DoutPrefixProvider *dpp) @@ -376,48 +286,57 @@ int RGWBucketStatsCache::fetch_stats_from_storage(const rgw_user& _u, const rgw_ return 0; } -class UserAsyncRefreshHandler : public RGWQuotaCache<rgw_user>::AsyncRefreshHandler, - public RGWGetUserStats_CB { - const DoutPrefixProvider *dpp; +class BucketAsyncRefreshHandler : public rgw::sal::ReadStatsCB { + RGWBucketStatsCache* cache; + boost::intrusive_ptr<RefCountedWaitObject> waiter; + rgw_user user; rgw_bucket bucket; public: - UserAsyncRefreshHandler(const DoutPrefixProvider *_dpp, rgw::sal::Driver* _driver, RGWQuotaCache<rgw_user> *_cache, - const rgw_user& _user, const rgw_bucket& _bucket) : - RGWQuotaCache<rgw_user>::AsyncRefreshHandler(_driver, _cache), - RGWGetUserStats_CB(_user), - dpp(_dpp), - bucket(_bucket) {} - - void drop_reference() override { put(); } - int init_fetch() override; - void handle_response(int r) override; + BucketAsyncRefreshHandler(RGWBucketStatsCache* cache, + boost::intrusive_ptr<RefCountedWaitObject> waiter, + const rgw_user& user, const rgw_bucket& bucket) + : cache(cache), waiter(std::move(waiter)), user(user), bucket(bucket) {} + + void handle_response(int r, const RGWStorageStats& stats) override { + if (r < 0) { + cache->async_refresh_fail(user, bucket); + return; + } + + cache->async_refresh_response(user, bucket, stats); + } }; -int UserAsyncRefreshHandler::init_fetch() + +int RGWBucketStatsCache::init_refresh(const rgw_user& user, const rgw_bucket& bucket, + boost::intrusive_ptr<RefCountedWaitObject> waiter) { - std::unique_ptr<rgw::sal::User> ruser = driver->get_user(user); + std::unique_ptr<rgw::sal::Bucket> rbucket; - ldpp_dout(dpp, 20) << "initiating async quota refresh for user=" << user << dendl; - int r = ruser->read_stats_async(dpp, this); + const DoutPrefix dp(driver->ctx(), dout_subsys, "rgw bucket async refresh handler: "); + int r = driver->load_bucket(&dp, bucket, &rbucket, null_yield); if (r < 0) { - ldpp_dout(dpp, 0) << "could not get bucket info for user=" << user << dendl; - - /* get_bucket_stats_async() dropped our reference already */ + ldpp_dout(&dp, 0) << "could not get bucket info for bucket=" << bucket << " r=" << r << dendl; return r; } - return 0; -} + ldpp_dout(&dp, 20) << "initiating async quota refresh for bucket=" << bucket << dendl; -void UserAsyncRefreshHandler::handle_response(int r) -{ + const auto& index = rbucket->get_info().get_current_index(); + if (is_layout_indexless(index)) { + return 0; + } + + boost::intrusive_ptr handler = new BucketAsyncRefreshHandler( + this, std::move(waiter), user, bucket); + + r = 
rbucket->read_stats_async(&dp, index, RGW_NO_SHARD, std::move(handler)); if (r < 0) { - ldout(driver->ctx(), 20) << "AsyncRefreshHandler::handle_response() r=" << r << dendl; - cache->async_refresh_fail(user, bucket); - return; + ldpp_dout(&dp, 0) << "could not get bucket stats for bucket=" << bucket.name << dendl; + return r; } - cache->async_refresh_response(user, bucket, stats); + return 0; } class RGWUserStatsCache : public RGWQuotaCache<rgw_user> { @@ -574,9 +493,8 @@ public: stop(); } - AsyncRefreshHandler *allocate_refresh_handler(const rgw_user& user, const rgw_bucket& bucket) override { - return new UserAsyncRefreshHandler(dpp, driver, this, user, bucket); - } + int init_refresh(const rgw_user& user, const rgw_bucket& bucket, + boost::intrusive_ptr<RefCountedWaitObject> waiter) override; bool going_down() { return down_flag; @@ -592,6 +510,49 @@ public: } }; +class UserAsyncRefreshHandler : public rgw::sal::ReadStatsCB { + RGWUserStatsCache* cache; + boost::intrusive_ptr<RefCountedWaitObject> waiter; + rgw_bucket bucket; + rgw_user user; + public: + UserAsyncRefreshHandler(RGWUserStatsCache* cache, + boost::intrusive_ptr<RefCountedWaitObject> waiter, + const rgw_user& user, const rgw_bucket& bucket) + : cache(cache), waiter(std::move(waiter)), bucket(bucket), user(user) + {} + + void handle_response(int r, const RGWStorageStats& stats) override; +}; + +int RGWUserStatsCache::init_refresh(const rgw_user& user, const rgw_bucket& bucket, + boost::intrusive_ptr<RefCountedWaitObject> waiter) +{ + boost::intrusive_ptr handler = new UserAsyncRefreshHandler( + this, std::move(waiter), user, bucket); + + std::unique_ptr<rgw::sal::User> ruser = driver->get_user(user); + + ldpp_dout(dpp, 20) << "initiating async quota refresh for user=" << user << dendl; + int r = ruser->read_stats_async(dpp, std::move(handler)); + if (r < 0) { + ldpp_dout(dpp, 0) << "could not get bucket info for user=" << user << dendl; + return r; + } + + return 0; +} + +void UserAsyncRefreshHandler::handle_response(int r, const RGWStorageStats& stats) +{ + if (r < 0) { + cache->async_refresh_fail(user, bucket); + return; + } + + cache->async_refresh_response(user, bucket, stats); +} + int RGWUserStatsCache::fetch_stats_from_storage(const rgw_user& _u, const rgw_bucket& _b, RGWStorageStats& stats, diff --git a/src/rgw/rgw_rest_client.cc b/src/rgw/rgw_rest_client.cc index 1ccb813a109..b1ee1a9340e 100644 --- a/src/rgw/rgw_rest_client.cc +++ b/src/rgw/rgw_rest_client.cc @@ -3,6 +3,7 @@ #include "rgw_common.h" #include "rgw_rest_client.h" +#include "rgw_acl_s3.h" #include "rgw_auth_s3.h" #include "rgw_http_errors.h" @@ -497,28 +498,22 @@ RGWRESTStreamS3PutObj::~RGWRESTStreamS3PutObj() delete out_cb; } -static void grants_by_type_add_one_grant(map<int, string>& grants_by_type, int perm, ACLGrant& grant) +static void grants_by_type_add_one_grant(map<int, string>& grants_by_type, int perm, const ACLGrant& grant) { string& s = grants_by_type[perm]; if (!s.empty()) s.append(", "); - string id_type_str; - ACLGranteeType& type = grant.get_type(); - switch (type.get_type()) { - case ACL_TYPE_GROUP: - id_type_str = "uri"; - break; - case ACL_TYPE_EMAIL_USER: - id_type_str = "emailAddress"; - break; - default: - id_type_str = "id"; - } - rgw_user id; - grant.get_id(id); - s.append(id_type_str + "=\"" + id.to_str() + "\""); + if (const auto user = grant.get_user(); user) { + s.append("id=\"" + user->id.to_str() + "\""); + } else if (const auto email = grant.get_email(); email) { + s.append("emailAddress=\"" + email->address + "\""); + } 
else if (const auto group = grant.get_group(); group) { + std::string uri; + rgw::s3::acl_group_to_uri(group->type, uri); + s.append("uri=\"" + uri + "\""); + } } struct grant_type_to_header { @@ -535,7 +530,7 @@ struct grant_type_to_header grants_headers_def[] = { { 0, NULL} }; -static bool grants_by_type_check_perm(map<int, string>& grants_by_type, int perm, ACLGrant& grant, int check_perm) +static bool grants_by_type_check_perm(map<int, string>& grants_by_type, int perm, const ACLGrant& grant, int check_perm) { if ((perm & check_perm) == check_perm) { grants_by_type_add_one_grant(grants_by_type, check_perm, grant); @@ -544,7 +539,7 @@ static bool grants_by_type_check_perm(map<int, string>& grants_by_type, int perm return false; } -static void grants_by_type_add_perm(map<int, string>& grants_by_type, int perm, ACLGrant& grant) +static void grants_by_type_add_perm(map<int, string>& grants_by_type, int perm, const ACLGrant& grant) { struct grant_type_to_header *t; @@ -669,16 +664,13 @@ void RGWRESTGenerateHTTPHeaders::set_http_attrs(const map<string, string>& http_ } } -void RGWRESTGenerateHTTPHeaders::set_policy(RGWAccessControlPolicy& policy) +void RGWRESTGenerateHTTPHeaders::set_policy(const RGWAccessControlPolicy& policy) { /* update acl headers */ - RGWAccessControlList& acl = policy.get_acl(); - multimap<string, ACLGrant>& grant_map = acl.get_grant_map(); - multimap<string, ACLGrant>::iterator giter; + const RGWAccessControlList& acl = policy.get_acl(); map<int, string> grants_by_type; - for (giter = grant_map.begin(); giter != grant_map.end(); ++giter) { - ACLGrant& grant = giter->second; - ACLPermission& perm = grant.get_permission(); + for (const auto& [id, grant] : acl.get_grant_map()) { + ACLPermission perm = grant.get_permission(); grants_by_type_add_perm(grants_by_type, perm.get_permissions(), grant); } add_grants_headers(grants_by_type, *new_env, new_info->x_meta_map); diff --git a/src/rgw/rgw_rest_client.h b/src/rgw/rgw_rest_client.h index 6e27576c676..923f8cc1783 100644 --- a/src/rgw/rgw_rest_client.h +++ b/src/rgw/rgw_rest_client.h @@ -94,7 +94,7 @@ public: void set_extra_headers(const std::map<std::string, std::string>& extra_headers); int set_obj_attrs(const DoutPrefixProvider *dpp, std::map<std::string, bufferlist>& rgw_attrs); void set_http_attrs(const std::map<std::string, std::string>& http_attrs); - void set_policy(RGWAccessControlPolicy& policy); + void set_policy(const RGWAccessControlPolicy& policy); int sign(const DoutPrefixProvider *dpp, RGWAccessKey& key, const bufferlist *opt_content); const std::string& get_url() { return url; } diff --git a/src/rgw/rgw_rest_metadata.cc b/src/rgw/rgw_rest_metadata.cc index 23f78819c64..d7881c22890 100644 --- a/src/rgw/rgw_rest_metadata.cc +++ b/src/rgw/rgw_rest_metadata.cc @@ -69,9 +69,7 @@ void RGWOp_Metadata_Get::execute(optional_yield y) { } void RGWOp_Metadata_Get_Myself::execute(optional_yield y) { - string owner_id; - - owner_id = s->owner.get_id().to_str(); + const std::string owner_id = s->owner.id.to_str(); s->info.args.append("key", owner_id); return RGWOp_Metadata_Get::execute(y); diff --git a/src/rgw/rgw_rest_pubsub.cc b/src/rgw/rgw_rest_pubsub.cc index 843b7eb7d08..f43a9ef2722 100644 --- a/src/rgw/rgw_rest_pubsub.cc +++ b/src/rgw/rgw_rest_pubsub.cc @@ -72,7 +72,7 @@ std::optional<rgw::IAM::Policy> get_policy_from_text(req_state* const s, const auto bl = bufferlist::static_from_string(policy_text); try { return rgw::IAM::Policy( - s->cct, s->owner.get_id().tenant, bl, + s->cct, s->owner.id.tenant, bl, 
s->cct->_conf.get_val<bool>("rgw_policy_reject_invalid_principals")); } catch (rgw::IAM::PolicyParseException& e) { ldout(s->cct, 1) << "failed to parse policy:' " << policy_text @@ -86,7 +86,7 @@ int verify_topic_owner_or_policy(req_state* const s, const rgw_pubsub_topic& topic, const std::string& zonegroup_name, const uint64_t op) { - if (topic.user == s->owner.get_id()) { + if (topic.user == s->owner.id) { return 0; } // no policy set. @@ -187,7 +187,7 @@ class RGWPSCreateTopicOp : public RGWOp { return ret; } - const RGWPubSub ps(driver, s->owner.get_id().tenant); + const RGWPubSub ps(driver, s->owner.id.tenant); rgw_pubsub_topic result; ret = ps.get_topic(this, topic_name, result, y); if (ret == -ENOENT) { @@ -195,7 +195,7 @@ class RGWPSCreateTopicOp : public RGWOp { return 0; } if (ret == 0) { - if (result.user == s->owner.get_id() || + if (result.user == s->owner.id || !s->cct->_conf->mandatory_topic_permissions) { return 0; } @@ -251,9 +251,9 @@ void RGWPSCreateTopicOp::execute(optional_yield y) { return; } } - const RGWPubSub ps(driver, s->owner.get_id().tenant); + const RGWPubSub ps(driver, s->owner.id.tenant); op_ret = ps.create_topic(this, topic_name, dest, topic_arn, opaque_data, - s->owner.get_id(), policy_text, y); + s->owner.id, policy_text, y); if (op_ret < 0) { ldpp_dout(this, 1) << "failed to create topic '" << topic_name << "', ret=" << op_ret << dendl; return; @@ -306,7 +306,7 @@ public: }; void RGWPSListTopicsOp::execute(optional_yield y) { - const RGWPubSub ps(driver, s->owner.get_id().tenant); + const RGWPubSub ps(driver, s->owner.id.tenant); op_ret = ps.get_topics(this, result, y); // if there are no topics it is not considered an error op_ret = op_ret == -ENOENT ? 0 : op_ret; @@ -393,7 +393,7 @@ void RGWPSGetTopicOp::execute(optional_yield y) { if (op_ret < 0) { return; } - const RGWPubSub ps(driver, s->owner.get_id().tenant); + const RGWPubSub ps(driver, s->owner.id.tenant); op_ret = ps.get_topic(this, topic_name, result, y); if (op_ret < 0) { ldpp_dout(this, 1) << "failed to get topic '" << topic_name << "', ret=" << op_ret << dendl; @@ -477,7 +477,7 @@ void RGWPSGetTopicAttributesOp::execute(optional_yield y) { if (op_ret < 0) { return; } - const RGWPubSub ps(driver, s->owner.get_id().tenant); + const RGWPubSub ps(driver, s->owner.id.tenant); op_ret = ps.get_topic(this, topic_name, result, y); if (op_ret < 0) { ldpp_dout(this, 1) << "failed to get topic '" << topic_name << "', ret=" << op_ret << dendl; @@ -605,7 +605,7 @@ class RGWPSSetTopicAttributesOp : public RGWOp { return ret; } rgw_pubsub_topic result; - const RGWPubSub ps(driver, s->owner.get_id().tenant); + const RGWPubSub ps(driver, s->owner.id.tenant); ret = ps.get_topic(this, topic_name, result, y); if (ret < 0) { ldpp_dout(this, 1) << "failed to get topic '" << topic_name @@ -672,7 +672,7 @@ void RGWPSSetTopicAttributesOp::execute(optional_yield y) { return; } } - const RGWPubSub ps(driver, s->owner.get_id().tenant); + const RGWPubSub ps(driver, s->owner.id.tenant); op_ret = ps.create_topic(this, topic_name, dest, topic_arn, opaque_data, topic_owner, policy_text, y); if (op_ret < 0) { @@ -742,7 +742,7 @@ void RGWPSDeleteTopicOp::execute(optional_yield y) { if (op_ret < 0) { return; } - const RGWPubSub ps(driver, s->owner.get_id().tenant); + const RGWPubSub ps(driver, s->owner.id.tenant); rgw_pubsub_topic result; op_ret = ps.get_topic(this, topic_name, result, y); if (op_ret == 0) { @@ -970,7 +970,7 @@ void RGWPSCreateNotifOp::execute(optional_yield y) { return; } - const RGWPubSub ps(driver, 
s->owner.get_id().tenant); + const RGWPubSub ps(driver, s->owner.id.tenant); const RGWPubSub::Bucket b(ps, bucket.get()); if(configurations.list.empty()) { @@ -1042,7 +1042,7 @@ void RGWPSCreateNotifOp::execute(optional_yield y) { // ARN is cached to make the "GET" method faster op_ret = ps.create_topic(this, unique_topic_name, topic_info.dest, topic_info.arn, topic_info.opaque_data, - s->owner.get_id(), topic_info.policy_text, y); + s->owner.id, topic_info.policy_text, y); if (op_ret < 0) { ldpp_dout(this, 1) << "failed to auto-generate unique topic '" << unique_topic_name << "', ret=" << op_ret << dendl; @@ -1118,7 +1118,7 @@ void RGWPSDeleteNotifOp::execute(optional_yield y) { return; } - const RGWPubSub ps(driver, s->owner.get_id().tenant); + const RGWPubSub ps(driver, s->owner.id.tenant); const RGWPubSub::Bucket b(ps, bucket.get()); // get all topics on a bucket @@ -1215,7 +1215,7 @@ void RGWPSListNotifsOp::execute(optional_yield y) { return; } - const RGWPubSub ps(driver, s->owner.get_id().tenant); + const RGWPubSub ps(driver, s->owner.id.tenant); const RGWPubSub::Bucket b(ps, bucket.get()); // get all topics on a bucket diff --git a/src/rgw/rgw_rest_s3.cc b/src/rgw/rgw_rest_s3.cc index f12d5e73300..8e0e5a9924c 100644 --- a/src/rgw/rgw_rest_s3.cc +++ b/src/rgw/rgw_rest_s3.cc @@ -2383,17 +2383,19 @@ void RGWStatBucket_ObjStore_S3::send_response() } static int create_s3_policy(req_state *s, rgw::sal::Driver* driver, - RGWAccessControlPolicy_S3& s3policy, - ACLOwner& owner) + RGWAccessControlPolicy& policy, + const ACLOwner& owner) { if (s->has_acl_header) { if (!s->canned_acl.empty()) return -ERR_INVALID_REQUEST; - return s3policy.create_from_headers(s, driver, s->info.env, owner); + return rgw::s3::create_policy_from_headers(s, driver, owner, + *s->info.env, policy); } - return s3policy.create_canned(owner, s->bucket_owner, s->canned_acl); + return rgw::s3::create_canned_acl(owner, s->bucket_owner, + s->canned_acl, policy); } class RGWLocationConstraint : public XMLObj @@ -2447,7 +2449,6 @@ public: int RGWCreateBucket_ObjStore_S3::get_params(optional_yield y) { - RGWAccessControlPolicy_S3 s3policy(s->cct); bool relaxed_names = s->cct->_conf->rgw_relaxed_s3_bucket_names; int r; @@ -2456,12 +2457,10 @@ int RGWCreateBucket_ObjStore_S3::get_params(optional_yield y) if (r) return r; } - r = create_s3_policy(s, driver, s3policy, s->owner); + r = create_s3_policy(s, driver, policy, s->owner); if (r < 0) return r; - policy = s3policy; - const auto max_size = s->cct->_conf->rgw_max_put_param_size; int op_ret = 0; @@ -2589,13 +2588,10 @@ int RGWPutObj_ObjStore_S3::get_params(optional_yield y) return ret; } - RGWAccessControlPolicy_S3 s3policy(s->cct); - ret = create_s3_policy(s, driver, s3policy, s->owner); + ret = create_s3_policy(s, driver, policy, s->owner); if (ret < 0) return ret; - policy = s3policy; - if_match = s->info.env->get("HTTP_IF_MATCH"); if_nomatch = s->info.env->get("HTTP_IF_NONE_MATCH"); @@ -3162,8 +3158,8 @@ int RGWPostObj_ObjStore_S3::get_policy(optional_yield y) return -EACCES; } else { /* Populate the owner info. */ - s->owner.set_id(s->user->get_id()); - s->owner.set_name(s->user->get_display_name()); + s->owner.id = s->user->get_id(); + s->owner.display_name = s->user->get_display_name(); ldpp_dout(this, 20) << "Successful Signature Verification!" 
<< dendl; } @@ -3215,15 +3211,14 @@ int RGWPostObj_ObjStore_S3::get_policy(optional_yield y) string canned_acl; part_str(parts, "acl", &canned_acl); - RGWAccessControlPolicy_S3 s3policy(s->cct); ldpp_dout(this, 20) << "canned_acl=" << canned_acl << dendl; - if (s3policy.create_canned(s->owner, s->bucket_owner, canned_acl) < 0) { + int r = rgw::s3::create_canned_acl(s->owner, s->bucket_owner, + canned_acl, policy); + if (r < 0) { err_msg = "Bad canned ACLs"; - return -EINVAL; + return r; } - policy = s3policy; - return 0; } @@ -3383,6 +3378,9 @@ done: if (op_ret >= 0) { dump_content_length(s, s->formatter->get_len()); } + if (op_ret == STATUS_NO_CONTENT) { + dump_etag(s, etag); + } end_header(s, this); if (op_ret != STATUS_CREATED) return; @@ -3450,16 +3448,8 @@ void RGWDeleteObj_ObjStore_S3::send_response() int RGWCopyObj_ObjStore_S3::init_dest_policy() { - RGWAccessControlPolicy_S3 s3policy(s->cct); - /* build a policy for the target object */ - int r = create_s3_policy(s, driver, s3policy, s->owner); - if (r < 0) - return r; - - dest_policy = s3policy; - - return 0; + return create_s3_policy(s, driver, dest_policy, s->owner); } int RGWCopyObj_ObjStore_S3::get_params(optional_yield y) @@ -3621,25 +3611,16 @@ int RGWPutACLs_ObjStore_S3::get_params(optional_yield y) return ret; } -int RGWPutACLs_ObjStore_S3::get_policy_from_state(rgw::sal::Driver* driver, - req_state *s, - stringstream& ss) +int RGWPutACLs_ObjStore_S3::get_policy_from_state(const ACLOwner& owner, + RGWAccessControlPolicy& policy) { - RGWAccessControlPolicy_S3 s3policy(s->cct); - // bucket-* canned acls do not apply to bucket if (rgw::sal::Object::empty(s->object.get())) { if (s->canned_acl.find("bucket") != string::npos) s->canned_acl.clear(); } - int r = create_s3_policy(s, driver, s3policy, owner); - if (r < 0) - return r; - - s3policy.to_xml(ss); - - return 0; + return create_s3_policy(s, driver, policy, owner); } void RGWPutACLs_ObjStore_S3::send_response() @@ -3972,14 +3953,7 @@ int RGWInitMultipart_ObjStore_S3::get_params(optional_yield y) return ret; } - RGWAccessControlPolicy_S3 s3policy(s->cct); - ret = create_s3_policy(s, driver, s3policy, s->owner); - if (ret < 0) - return ret; - - policy = s3policy; - - return 0; + return create_s3_policy(s, driver, policy, s->owner); } void RGWInitMultipart_ObjStore_S3::send_response() @@ -4107,7 +4081,7 @@ void RGWListMultipart_ObjStore_S3::send_response() s->formatter->dump_string("IsTruncated", (truncated ? "true" : "false")); ACLOwner& owner = policy.get_owner(); - dump_owner(s, owner.get_id(), owner.get_display_name()); + dump_owner(s, owner.id, owner.display_name); for (; iter != upload->get_parts().end(); ++iter) { rgw::sal::MultipartPart* part = iter->second.get(); @@ -4170,8 +4144,8 @@ void RGWListBucketMultiparts_ObjStore_S3::send_response() } s->formatter->dump_string("UploadId", upload->get_upload_id()); const ACLOwner& owner = upload->get_owner(); - dump_owner(s, owner.get_id(), owner.get_display_name(), "Initiator"); - dump_owner(s, owner.get_id(), owner.get_display_name()); // Owner + dump_owner(s, owner.id, owner.display_name, "Initiator"); + dump_owner(s, owner.id, owner.display_name); // Owner s->formatter->dump_string("StorageClass", "STANDARD"); dump_time(s, "Initiated", upload->get_mtime()); s->formatter->close_section(); @@ -5098,8 +5072,8 @@ int RGW_Auth_S3::authorize(const DoutPrefixProvider *dpp, const auto ret = rgw::auth::Strategy::apply(dpp, auth_registry.get_s3_main(), s, y); if (ret == 0) { /* Populate the owner info. 
*/ - s->owner.set_id(s->user->get_id()); - s->owner.set_name(s->user->get_display_name()); + s->owner.id = s->user->get_id(); + s->owner.display_name = s->user->get_display_name(); } return ret; } diff --git a/src/rgw/rgw_rest_s3.h b/src/rgw/rgw_rest_s3.h index c73971a5fe6..d15ddaba35a 100644 --- a/src/rgw/rgw_rest_s3.h +++ b/src/rgw/rgw_rest_s3.h @@ -355,7 +355,8 @@ public: RGWPutACLs_ObjStore_S3() {} ~RGWPutACLs_ObjStore_S3() override {} - int get_policy_from_state(rgw::sal::Driver* driver, req_state *s, std::stringstream& ss) override; + int get_policy_from_state(const ACLOwner& owner, + RGWAccessControlPolicy& p) override; void send_response() override; int get_params(optional_yield y) override; }; diff --git a/src/rgw/rgw_rest_swift.cc b/src/rgw/rgw_rest_swift.cc index 25ba6184161..46f02805cd1 100644 --- a/src/rgw/rgw_rest_swift.cc +++ b/src/rgw/rgw_rest_swift.cc @@ -92,7 +92,7 @@ static void dump_account_metadata(req_state * const s, /* const */map<string, bufferlist>& attrs, const RGWQuotaInfo& quota, int32_t max_buckets, - const RGWAccessControlPolicy_SWIFTAcct &policy) + const RGWAccessControlPolicy& policy) { /* Adding X-Timestamp to keep align with Swift API */ dump_header(s, "X-Timestamp", ceph_clock_now()); @@ -164,8 +164,8 @@ static void dump_account_metadata(req_state * const s, } } - /* Dump account ACLs */ - auto account_acls = policy.to_str(); + /* Dump account ACLs, if any */ + auto account_acls = rgw::swift::format_account_acl(policy); if (account_acls) { dump_header(s, "X-Account-Access-Control", std::move(*account_acls)); } @@ -188,7 +188,7 @@ void RGWListBuckets_ObjStore_SWIFT::send_response_begin(bool has_buckets) s->user->get_attrs(), s->user->get_info().quota.user_quota, s->user->get_max_buckets(), - static_cast<RGWAccessControlPolicy_SWIFTAcct&>(*s->user_acl)); + s->user_acl); dump_errno(s); dump_header(s, "Accept-Ranges", "bytes"); end_header(s, NULL, NULL, NO_CONTENT_LENGTH, true); @@ -287,7 +287,7 @@ void RGWListBuckets_ObjStore_SWIFT::send_response_end() s->user->get_attrs(), s->user->get_info().quota.user_quota, s->user->get_max_buckets(), - static_cast<RGWAccessControlPolicy_SWIFTAcct&>(*s->user_acl)); + s->user_acl); dump_errno(s); end_header(s, nullptr, nullptr, s->formatter->get_len(), true); } @@ -470,10 +470,8 @@ static void dump_container_metadata(req_state *s, } if (rgw::sal::Object::empty(s->object.get())) { - auto swift_policy = \ - static_cast<RGWAccessControlPolicy_SWIFT*>(s->bucket_acl.get()); std::string read_acl, write_acl; - swift_policy->to_str(read_acl, write_acl); + rgw::swift::format_container_acls(s->bucket_acl, read_acl, write_acl); if (read_acl.size()) { dump_header(s, "X-Container-Read", read_acl); @@ -567,7 +565,7 @@ void RGWStatAccount_ObjStore_SWIFT::send_response() attrs, s->user->get_info().quota.user_quota, s->user->get_max_buckets(), - static_cast<RGWAccessControlPolicy_SWIFTAcct&>(*s->user_acl)); + s->user_acl); } set_req_state_err(s, op_ret); @@ -595,7 +593,7 @@ void RGWStatBucket_ObjStore_SWIFT::send_response() static int get_swift_container_settings(req_state * const s, rgw::sal::Driver* const driver, - RGWAccessControlPolicy * const policy, + RGWAccessControlPolicy& policy, bool * const has_policy, uint32_t * rw_mask, RGWCORSConfiguration * const cors_config, @@ -607,18 +605,17 @@ static int get_swift_container_settings(req_state * const s, *has_policy = false; if (read_list || write_list) { - RGWAccessControlPolicy_SWIFT swift_policy(s->cct); - const auto r = swift_policy.create(s, driver, - s->user->get_id(), - 
s->user->get_display_name(), - read_list, - write_list, - *rw_mask); + int r = rgw::swift::create_container_policy(s, driver, + s->user->get_id(), + s->user->get_display_name(), + read_list, + write_list, + *rw_mask, + policy); if (r < 0) { return r; } - *policy = swift_policy; *has_policy = true; } @@ -716,7 +713,7 @@ int RGWCreateBucket_ObjStore_SWIFT::get_params(optional_yield y) bool has_policy; uint32_t policy_rw_mask = 0; - int r = get_swift_container_settings(s, driver, &policy, &has_policy, + int r = get_swift_container_settings(s, driver, policy, &has_policy, &policy_rw_mask, &cors_config, &has_cors); if (r < 0) { return r; @@ -1060,23 +1057,21 @@ void RGWPutObj_ObjStore_SWIFT::send_response() static int get_swift_account_settings(req_state * const s, rgw::sal::Driver* const driver, - RGWAccessControlPolicy_SWIFTAcct* const policy, + RGWAccessControlPolicy& policy, bool * const has_policy) { *has_policy = false; const char * const acl_attr = s->info.env->get("HTTP_X_ACCOUNT_ACCESS_CONTROL"); if (acl_attr) { - RGWAccessControlPolicy_SWIFTAcct swift_acct_policy(s->cct); - const bool r = swift_acct_policy.create(s, driver, - s->user->get_id(), - s->user->get_display_name(), - string(acl_attr)); - if (r != true) { - return -EINVAL; + int r = rgw::swift::create_account_policy(s, driver, + s->user->get_id(), + s->user->get_display_name(), + acl_attr, policy); + if (r < 0) { + return r; } - *policy = swift_acct_policy; *has_policy = true; } @@ -1089,12 +1084,7 @@ int RGWPutMetadataAccount_ObjStore_SWIFT::get_params(optional_yield y) return -EINVAL; } - int ret = get_swift_account_settings(s, - driver, - // FIXME: we need to carry unique_ptr in generic class - // and allocate appropriate ACL class in the ctor - static_cast<RGWAccessControlPolicy_SWIFTAcct *>(&policy), - &has_policy); + int ret = get_swift_account_settings(s, driver, policy, &has_policy); if (ret < 0) { return ret; } @@ -1127,7 +1117,7 @@ int RGWPutMetadataBucket_ObjStore_SWIFT::get_params(optional_yield y) return -EINVAL; } - int r = get_swift_container_settings(s, driver, &policy, &has_policy, + int r = get_swift_container_settings(s, driver, policy, &has_policy, &policy_rw_mask, &cors_config, &has_cors); if (r < 0) { return r; diff --git a/src/rgw/rgw_sal.cc b/src/rgw/rgw_sal.cc index 9aada70899f..6c0212dae81 100644 --- a/src/rgw/rgw_sal.cc +++ b/src/rgw/rgw_sal.cc @@ -270,17 +270,18 @@ rgw::sal::Driver* DriverManager::init_raw_storage_provider(const DoutPrefixProvi rados->set_context(cct); - int ret = rados->init_svc(true, dpp); - if (ret < 0) { - ldout(cct, 0) << "ERROR: failed to init services (ret=" << cpp_strerror(-ret) << ")" << dendl; + if (rados->init_rados() < 0) { delete driver; return nullptr; } - if (rados->init_rados() < 0) { + int ret = rados->init_svc(true, dpp); + if (ret < 0) { + ldout(cct, 0) << "ERROR: failed to init services (ret=" << cpp_strerror(-ret) << ")" << dendl; delete driver; return nullptr; } + if (driver->initialize(cct, dpp) < 0) { delete driver; return nullptr; diff --git a/src/rgw/rgw_sal.h b/src/rgw/rgw_sal.h index cc45d5f3a2c..8771b74c87a 100644 --- a/src/rgw/rgw_sal.h +++ b/src/rgw/rgw_sal.h @@ -15,6 +15,9 @@ #pragma once +#include <boost/intrusive_ptr.hpp> +#include <boost/smart_ptr/intrusive_ref_counter.hpp> + #include "common/tracer.h" #include "rgw_sal_fwd.h" #include "rgw_lua.h" @@ -28,7 +31,6 @@ struct RGWBucketEnt; class RGWRESTMgr; -class RGWAccessListFilter; class RGWLC; struct rgw_user_bucket; class RGWUsageBatch; @@ -81,32 +83,6 @@ struct RGWClusterStat { uint64_t 
num_objects; }; -class RGWGetBucketStats_CB : public RefCountedObject { -protected: - rgw_bucket bucket; - std::map<RGWObjCategory, RGWStorageStats>* stats; -public: - explicit RGWGetBucketStats_CB(const rgw_bucket& _bucket) : bucket(_bucket), stats(NULL) {} - ~RGWGetBucketStats_CB() override {} - virtual void handle_response(int r) = 0; - virtual void set_response(std::map<RGWObjCategory, RGWStorageStats>* _stats) { - stats = _stats; - } -}; - -class RGWGetUserStats_CB : public RefCountedObject { -protected: - rgw_user user; - RGWStorageStats stats; -public: - explicit RGWGetUserStats_CB(const rgw_user& _user) : user(_user) {} - ~RGWGetUserStats_CB() override {} - virtual void handle_response(int r) = 0; - virtual void set_response(RGWStorageStats& _stats) { - stats = _stats; - } -}; - struct RGWObjState { rgw_obj obj; bool is_atomic{false}; @@ -449,6 +425,14 @@ class Driver { virtual void register_admin_apis(RGWRESTMgr* mgr) = 0; }; + +/// \brief Ref-counted callback object for User/Bucket read_stats_async(). +class ReadStatsCB : public boost::intrusive_ref_counter<ReadStatsCB> { + public: + virtual ~ReadStatsCB() {} + virtual void handle_response(int r, const RGWStorageStats& stats) = 0; +}; + /** * @brief A list of buckets * @@ -530,7 +514,8 @@ class User { ceph::real_time* last_stats_sync = nullptr, ceph::real_time* last_stats_update = nullptr) = 0; /** Read the User stats from the backing Store, asynchronous */ - virtual int read_stats_async(const DoutPrefixProvider *dpp, RGWGetUserStats_CB* cb) = 0; + virtual int read_stats_async(const DoutPrefixProvider *dpp, + boost::intrusive_ptr<ReadStatsCB> cb) = 0; /** Flush accumulated stat changes for this User to the backing store */ virtual int complete_flush_stats(const DoutPrefixProvider *dpp, optional_yield y) = 0; /** Read detailed usage stats for this User from the backing store */ @@ -596,7 +581,7 @@ class Bucket { rgw_obj_key end_marker; std::string ns; bool enforce_ns{true}; - RGWAccessListFilter* access_list_filter{nullptr}; + rgw::AccessListFilter access_list_filter{}; RGWBucketListNameFilter force_check_filter; bool list_versions{false}; bool allow_unordered{false}; @@ -683,7 +668,7 @@ class Bucket { /** Read the bucket stats from the backing Store, asynchronous */ virtual int read_stats_async(const DoutPrefixProvider *dpp, const bucket_index_layout_generation& idx_layout, - int shard_id, RGWGetBucketStats_CB* ctx) = 0; + int shard_id, boost::intrusive_ptr<ReadStatsCB> cb) = 0; /** Sync this bucket's stats to the owning user's stats in the backing store */ virtual int sync_user_stats(const DoutPrefixProvider *dpp, optional_yield y, RGWBucketEnt* optional_ent) = 0; diff --git a/src/rgw/rgw_sal_dbstore.cc b/src/rgw/rgw_sal_dbstore.cc index 1a28a7724f4..308404c87b8 100644 --- a/src/rgw/rgw_sal_dbstore.cc +++ b/src/rgw/rgw_sal_dbstore.cc @@ -90,7 +90,7 @@ namespace rgw::sal { } /* stats - Not for first pass */ - int DBUser::read_stats_async(const DoutPrefixProvider *dpp, RGWGetUserStats_CB *cb) + int DBUser::read_stats_async(const DoutPrefixProvider *dpp, boost::intrusive_ptr<ReadStatsCB> cb) { return 0; } @@ -221,7 +221,7 @@ namespace rgw::sal { return 0; } - int DBBucket::read_stats_async(const DoutPrefixProvider *dpp, const bucket_index_layout_generation& idx_layout, int shard_id, RGWGetBucketStats_CB *ctx) + int DBBucket::read_stats_async(const DoutPrefixProvider *dpp, const bucket_index_layout_generation& idx_layout, int shard_id, boost::intrusive_ptr<ReadStatsCB> ctx) { return 0; } @@ -354,7 +354,7 @@ namespace rgw::sal { 
Attrs attrs = get_attrs(); attrs[RGW_ATTR_ACL] = aclbl; - ret = store->getDB()->update_bucket(dpp, "attrs", info, false, &(acl.get_owner().get_id()), &attrs, nullptr, nullptr); + ret = store->getDB()->update_bucket(dpp, "attrs", info, false, &acl.get_owner().id, &attrs, nullptr, nullptr); return ret; } @@ -701,7 +701,7 @@ namespace rgw::sal { int DBObject::DBDeleteOp::delete_obj(const DoutPrefixProvider* dpp, optional_yield y) { - parent_op.params.bucket_owner = params.bucket_owner.get_id(); + parent_op.params.bucket_owner = params.bucket_owner.id; parent_op.params.versioning_status = params.versioning_status; parent_op.params.obj_owner = params.obj_owner; parent_op.params.olh_epoch = params.olh_epoch; @@ -793,7 +793,7 @@ namespace rgw::sal { int ret; std::unique_ptr<rgw::sal::Object::DeleteOp> del_op = meta_obj->get_delete_op(); - del_op->params.bucket_owner = bucket->get_info().owner; + del_op->params.bucket_owner.id = bucket->get_info().owner; del_op->params.versioning_status = 0; // Since the data objects are associated with meta obj till @@ -833,7 +833,7 @@ namespace rgw::sal { DB::Object::Write obj_op(&op_target); /* Create meta object */ - obj_op.meta.owner = owner.get_id(); + obj_op.meta.owner = owner.id; obj_op.meta.category = RGWObjCategory::MultiMeta; obj_op.meta.flags = PUT_OBJ_CREATE_EXCL; obj_op.meta.mtime = &mtime; @@ -1012,7 +1012,7 @@ namespace rgw::sal { DB::Object::Write obj_op(&op_target); ret = obj_op.prepare(dpp); - obj_op.meta.owner = owner.get_id(); + obj_op.meta.owner = owner.id; obj_op.meta.flags = PUT_OBJ_CREATE; obj_op.meta.category = RGWObjCategory::Main; obj_op.meta.modify_tail = true; diff --git a/src/rgw/rgw_sal_dbstore.h b/src/rgw/rgw_sal_dbstore.h index 646ee23f887..bfa424e269f 100644 --- a/src/rgw/rgw_sal_dbstore.h +++ b/src/rgw/rgw_sal_dbstore.h @@ -93,7 +93,7 @@ protected: optional_yield y, RGWStorageStats* stats, ceph::real_time *last_stats_sync = nullptr, ceph::real_time *last_stats_update = nullptr) override; - virtual int read_stats_async(const DoutPrefixProvider *dpp, RGWGetUserStats_CB* cb) override; + virtual int read_stats_async(const DoutPrefixProvider *dpp, boost::intrusive_ptr<ReadStatsCB> cb) override; virtual int complete_flush_stats(const DoutPrefixProvider *dpp, optional_yield y) override; virtual int read_usage(const DoutPrefixProvider *dpp, uint64_t start_epoch, uint64_t end_epoch, uint32_t max_entries, bool* is_truncated, RGWUsageIter& usage_iter, @@ -155,7 +155,7 @@ protected: std::map<RGWObjCategory, RGWStorageStats>& stats, std::string *max_marker = nullptr, bool *syncstopped = nullptr) override; - virtual int read_stats_async(const DoutPrefixProvider *dpp, const bucket_index_layout_generation& idx_layout, int shard_id, RGWGetBucketStats_CB* ctx) override; + virtual int read_stats_async(const DoutPrefixProvider *dpp, const bucket_index_layout_generation& idx_layout, int shard_id, boost::intrusive_ptr<ReadStatsCB> ctx) override; int sync_user_stats(const DoutPrefixProvider *dpp, optional_yield y, RGWBucketEnt* ent) override; int check_bucket_shards(const DoutPrefixProvider *dpp, diff --git a/src/rgw/rgw_sal_filter.cc b/src/rgw/rgw_sal_filter.cc index b49c6887b62..decd648adb0 100644 --- a/src/rgw/rgw_sal_filter.cc +++ b/src/rgw/rgw_sal_filter.cc @@ -508,7 +508,7 @@ int FilterUser::read_stats(const DoutPrefixProvider *dpp, return next->read_stats(dpp, y, stats, last_stats_sync, last_stats_update); } -int FilterUser::read_stats_async(const DoutPrefixProvider *dpp, RGWGetUserStats_CB* cb) +int FilterUser::read_stats_async(const 
DoutPrefixProvider *dpp, boost::intrusive_ptr<ReadStatsCB> cb) { return next->read_stats_async(dpp, cb); } @@ -613,7 +613,7 @@ int FilterBucket::read_stats(const DoutPrefixProvider *dpp, int FilterBucket::read_stats_async(const DoutPrefixProvider *dpp, const bucket_index_layout_generation& idx_layout, - int shard_id, RGWGetBucketStats_CB* ctx) + int shard_id, boost::intrusive_ptr<ReadStatsCB> ctx) { return next->read_stats_async(dpp, idx_layout, shard_id, ctx); } diff --git a/src/rgw/rgw_sal_filter.h b/src/rgw/rgw_sal_filter.h index 239a80a6465..aebbe383714 100644 --- a/src/rgw/rgw_sal_filter.h +++ b/src/rgw/rgw_sal_filter.h @@ -338,7 +338,7 @@ public: ceph::real_time* last_stats_sync = nullptr, ceph::real_time* last_stats_update = nullptr) override; virtual int read_stats_async(const DoutPrefixProvider *dpp, - RGWGetUserStats_CB* cb) override; + boost::intrusive_ptr<ReadStatsCB> cb) override; virtual int complete_flush_stats(const DoutPrefixProvider *dpp, optional_yield y) override; virtual int read_usage(const DoutPrefixProvider *dpp, uint64_t start_epoch, uint64_t end_epoch, uint32_t max_entries, @@ -397,7 +397,7 @@ public: bool* syncstopped = nullptr) override; virtual int read_stats_async(const DoutPrefixProvider *dpp, const bucket_index_layout_generation& idx_layout, - int shard_id, RGWGetBucketStats_CB* ctx) override; + int shard_id, boost::intrusive_ptr<ReadStatsCB> ctx) override; int sync_user_stats(const DoutPrefixProvider *dpp, optional_yield y, RGWBucketEnt* ent) override; int check_bucket_shards(const DoutPrefixProvider* dpp, diff --git a/src/rgw/rgw_sal_fwd.h b/src/rgw/rgw_sal_fwd.h index 08866c2bea2..123d17a5162 100644 --- a/src/rgw/rgw_sal_fwd.h +++ b/src/rgw/rgw_sal_fwd.h @@ -15,8 +15,21 @@ #pragma once +#include <functional> +#include <string> -namespace rgw { namespace sal { +namespace rgw { +using AccessListFilter = + std::function<bool(const std::string&, std::string&)>; + +inline auto AccessListFilterPrefix(std::string prefix) { + return [prefix = std::move(prefix)](const std::string& name, + std::string& key) { + return (prefix.compare(key.substr(0, prefix.size())) == 0); + }; +} + +namespace sal { class Driver; class User; @@ -32,6 +45,9 @@ namespace rgw { namespace sal { class Zone; class LuaManager; struct RGWRoleInfo; + class DataProcessor; + class ObjectProcessor; + class ReadStatsCB; class ConfigStore; class RealmWriter; diff --git a/src/rgw/rgw_user_types.h b/src/rgw/rgw_user_types.h index c9a1a46ade1..1aaf4cfa5d3 100644 --- a/src/rgw/rgw_user_types.h +++ b/src/rgw/rgw_user_types.h @@ -26,9 +26,10 @@ #include "common/Formatter.h" struct rgw_user { + // note: order of member variables matches the sort order of operator<=> std::string tenant; - std::string id; std::string ns; + std::string id; rgw_user() {} explicit rgw_user(const std::string& s) { @@ -36,13 +37,13 @@ struct rgw_user { } rgw_user(const std::string& tenant, const std::string& id, const std::string& ns="") : tenant(tenant), - id(id), - ns(ns) { + ns(ns), + id(id) { } rgw_user(std::string&& tenant, std::string&& id, std::string&& ns="") : tenant(std::move(tenant)), - id(std::move(id)), - ns(std::move(ns)) { + ns(std::move(ns)), + id(std::move(id)) { } void encode(ceph::buffer::list& bl) const { @@ -118,40 +119,8 @@ struct rgw_user { return *this; } - int compare(const rgw_user& u) const { - int r = tenant.compare(u.tenant); - if (r != 0) - return r; - r = ns.compare(u.ns); - if (r != 0) { - return r; - } - return id.compare(u.id); - } - int compare(const std::string& str) const { - rgw_user u(str); 
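RGWAccessListFilter and RGWAccessListFilterPrefix, previously a small class hierarchy in svc_rados.h, become the std::function alias and factory shown in the rgw_sal_fwd.h hunk above. A short usage sketch (values made up):

    rgw::AccessListFilter filter = rgw::AccessListFilterPrefix(".bucket.meta.");

    std::string key = ".bucket.meta.foo";
    std::string name = key;         // the prefix filter only inspects 'key'
    bool keep = filter(name, key);  // true: 'key' starts with the prefix

    // any predicate works for Bucket::ListParams::access_list_filter, e.g.:
    auto skip_shadow = [](const std::string&, std::string& key) {
      return key.rfind("_multipart_", 0) != 0;  // keep keys that do not start with the prefix (illustrative)
    };
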
- return compare(u); - } + friend auto operator<=>(const rgw_user&, const rgw_user&) = default; - bool operator!=(const rgw_user& rhs) const { - return (compare(rhs) != 0); - } - bool operator==(const rgw_user& rhs) const { - return (compare(rhs) == 0); - } - bool operator<(const rgw_user& rhs) const { - if (tenant < rhs.tenant) { - return true; - } else if (tenant > rhs.tenant) { - return false; - } - if (ns < rhs.ns) { - return true; - } else if (ns > rhs.ns) { - return false; - } - return (id < rhs.id); - } void dump(ceph::Formatter *f) const; static void generate_test_instances(std::list<rgw_user*>& o); }; diff --git a/src/rgw/rgw_zone.cc b/src/rgw/rgw_zone.cc index f41d987ec54..aeb58e2f48f 100644 --- a/src/rgw/rgw_zone.cc +++ b/src/rgw/rgw_zone.cc @@ -33,7 +33,6 @@ std::string zonegroup_names_oid_prefix = "zonegroups_names."; std::string RGW_DEFAULT_ZONE_ROOT_POOL = "rgw.root"; std::string RGW_DEFAULT_ZONEGROUP_ROOT_POOL = "rgw.root"; std::string RGW_DEFAULT_PERIOD_ROOT_POOL = "rgw.root"; -std::string avail_pools = ".pools.avail"; std::string default_storage_pool_suffix = "rgw.buckets.data"; } @@ -412,22 +411,14 @@ int RGWZoneParams::set_as_default(const DoutPrefixProvider *dpp, optional_yield int RGWZoneParams::create(const DoutPrefixProvider *dpp, optional_yield y, bool exclusive) { - /* check for old pools config */ - rgw_raw_obj obj(domain_root, avail_pools); - auto sysobj = sysobj_svc->get_obj(obj); - int r = sysobj.rop().stat(y, dpp); - if (r < 0) { - ldpp_dout(dpp, 10) << "couldn't find old data placement pools config, setting up new ones for the zone" << dendl; - /* a new system, let's set new placement info */ - RGWZonePlacementInfo default_placement; - default_placement.index_pool = name + "." + default_bucket_index_pool_suffix; - rgw_pool pool = name + "." + default_storage_pool_suffix; - default_placement.storage_classes.set_storage_class(RGW_STORAGE_CLASS_STANDARD, &pool, nullptr); - default_placement.data_extra_pool = name + "." + default_storage_extra_pool_suffix; - placement_pools["default-placement"] = default_placement; - } + RGWZonePlacementInfo default_placement; + default_placement.index_pool = name + "." + default_bucket_index_pool_suffix; + rgw_pool pool = name + "." + default_storage_pool_suffix; + default_placement.storage_classes.set_storage_class(RGW_STORAGE_CLASS_STANDARD, &pool, nullptr); + default_placement.data_extra_pool = name + "." 
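The hand-written compare() and relational operators on rgw_user are replaced by a defaulted operator<=>, which compares members in declaration order; that is also why the members were reordered to tenant, ns, id, so the derived ordering matches the old compare(). Illustration (values made up):

    rgw_user a("acme", "alice");  // tenant="acme", id="alice", ns=""
    rgw_user b("acme", "bob");

    bool lt = a < b;     // true: tenant and ns tie, then "alice" < "bob"
    bool eq = (a == b);  // false; == is implicitly provided alongside the defaulted <=>
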
+ default_storage_extra_pool_suffix; + placement_pools["default-placement"] = default_placement; - r = fix_pool_names(dpp, y); + int r = fix_pool_names(dpp, y); if (r < 0) { ldpp_dout(dpp, 0) << "ERROR: fix_pool_names returned r=" << r << dendl; return r; diff --git a/src/rgw/services/svc_bi_rados.cc b/src/rgw/services/svc_bi_rados.cc index 6002b986f59..20c842c3805 100644 --- a/src/rgw/services/svc_bi_rados.cc +++ b/src/rgw/services/svc_bi_rados.cc @@ -22,29 +22,27 @@ RGWSI_BucketIndex_RADOS::RGWSI_BucketIndex_RADOS(CephContext *cct) : RGWSI_Bucke } void RGWSI_BucketIndex_RADOS::init(RGWSI_Zone *zone_svc, - RGWSI_RADOS *rados_svc, - RGWSI_BILog_RADOS *bilog_svc, - RGWDataChangesLog *datalog_rados_svc) + librados::Rados* rados_, + RGWSI_BILog_RADOS *bilog_svc, + RGWDataChangesLog *datalog_rados_svc) { svc.zone = zone_svc; - svc.rados = rados_svc; + rados = rados_; svc.bilog = bilog_svc; svc.datalog_rados = datalog_rados_svc; } int RGWSI_BucketIndex_RADOS::open_pool(const DoutPrefixProvider *dpp, const rgw_pool& pool, - RGWSI_RADOS::Pool *index_pool, + librados::IoCtx* index_pool, bool mostly_omap) { - *index_pool = svc.rados->pool(pool); - return index_pool->open(dpp, RGWSI_RADOS::OpenParams() - .set_mostly_omap(mostly_omap)); + return rgw_init_ioctx(dpp, rados, pool, *index_pool, true, mostly_omap); } int RGWSI_BucketIndex_RADOS::open_bucket_index_pool(const DoutPrefixProvider *dpp, const RGWBucketInfo& bucket_info, - RGWSI_RADOS::Pool *index_pool) + librados::IoCtx* index_pool) { const rgw_pool& explicit_pool = bucket_info.bucket.explicit_placement.index_pool; @@ -74,7 +72,7 @@ int RGWSI_BucketIndex_RADOS::open_bucket_index_pool(const DoutPrefixProvider *dp int RGWSI_BucketIndex_RADOS::open_bucket_index_base(const DoutPrefixProvider *dpp, const RGWBucketInfo& bucket_info, - RGWSI_RADOS::Pool *index_pool, + librados::IoCtx* index_pool, string *bucket_oid_base) { const rgw_bucket& bucket = bucket_info.bucket; @@ -96,7 +94,7 @@ int RGWSI_BucketIndex_RADOS::open_bucket_index_base(const DoutPrefixProvider *dp int RGWSI_BucketIndex_RADOS::open_bucket_index(const DoutPrefixProvider *dpp, const RGWBucketInfo& bucket_info, - RGWSI_RADOS::Pool *index_pool, + librados::IoCtx* index_pool, string *bucket_oid) { const rgw_bucket& bucket = bucket_info.bucket; @@ -194,7 +192,7 @@ int RGWSI_BucketIndex_RADOS::open_bucket_index(const DoutPrefixProvider *dpp, const RGWBucketInfo& bucket_info, std::optional<int> _shard_id, const rgw::bucket_index_layout_generation& idx_layout, - RGWSI_RADOS::Pool *index_pool, + librados::IoCtx* index_pool, map<int, string> *bucket_objs, map<int, string> *bucket_instance_ids) { @@ -277,32 +275,27 @@ int RGWSI_BucketIndex_RADOS::get_bucket_index_object( int RGWSI_BucketIndex_RADOS::open_bucket_index_shard(const DoutPrefixProvider *dpp, const RGWBucketInfo& bucket_info, const string& obj_key, - RGWSI_RADOS::Obj *bucket_obj, + rgw_rados_ref* bucket_obj, int *shard_id) { string bucket_oid_base; - RGWSI_RADOS::Pool pool; - - int ret = open_bucket_index_base(dpp, bucket_info, &pool, &bucket_oid_base); + int ret = open_bucket_index_base(dpp, bucket_info, &bucket_obj->ioctx, &bucket_oid_base); if (ret < 0) { ldpp_dout(dpp, 20) << __func__ << ": open_bucket_index_pool() returned " << ret << dendl; return ret; } - string oid; - const auto& current_index = bucket_info.layout.current_index; ret = get_bucket_index_object(bucket_oid_base, current_index.layout.normal, - current_index.gen, obj_key, &oid, shard_id); + current_index.gen, obj_key, + &bucket_obj->obj.oid, shard_id); if (ret < 0) 
{ ldpp_dout(dpp, 10) << "get_bucket_index_object() returned ret=" << ret << dendl; return ret; } - *bucket_obj = svc.rados->obj(pool, oid); - return 0; } @@ -310,23 +303,19 @@ int RGWSI_BucketIndex_RADOS::open_bucket_index_shard(const DoutPrefixProvider *d const RGWBucketInfo& bucket_info, const rgw::bucket_index_layout_generation& index, int shard_id, - RGWSI_RADOS::Obj *bucket_obj) + rgw_rados_ref* bucket_obj) { - RGWSI_RADOS::Pool index_pool; string bucket_oid_base; - int ret = open_bucket_index_base(dpp, bucket_info, &index_pool, &bucket_oid_base); + int ret = open_bucket_index_base(dpp, bucket_info, &bucket_obj->ioctx, + &bucket_oid_base); if (ret < 0) { ldpp_dout(dpp, 20) << __func__ << ": open_bucket_index_pool() returned " << ret << dendl; return ret; } - string oid; - get_bucket_index_object(bucket_oid_base, index.layout.normal, - index.gen, shard_id, &oid); - - *bucket_obj = svc.rados->obj(index_pool, oid); + index.gen, shard_id, &bucket_obj->obj.oid); return 0; } @@ -339,7 +328,7 @@ int RGWSI_BucketIndex_RADOS::cls_bucket_head(const DoutPrefixProvider *dpp, map<int, string> *bucket_instance_ids, optional_yield y) { - RGWSI_RADOS::Pool index_pool; + librados::IoCtx index_pool; map<int, string> oids; int r = open_bucket_index(dpp, bucket_info, shard_id, idx_layout, &index_pool, &oids, bucket_instance_ids); if (r < 0) @@ -350,7 +339,8 @@ int RGWSI_BucketIndex_RADOS::cls_bucket_head(const DoutPrefixProvider *dpp, list_results.emplace(iter.first, rgw_cls_list_ret()); } - r = CLSRGWIssueGetDirHeader(index_pool.ioctx(), oids, list_results, cct->_conf->rgw_bucket_index_max_aio)(); + r = CLSRGWIssueGetDirHeader(index_pool, oids, list_results, + cct->_conf->rgw_bucket_index_max_aio)(); if (r < 0) return r; @@ -361,9 +351,9 @@ int RGWSI_BucketIndex_RADOS::cls_bucket_head(const DoutPrefixProvider *dpp, return 0; } -int RGWSI_BucketIndex_RADOS::init_index(const DoutPrefixProvider *dpp, RGWBucketInfo& bucket_info, const rgw::bucket_index_layout_generation& idx_layout) +int RGWSI_BucketIndex_RADOS::init_index(const DoutPrefixProvider *dpp,RGWBucketInfo& bucket_info, const rgw::bucket_index_layout_generation& idx_layout) { - RGWSI_RADOS::Pool index_pool; + librados::IoCtx index_pool; string dir_oid = dir_oid_prefix; int r = open_bucket_index_pool(dpp, bucket_info, &index_pool); @@ -376,14 +366,14 @@ int RGWSI_BucketIndex_RADOS::init_index(const DoutPrefixProvider *dpp, RGWBucket map<int, string> bucket_objs; get_bucket_index_objects(dir_oid, idx_layout.layout.normal.num_shards, idx_layout.gen, &bucket_objs); - return CLSRGWIssueBucketIndexInit(index_pool.ioctx(), + return CLSRGWIssueBucketIndexInit(index_pool, bucket_objs, cct->_conf->rgw_bucket_index_max_aio)(); } int RGWSI_BucketIndex_RADOS::clean_index(const DoutPrefixProvider *dpp, RGWBucketInfo& bucket_info, const rgw::bucket_index_layout_generation& idx_layout) { - RGWSI_RADOS::Pool index_pool; + librados::IoCtx index_pool; std::string dir_oid = dir_oid_prefix; int r = open_bucket_index_pool(dpp, bucket_info, &index_pool); @@ -397,7 +387,7 @@ int RGWSI_BucketIndex_RADOS::clean_index(const DoutPrefixProvider *dpp, RGWBucke get_bucket_index_objects(dir_oid, idx_layout.layout.normal.num_shards, idx_layout.gen, &bucket_objs); - return CLSRGWIssueBucketIndexClean(index_pool.ioctx(), + return CLSRGWIssueBucketIndexClean(index_pool, bucket_objs, cct->_conf->rgw_bucket_index_max_aio)(); } @@ -440,7 +430,7 @@ int RGWSI_BucketIndex_RADOS::get_reshard_status(const DoutPrefixProvider *dpp, c { map<int, string> bucket_objs; - RGWSI_RADOS::Pool 
index_pool; + librados::IoCtx index_pool; int r = open_bucket_index(dpp, bucket_info, std::nullopt, @@ -455,7 +445,7 @@ int RGWSI_BucketIndex_RADOS::get_reshard_status(const DoutPrefixProvider *dpp, c for (auto i : bucket_objs) { cls_rgw_bucket_instance_entry entry; - int ret = cls_rgw_get_bucket_resharding(index_pool.ioctx(), i.second, &entry); + int ret = cls_rgw_get_bucket_resharding(index_pool, i.second, &entry); if (ret < 0 && ret != -ENOENT) { ldpp_dout(dpp, -1) << "ERROR: " << __func__ << ": cls_rgw_get_bucket_resharding() returned ret=" << ret << dendl; return ret; diff --git a/src/rgw/services/svc_bi_rados.h b/src/rgw/services/svc_bi_rados.h index feba0cfcd19..c6c11f8bc00 100644 --- a/src/rgw/services/svc_bi_rados.h +++ b/src/rgw/services/svc_bi_rados.h @@ -21,7 +21,6 @@ #include "rgw_tools.h" #include "svc_bi.h" -#include "svc_rados.h" #include "svc_tier_rados.h" struct rgw_bucket_dir_header; @@ -45,15 +44,15 @@ class RGWSI_BucketIndex_RADOS : public RGWSI_BucketIndex int open_pool(const DoutPrefixProvider *dpp, const rgw_pool& pool, - RGWSI_RADOS::Pool *index_pool, + librados::IoCtx* index_pool, bool mostly_omap); int open_bucket_index_pool(const DoutPrefixProvider *dpp, - const RGWBucketInfo& bucket_info, - RGWSI_RADOS::Pool *index_pool); + const RGWBucketInfo& bucket_info, + librados::IoCtx* index_pool); int open_bucket_index_base(const DoutPrefixProvider *dpp, const RGWBucketInfo& bucket_info, - RGWSI_RADOS::Pool *index_pool, + librados::IoCtx* index_pool, std::string *bucket_oid_base); // return the index oid for the given shard id @@ -77,9 +76,10 @@ class RGWSI_BucketIndex_RADOS : public RGWSI_BucketIndex public: + librados::Rados* rados{nullptr}; + struct Svc { RGWSI_Zone *zone{nullptr}; - RGWSI_RADOS *rados{nullptr}; RGWSI_BILog_RADOS *bilog{nullptr}; RGWDataChangesLog *datalog_rados{nullptr}; } svc; @@ -87,7 +87,7 @@ public: RGWSI_BucketIndex_RADOS(CephContext *cct); void init(RGWSI_Zone *zone_svc, - RGWSI_RADOS *rados_svc, + librados::Rados* rados_, RGWSI_BILog_RADOS *bilog_svc, RGWDataChangesLog *datalog_rados_svc); @@ -141,26 +141,24 @@ public: int open_bucket_index_shard(const DoutPrefixProvider *dpp, const RGWBucketInfo& bucket_info, const std::string& obj_key, - RGWSI_RADOS::Obj *bucket_obj, + rgw_rados_ref* bucket_obj, int *shard_id); int open_bucket_index_shard(const DoutPrefixProvider *dpp, const RGWBucketInfo& bucket_info, const rgw::bucket_index_layout_generation& index, - int shard_id, RGWSI_RADOS::Obj *bucket_obj); + int shard_id, rgw_rados_ref* bucket_obj); int open_bucket_index(const DoutPrefixProvider *dpp, const RGWBucketInfo& bucket_info, - RGWSI_RADOS::Pool *index_pool, + librados::IoCtx* index_pool, std::string *bucket_oid); int open_bucket_index(const DoutPrefixProvider *dpp, const RGWBucketInfo& bucket_info, std::optional<int> shard_id, const rgw::bucket_index_layout_generation& idx_layout, - RGWSI_RADOS::Pool *index_pool, + librados::IoCtx* index_pool, std::map<int, std::string> *bucket_objs, std::map<int, std::string> *bucket_instance_ids); }; - - diff --git a/src/rgw/services/svc_bilog_rados.cc b/src/rgw/services/svc_bilog_rados.cc index f4bb13ec1f0..a33eb508848 100644 --- a/src/rgw/services/svc_bilog_rados.cc +++ b/src/rgw/services/svc_bilog_rados.cc @@ -26,7 +26,7 @@ int RGWSI_BILog_RADOS::log_trim(const DoutPrefixProvider *dpp, std::string_view start_marker, std::string_view end_marker) { - RGWSI_RADOS::Pool index_pool; + librados::IoCtx index_pool; map<int, string> bucket_objs; BucketIndexShardsManager start_marker_mgr; @@ -48,32 +48,32 @@ 
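Throughout the bucket index and bilog services above, RGWSI_RADOS::Pool is replaced by a plain librados::IoCtx and the CLS issuers take that IoCtx directly. The resulting pattern, condensed from the cls_bucket_head() hunk:

    librados::IoCtx index_pool;
    std::map<int, std::string> oids;
    int r = open_bucket_index(dpp, bucket_info, shard_id, idx_layout,
                              &index_pool, &oids, nullptr);
    if (r < 0)
      return r;

    std::map<int, rgw_cls_list_ret> list_results;
    for (auto& iter : oids) {
      list_results.emplace(iter.first, rgw_cls_list_ret());
    }
    r = CLSRGWIssueGetDirHeader(index_pool, oids, list_results,
                                cct->_conf->rgw_bucket_index_max_aio)();  // was index_pool.ioctx()
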
int RGWSI_BILog_RADOS::log_trim(const DoutPrefixProvider *dpp, return r; } - return CLSRGWIssueBILogTrim(index_pool.ioctx(), start_marker_mgr, end_marker_mgr, bucket_objs, + return CLSRGWIssueBILogTrim(index_pool, start_marker_mgr, end_marker_mgr, bucket_objs, cct->_conf->rgw_bucket_index_max_aio)(); } int RGWSI_BILog_RADOS::log_start(const DoutPrefixProvider *dpp, const RGWBucketInfo& bucket_info, const rgw::bucket_log_layout_generation& log_layout, int shard_id) { - RGWSI_RADOS::Pool index_pool; + librados::IoCtx index_pool; map<int, string> bucket_objs; const auto& current_index = rgw::log_to_index_layout(log_layout); int r = svc.bi->open_bucket_index(dpp, bucket_info, shard_id, current_index, &index_pool, &bucket_objs, nullptr); if (r < 0) return r; - return CLSRGWIssueResyncBucketBILog(index_pool.ioctx(), bucket_objs, cct->_conf->rgw_bucket_index_max_aio)(); + return CLSRGWIssueResyncBucketBILog(index_pool, bucket_objs, cct->_conf->rgw_bucket_index_max_aio)(); } int RGWSI_BILog_RADOS::log_stop(const DoutPrefixProvider *dpp, const RGWBucketInfo& bucket_info, const rgw::bucket_log_layout_generation& log_layout, int shard_id) { - RGWSI_RADOS::Pool index_pool; + librados::IoCtx index_pool; map<int, string> bucket_objs; const auto& current_index = rgw::log_to_index_layout(log_layout); int r = svc.bi->open_bucket_index(dpp, bucket_info, shard_id, current_index, &index_pool, &bucket_objs, nullptr); if (r < 0) return r; - return CLSRGWIssueBucketBILogStop(index_pool.ioctx(), bucket_objs, cct->_conf->rgw_bucket_index_max_aio)(); + return CLSRGWIssueBucketBILogStop(index_pool, bucket_objs, cct->_conf->rgw_bucket_index_max_aio)(); } static void build_bucket_index_marker(const string& shard_id_str, @@ -95,7 +95,7 @@ int RGWSI_BILog_RADOS::log_list(const DoutPrefixProvider *dpp, ldpp_dout(dpp, 20) << __func__ << ": " << bucket_info.bucket << " marker " << marker << " shard_id=" << shard_id << " max " << max << dendl; result.clear(); - RGWSI_RADOS::Pool index_pool; + librados::IoCtx index_pool; map<int, string> oids; map<int, cls_rgw_bi_log_list_ret> bi_log_lists; const auto& current_index = rgw::log_to_index_layout(log_layout); @@ -112,8 +112,8 @@ int RGWSI_BILog_RADOS::log_list(const DoutPrefixProvider *dpp, r = marker_mgr.from_string(marker, shard_id); if (r < 0) return r; - - r = CLSRGWIssueBILogList(index_pool.ioctx(), marker_mgr, max, oids, bi_log_lists, cct->_conf->rgw_bucket_index_max_aio)(); + + r = CLSRGWIssueBILogList(index_pool, marker_mgr, max, oids, bi_log_lists, cct->_conf->rgw_bucket_index_max_aio)(); if (r < 0) return r; diff --git a/src/rgw/services/svc_bilog_rados.h b/src/rgw/services/svc_bilog_rados.h index e9d5dbb5c0e..e9c948d3fa2 100644 --- a/src/rgw/services/svc_bilog_rados.h +++ b/src/rgw/services/svc_bilog_rados.h @@ -1,4 +1,3 @@ - // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- // vim: ts=8 sw=2 smarttab ft=cpp @@ -19,11 +18,6 @@ #include "rgw_service.h" -#include "svc_rados.h" - - - - class RGWSI_BILog_RADOS : public RGWServiceInstance { public: diff --git a/src/rgw/services/svc_cls.cc b/src/rgw/services/svc_cls.cc index 342146bfefa..740d9ab0307 100644 --- a/src/rgw/services/svc_cls.cc +++ b/src/rgw/services/svc_cls.cc @@ -3,7 +3,6 @@ #include "svc_cls.h" -#include "svc_rados.h" #include "svc_zone.h" #include "rgw_zone.h" @@ -30,13 +29,12 @@ int RGWSI_Cls::do_start(optional_yield y, const DoutPrefixProvider *dpp) return 0; } -int RGWSI_Cls::MFA::get_mfa_obj(const DoutPrefixProvider *dpp, const rgw_user& user, std::optional<RGWSI_RADOS::Obj> *obj) 
+int RGWSI_Cls::MFA::get_mfa_ref(const DoutPrefixProvider *dpp, const rgw_user& user, rgw_rados_ref *ref) { string oid = get_mfa_oid(user); - rgw_raw_obj o(zone_svc->get_zone_params().otp_pool, oid); + rgw_raw_obj o(cls->zone_svc->get_zone_params().otp_pool, oid); - obj->emplace(rados_svc->obj(o)); - int r = (*obj)->open(dpp); + auto r = rgw_get_rados_ref(dpp, cls->rados, o, ref); if (r < 0) { ldpp_dout(dpp, 4) << "failed to open rados context for " << o << dendl; return r; @@ -45,17 +43,6 @@ int RGWSI_Cls::MFA::get_mfa_obj(const DoutPrefixProvider *dpp, const rgw_user& u return 0; } -int RGWSI_Cls::MFA::get_mfa_ref(const DoutPrefixProvider *dpp, const rgw_user& user, rgw_rados_ref *ref) -{ - std::optional<RGWSI_RADOS::Obj> obj; - int r = get_mfa_obj(dpp, user, &obj); - if (r < 0) { - return r; - } - *ref = obj->get_ref(); - return 0; -} - int RGWSI_Cls::MFA::check_mfa(const DoutPrefixProvider *dpp, const rgw_user& user, const string& otp_id, const string& pin, optional_yield y) { rgw_rados_ref ref; @@ -66,7 +53,7 @@ int RGWSI_Cls::MFA::check_mfa(const DoutPrefixProvider *dpp, const rgw_user& use rados::cls::otp::otp_check_t result; - r = rados::cls::otp::OTP::check(cct, ref.pool.ioctx(), ref.obj.oid, otp_id, pin, &result); + r = rados::cls::otp::OTP::check(cct, ref.ioctx, ref.obj.oid, otp_id, pin, &result); if (r < 0) return r; @@ -102,8 +89,8 @@ void RGWSI_Cls::MFA::prepare_mfa_write(librados::ObjectWriteOperation *op, int RGWSI_Cls::MFA::create_mfa(const DoutPrefixProvider *dpp, const rgw_user& user, const rados::cls::otp::otp_info_t& config, RGWObjVersionTracker *objv_tracker, const ceph::real_time& mtime, optional_yield y) { - std::optional<RGWSI_RADOS::Obj> obj; - int r = get_mfa_obj(dpp, user, &obj); + rgw_rados_ref obj; + int r = get_mfa_ref(dpp, user, &obj); if (r < 0) { return r; } @@ -111,7 +98,7 @@ int RGWSI_Cls::MFA::create_mfa(const DoutPrefixProvider *dpp, const rgw_user& us librados::ObjectWriteOperation op; prepare_mfa_write(&op, objv_tracker, mtime); rados::cls::otp::OTP::create(&op, config); - r = obj->operate(dpp, &op, y); + r = obj.operate(dpp, &op, y); if (r < 0) { ldpp_dout(dpp, 20) << "OTP create, otp_id=" << config.id << " result=" << (int)r << dendl; return r; @@ -126,8 +113,8 @@ int RGWSI_Cls::MFA::remove_mfa(const DoutPrefixProvider *dpp, const ceph::real_time& mtime, optional_yield y) { - std::optional<RGWSI_RADOS::Obj> obj; - int r = get_mfa_obj(dpp, user, &obj); + rgw_rados_ref obj; + int r = get_mfa_ref(dpp, user, &obj); if (r < 0) { return r; } @@ -135,7 +122,7 @@ int RGWSI_Cls::MFA::remove_mfa(const DoutPrefixProvider *dpp, librados::ObjectWriteOperation op; prepare_mfa_write(&op, objv_tracker, mtime); rados::cls::otp::OTP::remove(&op, id); - r = obj->operate(dpp, &op, y); + r = obj.operate(dpp, &op, y); if (r < 0) { ldpp_dout(dpp, 20) << "OTP remove, otp_id=" << id << " result=" << (int)r << dendl; return r; @@ -154,7 +141,7 @@ int RGWSI_Cls::MFA::get_mfa(const DoutPrefixProvider *dpp, const rgw_user& user, return r; } - r = rados::cls::otp::OTP::get(nullptr, ref.pool.ioctx(), ref.obj.oid, id, result); + r = rados::cls::otp::OTP::get(nullptr, ref.ioctx, ref.obj.oid, id, result); if (r < 0) { return r; } @@ -172,7 +159,7 @@ int RGWSI_Cls::MFA::list_mfa(const DoutPrefixProvider *dpp, const rgw_user& user return r; } - r = rados::cls::otp::OTP::get_all(nullptr, ref.pool.ioctx(), ref.obj.oid, result); + r = rados::cls::otp::OTP::get_all(nullptr, ref.ioctx, ref.obj.oid, result); if (r < 0) { return r; } @@ -190,7 +177,7 @@ int 
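The MFA helpers follow the same direction: get_mfa_obj() is removed and get_mfa_ref() resolves a rgw_rados_ref (an IoCtx plus the raw object) through a single rgw_get_rados_ref() call, after which operations run on the ref itself. Condensed from the create_mfa() hunk above:

    rgw_rados_ref ref;
    int r = get_mfa_ref(dpp, user, &ref);
    if (r < 0)
      return r;

    librados::ObjectWriteOperation op;
    prepare_mfa_write(&op, objv_tracker, mtime);
    rados::cls::otp::OTP::create(&op, config);
    r = ref.operate(dpp, &op, y);  // was obj->operate() on RGWSI_RADOS::Obj
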
RGWSI_Cls::MFA::otp_get_current_time(const DoutPrefixProvider *dpp, const rg return r; } - r = rados::cls::otp::OTP::get_current_time(ref.pool.ioctx(), ref.obj.oid, result); + r = rados::cls::otp::OTP::get_current_time(ref.ioctx, ref.obj.oid, result); if (r < 0) { return r; } @@ -203,11 +190,12 @@ int RGWSI_Cls::MFA::set_mfa(const DoutPrefixProvider *dpp, const string& oid, co const real_time& mtime, optional_yield y) { - rgw_raw_obj o(zone_svc->get_zone_params().otp_pool, oid); - auto obj = rados_svc->obj(o); - int r = obj.open(dpp); + rgw_rados_ref obj; + int r = rgw_get_rados_ref(dpp, cls->rados, + { cls->zone_svc->get_zone_params().otp_pool, oid }, + &obj); if (r < 0) { - ldpp_dout(dpp, 4) << "failed to open rados context for " << o << dendl; + ldpp_dout(dpp, 4) << "failed to open rados context for " << oid << dendl; return r; } librados::ObjectWriteOperation op; @@ -231,21 +219,21 @@ int RGWSI_Cls::MFA::list_mfa(const DoutPrefixProvider *dpp, const string& oid, l RGWObjVersionTracker *objv_tracker, ceph::real_time *pmtime, optional_yield y) { - rgw_raw_obj o(zone_svc->get_zone_params().otp_pool, oid); - auto obj = rados_svc->obj(o); - int r = obj.open(dpp); + rgw_rados_ref ref; + int r = rgw_get_rados_ref(dpp, cls->rados, + { cls->zone_svc->get_zone_params().otp_pool, oid }, + &ref); if (r < 0) { - ldpp_dout(dpp, 4) << "failed to open rados context for " << o << dendl; + ldpp_dout(dpp, 4) << "failed to open rados context for " << oid << dendl; return r; } - auto& ref = obj.get_ref(); librados::ObjectReadOperation op; struct timespec mtime_ts; if (pmtime) { op.stat2(nullptr, &mtime_ts, nullptr); } objv_tracker->prepare_op_for_read(&op); - r = rados::cls::otp::OTP::get_all(&op, ref.pool.ioctx(), ref.obj.oid, result); + r = rados::cls::otp::OTP::get_all(&op, ref.ioctx, ref.obj.oid, result); if (r < 0) { return r; } @@ -265,14 +253,13 @@ void RGWSI_Cls::TimeLog::prepare_entry(cls_log_entry& entry, cls_log_add_prepare_entry(entry, utime_t(ut), section, key, bl); } -int RGWSI_Cls::TimeLog::init_obj(const DoutPrefixProvider *dpp, const string& oid, RGWSI_RADOS::Obj& obj) +int RGWSI_Cls::TimeLog::init_obj(const DoutPrefixProvider *dpp, const string& oid, rgw_rados_ref& obj) { - rgw_raw_obj o(zone_svc->get_zone_params().log_pool, oid); - obj = rados_svc->obj(o); - return obj.open(dpp); - + rgw_raw_obj o(cls->zone_svc->get_zone_params().log_pool, oid); + return rgw_get_rados_ref(dpp, cls->rados, o, &obj); } -int RGWSI_Cls::TimeLog::add(const DoutPrefixProvider *dpp, + +int RGWSI_Cls::TimeLog::add(const DoutPrefixProvider *dpp, const string& oid, const real_time& ut, const string& section, @@ -280,8 +267,7 @@ int RGWSI_Cls::TimeLog::add(const DoutPrefixProvider *dpp, bufferlist& bl, optional_yield y) { - RGWSI_RADOS::Obj obj; - + rgw_rados_ref obj; int r = init_obj(dpp, oid, obj); if (r < 0) { return r; @@ -301,7 +287,7 @@ int RGWSI_Cls::TimeLog::add(const DoutPrefixProvider *dpp, bool monotonic_inc, optional_yield y) { - RGWSI_RADOS::Obj obj; + rgw_rados_ref obj; int r = init_obj(dpp, oid, obj); if (r < 0) { @@ -329,7 +315,7 @@ int RGWSI_Cls::TimeLog::list(const DoutPrefixProvider *dpp, bool *truncated, optional_yield y) { - RGWSI_RADOS::Obj obj; + rgw_rados_ref obj; int r = init_obj(dpp, oid, obj); if (r < 0) { @@ -358,7 +344,7 @@ int RGWSI_Cls::TimeLog::info(const DoutPrefixProvider *dpp, cls_log_header *header, optional_yield y) { - RGWSI_RADOS::Obj obj; + rgw_rados_ref obj; int r = init_obj(dpp, oid, obj); if (r < 0) { @@ -379,7 +365,7 @@ int RGWSI_Cls::TimeLog::info(const 
DoutPrefixProvider *dpp, } int RGWSI_Cls::TimeLog::info_async(const DoutPrefixProvider *dpp, - RGWSI_RADOS::Obj& obj, + rgw_rados_ref& obj, const string& oid, cls_log_header *header, librados::AioCompletion *completion) @@ -409,7 +395,7 @@ int RGWSI_Cls::TimeLog::trim(const DoutPrefixProvider *dpp, librados::AioCompletion *completion, optional_yield y) { - RGWSI_RADOS::Obj obj; + rgw_rados_ref obj; int r = init_obj(dpp, oid, obj); if (r < 0) { @@ -438,22 +424,23 @@ int RGWSI_Cls::Lock::lock_exclusive(const DoutPrefixProvider *dpp, string& owner_id, std::optional<string> lock_name) { - auto p = rados_svc->pool(pool); - int r = p.open(dpp); + + librados::IoCtx p; + int r = rgw_init_ioctx(dpp, cls->rados, pool, p, true, false); if (r < 0) { return r; } uint64_t msec = std::chrono::duration_cast<std::chrono::milliseconds>(duration).count(); utime_t ut(msec / 1000, msec % 1000); - + rados::cls::lock::Lock l(lock_name.value_or(log_lock_name)); l.set_duration(ut); l.set_cookie(owner_id); l.set_tag(zone_id); l.set_may_renew(true); - - return l.lock_exclusive(&p.ioctx(), oid); + + return l.lock_exclusive(&p, oid); } int RGWSI_Cls::Lock::unlock(const DoutPrefixProvider *dpp, @@ -463,16 +450,15 @@ int RGWSI_Cls::Lock::unlock(const DoutPrefixProvider *dpp, string& owner_id, std::optional<string> lock_name) { - auto p = rados_svc->pool(pool); - int r = p.open(dpp); + librados::IoCtx p; + int r = rgw_init_ioctx(dpp, cls->rados, pool, p, true, false); if (r < 0) { return r; } - + rados::cls::lock::Lock l(lock_name.value_or(log_lock_name)); l.set_tag(zone_id); l.set_cookie(owner_id); - - return l.unlock(&p.ioctx(), oid); -} + return l.unlock(&p, oid); +} diff --git a/src/rgw/services/svc_cls.h b/src/rgw/services/svc_cls.h index d1d1d659be8..6648714dbc8 100644 --- a/src/rgw/services/svc_cls.h +++ b/src/rgw/services/svc_cls.h @@ -21,25 +21,21 @@ #include "rgw_service.h" -#include "svc_rados.h" +#include "driver/rados/rgw_tools.h" class RGWSI_Cls : public RGWServiceInstance { RGWSI_Zone *zone_svc{nullptr}; - RGWSI_RADOS *rados_svc{nullptr}; + librados::Rados* rados{nullptr}; class ClsSubService : public RGWServiceInstance { friend class RGWSI_Cls; - RGWSI_Cls *cls_svc{nullptr}; - RGWSI_Zone *zone_svc{nullptr}; - RGWSI_RADOS *rados_svc{nullptr}; + RGWSI_Cls *cls{nullptr}; - void init(RGWSI_Cls *_cls_svc, RGWSI_Zone *_zone_svc, RGWSI_RADOS *_rados_svc) { - cls_svc = _cls_svc; - zone_svc = _cls_svc->zone_svc; - rados_svc = _cls_svc->rados_svc; + void init(RGWSI_Cls *cls_) { + cls = cls_; } public: @@ -48,7 +44,6 @@ class RGWSI_Cls : public RGWServiceInstance public: class MFA : public ClsSubService { - int get_mfa_obj(const DoutPrefixProvider *dpp, const rgw_user& user, std::optional<RGWSI_RADOS::Obj> *obj); int get_mfa_ref(const DoutPrefixProvider *dpp, const rgw_user& user, rgw_rados_ref *ref); void prepare_mfa_write(librados::ObjectWriteOperation *op, @@ -81,7 +76,7 @@ public: } mfa; class TimeLog : public ClsSubService { - int init_obj(const DoutPrefixProvider *dpp, const std::string& oid, RGWSI_RADOS::Obj& obj); + int init_obj(const DoutPrefixProvider *dpp, const std::string& oid, rgw_rados_ref& obj); public: TimeLog(CephContext *cct): ClsSubService(cct) {} @@ -117,7 +112,7 @@ public: cls_log_header *header, optional_yield y); int info_async(const DoutPrefixProvider *dpp, - RGWSI_RADOS::Obj& obj, + rgw_rados_ref& obj, const std::string& oid, cls_log_header *header, librados::AioCompletion *completion); @@ -132,7 +127,7 @@ public: } timelog; class Lock : public ClsSubService { - int init_obj(const 
std::string& oid, RGWSI_RADOS::Obj& obj); + int init_obj(const std::string& oid, rgw_rados_ref& obj); public: Lock(CephContext *cct): ClsSubService(cct) {} int lock_exclusive(const DoutPrefixProvider *dpp, @@ -152,15 +147,14 @@ public: RGWSI_Cls(CephContext *cct): RGWServiceInstance(cct), mfa(cct), timelog(cct), lock(cct) {} - void init(RGWSI_Zone *_zone_svc, RGWSI_RADOS *_rados_svc) { - rados_svc = _rados_svc; + void init(RGWSI_Zone *_zone_svc, librados::Rados* rados_) { + rados = rados_; zone_svc = _zone_svc; - mfa.init(this, zone_svc, rados_svc); - timelog.init(this, zone_svc, rados_svc); - lock.init(this, zone_svc, rados_svc); + mfa.init(this); + timelog.init(this); + lock.init(this); } int do_start(optional_yield, const DoutPrefixProvider *dpp) override; }; - diff --git a/src/rgw/services/svc_config_key_rados.cc b/src/rgw/services/svc_config_key_rados.cc index 5edb02ea7f3..c17139af292 100644 --- a/src/rgw/services/svc_config_key_rados.cc +++ b/src/rgw/services/svc_config_key_rados.cc @@ -1,14 +1,17 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab ft=cpp -#include "svc_rados.h" #include "svc_config_key_rados.h" -using namespace std; +#include "rgw_tools.h" + +using std::string; RGWSI_ConfigKey_RADOS::~RGWSI_ConfigKey_RADOS(){} int RGWSI_ConfigKey_RADOS::do_start(optional_yield, const DoutPrefixProvider *dpp) { - maybe_insecure_mon_conn = !svc.rados->check_secure_mon_conn(dpp); + maybe_insecure_mon_conn = !rgw_check_secure_mon_conn(dpp); return 0; } @@ -20,14 +23,17 @@ void RGWSI_ConfigKey_RADOS::warn_if_insecure() return; } - string s = "rgw is configured to optionally allow insecure connections to the monitors (auth_supported, ms_mon_client_mode), ssl certificates stored at the monitor configuration could leak"; + string s = ("rgw is configured to optionally allow insecure connections to " + "the monitors (auth_supported, ms_mon_client_mode), ssl " + "certificates stored at the monitor configuration could leak"); - svc.rados->clog_warn(s); + rgw_clog_warn(rados, s); lderr(ctx()) << __func__ << "(): WARNING: " << s << dendl; } -int RGWSI_ConfigKey_RADOS::get(const string& key, bool secure, bufferlist *result) +int RGWSI_ConfigKey_RADOS::get(const string& key, bool secure, + bufferlist *result) { string cmd = "{" @@ -36,8 +42,7 @@ int RGWSI_ConfigKey_RADOS::get(const string& key, bool secure, bufferlist *resul "}"; bufferlist inbl; - auto handle = svc.rados->handle(); - int ret = handle.mon_command(cmd, inbl, result, nullptr); + int ret = rados->mon_command(cmd, inbl, result, nullptr); if (ret < 0) { return ret; } diff --git a/src/rgw/services/svc_config_key_rados.h b/src/rgw/services/svc_config_key_rados.h index b3b995ac76d..344350278af 100644 --- a/src/rgw/services/svc_config_key_rados.h +++ b/src/rgw/services/svc_config_key_rados.h @@ -1,5 +1,3 @@ - - // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- // vim: ts=8 sw=2 smarttab ft=cpp @@ -24,8 +22,6 @@ #include "svc_config_key.h" -class RGWSI_RADOS; - class RGWSI_ConfigKey_RADOS : public RGWSI_ConfigKey { bool maybe_insecure_mon_conn{false}; @@ -36,12 +32,10 @@ class RGWSI_ConfigKey_RADOS : public RGWSI_ConfigKey void warn_if_insecure(); public: - struct Svc { - RGWSI_RADOS *rados{nullptr}; - } svc; + librados::Rados* rados{nullptr}; - void init(RGWSI_RADOS *rados_svc) { - svc.rados = rados_svc; + void init(librados::Rados* rados_) { + rados = rados_; } RGWSI_ConfigKey_RADOS(CephContext *cct) : RGWSI_ConfigKey(cct) {} @@ -50,5 +44,3 @@ public: int get(const 
std::string& key, bool secure, bufferlist *result) override; }; - - diff --git a/src/rgw/services/svc_mdlog.cc b/src/rgw/services/svc_mdlog.cc index da723f3e129..46a158ceb77 100644 --- a/src/rgw/services/svc_mdlog.cc +++ b/src/rgw/services/svc_mdlog.cc @@ -2,7 +2,6 @@ // vim: ts=8 sw=2 smarttab ft=cpp #include "svc_mdlog.h" -#include "svc_rados.h" #include "svc_zone.h" #include "svc_sys_obj.h" @@ -30,13 +29,16 @@ RGWSI_MDLog::RGWSI_MDLog(CephContext *cct, bool _run_sync) : RGWServiceInstance( RGWSI_MDLog::~RGWSI_MDLog() { } -int RGWSI_MDLog::init(RGWSI_RADOS *_rados_svc, RGWSI_Zone *_zone_svc, RGWSI_SysObj *_sysobj_svc, RGWSI_Cls *_cls_svc) +int RGWSI_MDLog::init(librados::Rados* rados_, RGWSI_Zone *_zone_svc, + RGWSI_SysObj *_sysobj_svc, RGWSI_Cls *_cls_svc, + RGWAsyncRadosProcessor* async_processor_) { svc.zone = _zone_svc; svc.sysobj = _sysobj_svc; svc.mdlog = this; - svc.rados = _rados_svc; + rados = rados_; svc.cls = _cls_svc; + async_processor = async_processor_; return 0; } @@ -262,11 +264,12 @@ class ReadHistoryCR : public RGWCoroutine { ReadHistoryCR(const DoutPrefixProvider *dpp, const Svc& svc, Cursor *cursor, - RGWObjVersionTracker *objv_tracker) + RGWObjVersionTracker *objv_tracker, + RGWAsyncRadosProcessor* async_processor) : RGWCoroutine(svc.zone->ctx()), dpp(dpp), svc(svc), cursor(cursor), objv_tracker(objv_tracker), - async_processor(svc.rados->get_async_processor()) + async_processor(async_processor) {} int operate(const DoutPrefixProvider *dpp) { @@ -312,10 +315,11 @@ class WriteHistoryCR : public RGWCoroutine { WriteHistoryCR(const DoutPrefixProvider *dpp, Svc& svc, const Cursor& cursor, - RGWObjVersionTracker *objv) + RGWObjVersionTracker *objv, + RGWAsyncRadosProcessor* async_processor) : RGWCoroutine(svc.zone->ctx()), dpp(dpp), svc(svc), cursor(cursor), objv(objv), - async_processor(svc.rados->get_async_processor()) + async_processor(async_processor) {} int operate(const DoutPrefixProvider *dpp) { @@ -353,18 +357,22 @@ class TrimHistoryCR : public RGWCoroutine { RGWObjVersionTracker *objv; //< to prevent racing updates Cursor next; //< target cursor for oldest log period Cursor existing; //< existing cursor read from disk + RGWAsyncRadosProcessor* async_processor; public: - TrimHistoryCR(const DoutPrefixProvider *dpp, const Svc& svc, Cursor cursor, RGWObjVersionTracker *objv) + TrimHistoryCR(const DoutPrefixProvider *dpp, const Svc& svc, Cursor cursor, + RGWObjVersionTracker *objv, + RGWAsyncRadosProcessor* async_processor) : RGWCoroutine(svc.zone->ctx()), dpp(dpp), svc(svc), - cursor(cursor), objv(objv), next(cursor) { + cursor(cursor), objv(objv), next(cursor), + async_processor(async_processor) { next.next(); // advance past cursor } int operate(const DoutPrefixProvider *dpp) { reenter(this) { // read an existing history, and write the new history if it's newer - yield call(new ReadHistoryCR(dpp, svc, &existing, objv)); + yield call(new ReadHistoryCR(dpp, svc, &existing, objv, async_processor)); if (retcode < 0) { return set_cr_error(retcode); } @@ -375,7 +383,7 @@ class TrimHistoryCR : public RGWCoroutine { return set_cr_error(-ECANCELED); } // overwrite with updated history - yield call(new WriteHistoryCR(dpp, svc, next, objv)); + yield call(new WriteHistoryCR(dpp, svc, next, objv, async_processor)); if (retcode < 0) { return set_cr_error(retcode); } @@ -512,13 +520,13 @@ Cursor RGWSI_MDLog::read_oldest_log_period(optional_yield y, const DoutPrefixPro RGWCoroutine* RGWSI_MDLog::read_oldest_log_period_cr(const DoutPrefixProvider *dpp, Cursor *period, 
RGWObjVersionTracker *objv) const { - return new mdlog::ReadHistoryCR(dpp, svc, period, objv); + return new mdlog::ReadHistoryCR(dpp, svc, period, objv, async_processor); } RGWCoroutine* RGWSI_MDLog::trim_log_period_cr(const DoutPrefixProvider *dpp, Cursor period, RGWObjVersionTracker *objv) const { - return new mdlog::TrimHistoryCR(dpp, svc, period, objv); + return new mdlog::TrimHistoryCR(dpp, svc, period, objv, async_processor); } RGWMetadataLog* RGWSI_MDLog::get_log(const std::string& period) diff --git a/src/rgw/services/svc_mdlog.h b/src/rgw/services/svc_mdlog.h index 8b37ba11e56..f169ee88db9 100644 --- a/src/rgw/services/svc_mdlog.h +++ b/src/rgw/services/svc_mdlog.h @@ -29,7 +29,6 @@ class RGWCoroutine; class RGWSI_Zone; class RGWSI_SysObj; -class RGWSI_RADOS; namespace mdlog { class ReadHistoryCR; @@ -58,18 +57,21 @@ public: RGWSI_MDLog(CephContext *cct, bool run_sync); virtual ~RGWSI_MDLog(); + librados::Rados* rados{nullptr}; + RGWAsyncRadosProcessor* async_processor{nullptr}; + struct Svc { - RGWSI_RADOS *rados{nullptr}; RGWSI_Zone *zone{nullptr}; RGWSI_SysObj *sysobj{nullptr}; RGWSI_MDLog *mdlog{nullptr}; RGWSI_Cls *cls{nullptr}; } svc; - int init(RGWSI_RADOS *_rados_svc, + int init(librados::Rados* rados_, RGWSI_Zone *_zone_svc, RGWSI_SysObj *_sysobj_svc, - RGWSI_Cls *_cls_svc); + RGWSI_Cls *_cls_svc, + RGWAsyncRadosProcessor* async_processor_); int do_start(optional_yield y, const DoutPrefixProvider *dpp) override; diff --git a/src/rgw/services/svc_notify.cc b/src/rgw/services/svc_notify.cc index 43f84ed0a4f..5593dee9ae2 100644 --- a/src/rgw/services/svc_notify.cc +++ b/src/rgw/services/svc_notify.cc @@ -9,7 +9,6 @@ #include "svc_notify.h" #include "svc_finisher.h" #include "svc_zone.h" -#include "svc_rados.h" #include "rgw_zone.h" @@ -29,7 +28,7 @@ class RGWWatcher : public DoutPrefixProvider , public librados::WatchCtx2 { CephContext *cct; RGWSI_Notify *svc; int index; - RGWSI_RADOS::Obj obj; + rgw_rados_ref obj; uint64_t watch_handle; int register_ret{0}; bool unregister_done{false}; @@ -51,7 +50,8 @@ class RGWWatcher : public DoutPrefixProvider , public librados::WatchCtx2 { } public: - RGWWatcher(CephContext *_cct, RGWSI_Notify *s, int i, RGWSI_RADOS::Obj& o) : cct(_cct), svc(s), index(i), obj(o), watch_handle(0) {} + RGWWatcher(CephContext *_cct, RGWSI_Notify *s, int i, rgw_rados_ref& o) + : cct(_cct), svc(s), index(i), obj(o), watch_handle(0) {} void handle_notify(uint64_t notify_id, uint64_t cookie, uint64_t notifier_id, @@ -174,7 +174,7 @@ string RGWSI_Notify::get_control_oid(int i) } // do not call pick_obj_control before init_watch -RGWSI_RADOS::Obj RGWSI_Notify::pick_control_obj(const string& key) +rgw_rados_ref RGWSI_Notify::pick_control_obj(const string& key) { uint32_t r = ceph_str_hash_linux(key.c_str(), key.size()); @@ -206,17 +206,17 @@ int RGWSI_Notify::init_watch(const DoutPrefixProvider *dpp, optional_yield y) notify_oid = notify_oid_prefix; } - notify_objs[i] = rados_svc->handle().obj({control_pool, notify_oid}); - auto& notify_obj = notify_objs[i]; - - int r = notify_obj.open(dpp); + int r = rgw_get_rados_ref(dpp, rados, { control_pool, notify_oid }, + ¬ify_objs[i]); if (r < 0) { ldpp_dout(dpp, 0) << "ERROR: notify_obj.open() returned r=" << r << dendl; return r; } + auto& notify_obj = notify_objs[i]; librados::ObjectWriteOperation op; op.create(false); + r = notify_obj.operate(dpp, &op, y); if (r < 0 && r != -EEXIST) { ldpp_dout(dpp, 0) << "ERROR: notify_obj.operate() returned r=" << r << dendl; @@ -270,10 +270,6 @@ int 
RGWSI_Notify::do_start(optional_yield y, const DoutPrefixProvider *dpp) assert(zone_svc->is_started()); /* otherwise there's an ordering problem */ - r = rados_svc->start(y, dpp); - if (r < 0) { - return r; - } r = finisher_svc->start(y, dpp); if (r < 0) { return r; @@ -315,14 +311,14 @@ void RGWSI_Notify::shutdown() finalized = true; } -int RGWSI_Notify::unwatch(RGWSI_RADOS::Obj& obj, uint64_t watch_handle) +int RGWSI_Notify::unwatch(rgw_rados_ref& obj, uint64_t watch_handle) { int r = obj.unwatch(watch_handle); if (r < 0) { ldout(cct, 0) << "ERROR: rados->unwatch2() returned r=" << r << dendl; return r; } - r = rados_svc->handle().watch_flush(); + r = rados->watch_flush(); if (r < 0) { ldout(cct, 0) << "ERROR: rados->watch_flush() returned r=" << r << dendl; return r; @@ -392,9 +388,9 @@ int RGWSI_Notify::distribute(const DoutPrefixProvider *dpp, const string& key, which will lead to division by 0 in pick_obj_control (num_watchers is 0). */ if (num_watchers > 0) { - RGWSI_RADOS::Obj notify_obj = pick_control_obj(key); + auto notify_obj = pick_control_obj(key); - ldpp_dout(dpp, 10) << "distributing notification oid=" << notify_obj.get_ref().obj + ldpp_dout(dpp, 10) << "distributing notification oid=" << notify_obj.obj << " cni=" << cni << dendl; return robust_notify(dpp, notify_obj, cni, y); } @@ -443,7 +439,7 @@ static timeout_vector decode_timeouts(const bufferlist& bl) } int RGWSI_Notify::robust_notify(const DoutPrefixProvider *dpp, - RGWSI_RADOS::Obj& notify_obj, + rgw_rados_ref& notify_obj, const RGWCacheNotifyInfo& cni, optional_yield y) { diff --git a/src/rgw/services/svc_notify.h b/src/rgw/services/svc_notify.h index f7329136ece..4f7e9d17ee3 100644 --- a/src/rgw/services/svc_notify.h +++ b/src/rgw/services/svc_notify.h @@ -5,7 +5,7 @@ #include "rgw_service.h" -#include "svc_rados.h" +#include "rgw_tools.h" class Context; @@ -21,14 +21,14 @@ class RGWSI_Notify : public RGWServiceInstance { friend class RGWWatcher; friend class RGWSI_Notify_ShutdownCB; - friend class RGWServices_Def; + friend struct RGWServices_Def; public: class CB; private: RGWSI_Zone *zone_svc{nullptr}; - RGWSI_RADOS *rados_svc{nullptr}; + librados::Rados *rados{nullptr}; RGWSI_Finisher *finisher_svc{nullptr}; ceph::shared_mutex watchers_lock = ceph::make_shared_mutex("watchers_lock"); @@ -37,7 +37,7 @@ private: int num_watchers{0}; RGWWatcher **watchers{nullptr}; std::set<int> watchers_set; - std::vector<RGWSI_RADOS::Obj> notify_objs; + std::vector<rgw_rados_ref> notify_objs; bool enabled{false}; @@ -45,7 +45,7 @@ private: uint64_t max_notify_retries = 10; std::string get_control_oid(int i); - RGWSI_RADOS::Obj pick_control_obj(const std::string& key); + rgw_rados_ref pick_control_obj(const std::string& key); CB *cb{nullptr}; @@ -58,16 +58,16 @@ private: void finalize_watch(); void init(RGWSI_Zone *_zone_svc, - RGWSI_RADOS *_rados_svc, + librados::Rados* rados_, RGWSI_Finisher *_finisher_svc) { zone_svc = _zone_svc; - rados_svc = _rados_svc; + rados = rados_; finisher_svc = _finisher_svc; } int do_start(optional_yield, const DoutPrefixProvider *dpp) override; void shutdown() override; - int unwatch(RGWSI_RADOS::Obj& obj, uint64_t watch_handle); + int unwatch(rgw_rados_ref& obj, uint64_t watch_handle); void add_watcher(int i); void remove_watcher(int i); @@ -79,7 +79,7 @@ private: void _set_enabled(bool status); void set_enabled(bool status); - int robust_notify(const DoutPrefixProvider *dpp, RGWSI_RADOS::Obj& notify_obj, + int robust_notify(const DoutPrefixProvider *dpp, rgw_rados_ref& notify_obj, const 
RGWCacheNotifyInfo& bl, optional_yield y); void schedule_context(Context *c); diff --git a/src/rgw/services/svc_rados.cc b/src/rgw/services/svc_rados.cc deleted file mode 100644 index d682b87eb2c..00000000000 --- a/src/rgw/services/svc_rados.cc +++ /dev/null @@ -1,445 +0,0 @@ -// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- -// vim: ts=8 sw=2 smarttab ft=cpp - -#include "svc_rados.h" - -#include "include/rados/librados.hpp" -#include "common/errno.h" -#include "osd/osd_types.h" -#include "rgw_tools.h" -#include "rgw_cr_rados.h" - -#include "auth/AuthRegistry.h" - -#define dout_subsys ceph_subsys_rgw - -using namespace std; - -RGWSI_RADOS::RGWSI_RADOS(CephContext *cct) : RGWServiceInstance(cct) -{ -} - -RGWSI_RADOS::~RGWSI_RADOS() -{ -} - -int RGWSI_RADOS::do_start(optional_yield, const DoutPrefixProvider *dpp) -{ - int ret = rados.init_with_context(cct); - if (ret < 0) { - return ret; - } - ret = rados.connect(); - if (ret < 0) { - return ret; - } - - async_processor.reset(new RGWAsyncRadosProcessor(cct, cct->_conf->rgw_num_async_rados_threads)); - async_processor->start(); - - return 0; -} - -void RGWSI_RADOS::shutdown() -{ - if (async_processor) { - async_processor->stop(); - } - rados.shutdown(); -} - -void RGWSI_RADOS::stop_processor() -{ - if (async_processor) { - async_processor->stop(); - } -} - -librados::Rados* RGWSI_RADOS::get_rados_handle() -{ - return &rados; -} - -std::string RGWSI_RADOS::cluster_fsid() -{ - std::string fsid; - (void) get_rados_handle()->cluster_fsid(&fsid); - return fsid; -} - -uint64_t RGWSI_RADOS::instance_id() -{ - return get_rados_handle()->get_instance_id(); -} - -int RGWSI_RADOS::open_pool_ctx(const DoutPrefixProvider *dpp, const rgw_pool& pool, librados::IoCtx& io_ctx, - const OpenParams& params) -{ - return rgw_init_ioctx(dpp, get_rados_handle(), pool, io_ctx, - params.create, - params.mostly_omap); -} - -int RGWSI_RADOS::pool_iterate(const DoutPrefixProvider *dpp, - librados::IoCtx& io_ctx, - librados::NObjectIterator& iter, - uint32_t num, vector<rgw_bucket_dir_entry>& objs, - RGWAccessListFilter *filter, - bool *is_truncated) -{ - if (iter == io_ctx.nobjects_end()) - return -ENOENT; - - uint32_t i; - - for (i = 0; i < num && iter != io_ctx.nobjects_end(); ++i, ++iter) { - rgw_bucket_dir_entry e; - - string oid = iter->get_oid(); - ldpp_dout(dpp, 20) << "RGWRados::pool_iterate: got " << oid << dendl; - - // fill it in with initial values; we may correct later - if (filter && !filter->filter(oid, oid)) - continue; - - e.key = oid; - objs.push_back(e); - } - - if (is_truncated) - *is_truncated = (iter != io_ctx.nobjects_end()); - - return objs.size(); -} - -RGWSI_RADOS::Obj::Obj(Pool& pool, const string& oid) : rados_svc(pool.rados_svc) -{ - ref.pool = pool; - ref.obj = rgw_raw_obj(pool.get_pool(), oid); -} - -void RGWSI_RADOS::Obj::init(const rgw_raw_obj& obj) -{ - ref.pool = RGWSI_RADOS::Pool(rados_svc, obj.pool); - ref.obj = obj; -} - -int RGWSI_RADOS::Obj::open(const DoutPrefixProvider *dpp) -{ - int r = ref.pool.open(dpp); - if (r < 0) { - return r; - } - - ref.pool.ioctx().locator_set_key(ref.obj.loc); - - return 0; -} - -int RGWSI_RADOS::Obj::operate(const DoutPrefixProvider *dpp, librados::ObjectWriteOperation *op, - optional_yield y, int flags) -{ - return rgw_rados_operate(dpp, ref.pool.ioctx(), ref.obj.oid, op, y, flags); -} - -int RGWSI_RADOS::Obj::operate(const DoutPrefixProvider *dpp, librados::ObjectReadOperation *op, - bufferlist *pbl, optional_yield y, int flags) -{ - return rgw_rados_operate(dpp, 
ref.pool.ioctx(), ref.obj.oid, op, pbl, y, flags); -} - -int RGWSI_RADOS::Obj::aio_operate(librados::AioCompletion *c, librados::ObjectWriteOperation *op) -{ - return ref.pool.ioctx().aio_operate(ref.obj.oid, c, op); -} - -int RGWSI_RADOS::Obj::aio_operate(librados::AioCompletion *c, librados::ObjectReadOperation *op, - bufferlist *pbl) -{ - return ref.pool.ioctx().aio_operate(ref.obj.oid, c, op, pbl); -} - -int RGWSI_RADOS::Obj::watch(uint64_t *handle, librados::WatchCtx2 *ctx) -{ - return ref.pool.ioctx().watch2(ref.obj.oid, handle, ctx); -} - -int RGWSI_RADOS::Obj::aio_watch(librados::AioCompletion *c, uint64_t *handle, librados::WatchCtx2 *ctx) -{ - return ref.pool.ioctx().aio_watch(ref.obj.oid, c, handle, ctx); -} - -int RGWSI_RADOS::Obj::unwatch(uint64_t handle) -{ - return ref.pool.ioctx().unwatch2(handle); -} - -int RGWSI_RADOS::Obj::notify(const DoutPrefixProvider *dpp, bufferlist& bl, uint64_t timeout_ms, - bufferlist *pbl, optional_yield y) -{ - return rgw_rados_notify(dpp, ref.pool.ioctx(), ref.obj.oid, bl, timeout_ms, pbl, y); -} - -void RGWSI_RADOS::Obj::notify_ack(uint64_t notify_id, - uint64_t cookie, - bufferlist& bl) -{ - ref.pool.ioctx().notify_ack(ref.obj.oid, notify_id, cookie, bl); -} - -uint64_t RGWSI_RADOS::Obj::get_last_version() -{ - return ref.pool.ioctx().get_last_version(); -} - -int RGWSI_RADOS::Pool::create(const DoutPrefixProvider *dpp) -{ - librados::Rados *rad = rados_svc->get_rados_handle(); - int r = rad->pool_create(pool.name.c_str()); - if (r < 0) { - ldpp_dout(dpp, 0) << "WARNING: pool_create returned " << r << dendl; - return r; - } - librados::IoCtx io_ctx; - r = rad->ioctx_create(pool.name.c_str(), io_ctx); - if (r < 0) { - ldpp_dout(dpp, 0) << "WARNING: ioctx_create returned " << r << dendl; - return r; - } - r = io_ctx.application_enable(pg_pool_t::APPLICATION_NAME_RGW, false); - if (r < 0) { - ldpp_dout(dpp, 0) << "WARNING: application_enable returned " << r << dendl; - return r; - } - return 0; -} - -int RGWSI_RADOS::Pool::create(const DoutPrefixProvider *dpp, const vector<rgw_pool>& pools, vector<int> *retcodes) -{ - vector<librados::PoolAsyncCompletion *> completions; - vector<int> rets; - - librados::Rados *rad = rados_svc->get_rados_handle(); - for (auto iter = pools.begin(); iter != pools.end(); ++iter) { - librados::PoolAsyncCompletion *c = librados::Rados::pool_async_create_completion(); - completions.push_back(c); - auto& pool = *iter; - int ret = rad->pool_create_async(pool.name.c_str(), c); - rets.push_back(ret); - } - - vector<int>::iterator riter; - vector<librados::PoolAsyncCompletion *>::iterator citer; - - bool error = false; - ceph_assert(rets.size() == completions.size()); - for (riter = rets.begin(), citer = completions.begin(); riter != rets.end(); ++riter, ++citer) { - int r = *riter; - librados::PoolAsyncCompletion *c = *citer; - if (r == 0) { - c->wait(); - r = c->get_return_value(); - if (r < 0) { - ldpp_dout(dpp, 0) << "WARNING: async pool_create returned " << r << dendl; - error = true; - } - } - c->release(); - retcodes->push_back(r); - } - if (error) { - return 0; - } - - std::vector<librados::IoCtx> io_ctxs; - retcodes->clear(); - for (auto pool : pools) { - io_ctxs.emplace_back(); - int ret = rad->ioctx_create(pool.name.c_str(), io_ctxs.back()); - if (ret < 0) { - ldpp_dout(dpp, 0) << "WARNING: ioctx_create returned " << ret << dendl; - error = true; - } - retcodes->push_back(ret); - } - if (error) { - return 0; - } - - completions.clear(); - for (auto &io_ctx : io_ctxs) { - librados::PoolAsyncCompletion *c = - 
librados::Rados::pool_async_create_completion(); - completions.push_back(c); - int ret = io_ctx.application_enable_async(pg_pool_t::APPLICATION_NAME_RGW, - false, c); - ceph_assert(ret == 0); - } - - retcodes->clear(); - for (auto c : completions) { - c->wait(); - int ret = c->get_return_value(); - if (ret == -EOPNOTSUPP) { - ret = 0; - } else if (ret < 0) { - ldpp_dout(dpp, 0) << "WARNING: async application_enable returned " << ret - << dendl; - error = true; - } - c->release(); - retcodes->push_back(ret); - } - return 0; -} - -int RGWSI_RADOS::Pool::lookup() -{ - librados::Rados *rad = rados_svc->get_rados_handle(); - int ret = rad->pool_lookup(pool.name.c_str()); - if (ret < 0) { - return ret; - } - - return 0; -} - -int RGWSI_RADOS::Pool::open(const DoutPrefixProvider *dpp, const OpenParams& params) -{ - return rados_svc->open_pool_ctx(dpp, pool, state.ioctx, params); -} - -int RGWSI_RADOS::Pool::List::init(const DoutPrefixProvider *dpp, const string& marker, RGWAccessListFilter *filter) -{ - if (ctx.initialized) { - return -EINVAL; - } - - if (!pool) { - return -EINVAL; - } - - int r = pool->rados_svc->open_pool_ctx(dpp, pool->pool, ctx.ioctx); - if (r < 0) { - return r; - } - - librados::ObjectCursor oc; - if (!oc.from_str(marker)) { - ldpp_dout(dpp, 10) << "failed to parse cursor: " << marker << dendl; - return -EINVAL; - } - - try { - ctx.iter = ctx.ioctx.nobjects_begin(oc); - ctx.filter = filter; - ctx.initialized = true; - return 0; - } catch (const std::system_error& e) { - r = -e.code().value(); - ldpp_dout(dpp, 10) << "nobjects_begin threw " << e.what() - << ", returning " << r << dendl; - return r; - } catch (const std::exception& e) { - ldpp_dout(dpp, 10) << "nobjects_begin threw " << e.what() - << ", returning -5" << dendl; - return -EIO; - } -} - -int RGWSI_RADOS::Pool::List::get_next(const DoutPrefixProvider *dpp, - int max, - std::vector<string> *oids, - bool *is_truncated) -{ - if (!ctx.initialized) { - return -EINVAL; - } - vector<rgw_bucket_dir_entry> objs; - int r = pool->rados_svc->pool_iterate(dpp, ctx.ioctx, ctx.iter, max, objs, ctx.filter, is_truncated); - if (r < 0) { - if(r != -ENOENT) { - ldpp_dout(dpp, 10) << "failed to list objects pool_iterate returned r=" << r << dendl; - } - return r; - } - - for (auto& o : objs) { - oids->push_back(o.key.name); - } - - return oids->size(); -} - -RGWSI_RADOS::Obj RGWSI_RADOS::Handle::obj(const rgw_raw_obj& o) -{ - return RGWSI_RADOS::Obj(rados_svc, o); -} -int RGWSI_RADOS::Handle::watch_flush() -{ - librados::Rados *rad = rados_svc->get_rados_handle(); - return rad->watch_flush(); -} - -int RGWSI_RADOS::Handle::mon_command(std::string cmd, - const bufferlist& inbl, - bufferlist *outbl, - std::string *outs) -{ - librados::Rados *rad = rados_svc->get_rados_handle(); - return rad->mon_command(cmd, inbl, outbl, outs); -} - -int RGWSI_RADOS::Pool::List::get_marker(string *marker) -{ - if (!ctx.initialized) { - return -EINVAL; - } - - *marker = ctx.iter.get_cursor().to_str(); - return 0; -} - -int RGWSI_RADOS::clog_warn(const string& msg) -{ - string cmd = - "{" - "\"prefix\": \"log\", " - "\"level\": \"warn\", " - "\"logtext\": [\"" + msg + "\"]" - "}"; - - bufferlist inbl; - auto h = handle(); - return h.mon_command(cmd, inbl, nullptr, nullptr); -} - -bool RGWSI_RADOS::check_secure_mon_conn(const DoutPrefixProvider *dpp) const -{ - AuthRegistry reg(cct); - - reg.refresh_config(); - - std::vector<uint32_t> methods; - std::vector<uint32_t> modes; - - reg.get_supported_methods(CEPH_ENTITY_TYPE_MON, &methods, &modes); - 
ldpp_dout(dpp, 20) << __func__ << "(): auth registry supported: methods=" << methods << " modes=" << modes << dendl; - - for (auto method : methods) { - if (!reg.is_secure_method(method)) { - ldpp_dout(dpp, 20) << __func__ << "(): method " << method << " is insecure" << dendl; - return false; - } - } - - for (auto mode : modes) { - if (!reg.is_secure_mode(mode)) { - ldpp_dout(dpp, 20) << __func__ << "(): mode " << mode << " is insecure" << dendl; - return false; - } - } - - return true; -} - diff --git a/src/rgw/services/svc_rados.h b/src/rgw/services/svc_rados.h deleted file mode 100644 index ede029aa897..00000000000 --- a/src/rgw/services/svc_rados.h +++ /dev/null @@ -1,252 +0,0 @@ -// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- -// vim: ts=8 sw=2 smarttab ft=cpp - -#pragma once - -#include "rgw_service.h" - -#include "include/rados/librados.hpp" -#include "common/async/yield_context.h" - -class RGWAsyncRadosProcessor; - -class RGWAccessListFilter { -public: - virtual ~RGWAccessListFilter() {} - virtual bool filter(const std::string& name, std::string& key) = 0; -}; - -struct RGWAccessListFilterPrefix : public RGWAccessListFilter { - std::string prefix; - - explicit RGWAccessListFilterPrefix(const std::string& _prefix) : prefix(_prefix) {} - bool filter(const std::string& name, std::string& key) override { - return (prefix.compare(key.substr(0, prefix.size())) == 0); - } -}; - -class RGWSI_RADOS : public RGWServiceInstance -{ - librados::Rados rados; - std::unique_ptr<RGWAsyncRadosProcessor> async_processor; - - int do_start(optional_yield, const DoutPrefixProvider *dpp) override; - -public: - struct OpenParams { - bool create{true}; - bool mostly_omap{false}; - - OpenParams() {} - - OpenParams& set_create(bool _create) { - create = _create; - return *this; - } - OpenParams& set_mostly_omap(bool _mostly_omap) { - mostly_omap = _mostly_omap; - return *this; - } - }; - -private: - int open_pool_ctx(const DoutPrefixProvider *dpp, const rgw_pool& pool, librados::IoCtx& io_ctx, - const OpenParams& params = {}); - int pool_iterate(const DoutPrefixProvider *dpp, - librados::IoCtx& ioctx, - librados::NObjectIterator& iter, - uint32_t num, std::vector<rgw_bucket_dir_entry>& objs, - RGWAccessListFilter *filter, - bool *is_truncated); - -public: - RGWSI_RADOS(CephContext *cct); - ~RGWSI_RADOS(); - librados::Rados* get_rados_handle(); - - void init() {} - void shutdown() override; - void stop_processor(); - - std::string cluster_fsid(); - uint64_t instance_id(); - bool check_secure_mon_conn(const DoutPrefixProvider *dpp) const; - - RGWAsyncRadosProcessor *get_async_processor() { - return async_processor.get(); - } - - int clog_warn(const std::string& msg); - - class Handle; - - class Pool { - friend class RGWSI_RADOS; - friend Handle; - friend class Obj; - - RGWSI_RADOS *rados_svc{nullptr}; - rgw_pool pool; - - struct State { - librados::IoCtx ioctx; - } state; - - Pool(RGWSI_RADOS *_rados_svc, - const rgw_pool& _pool) : rados_svc(_rados_svc), - pool(_pool) {} - - Pool(RGWSI_RADOS *_rados_svc) : rados_svc(_rados_svc) {} - public: - Pool() {} - - int create(const DoutPrefixProvider *dpp); - int create(const DoutPrefixProvider *dpp, const std::vector<rgw_pool>& pools, std::vector<int> *retcodes); - int lookup(); - int open(const DoutPrefixProvider *dpp, const OpenParams& params = {}); - - const rgw_pool& get_pool() { - return pool; - } - - librados::IoCtx& ioctx() & { - return state.ioctx; - } - - librados::IoCtx&& ioctx() && { - return std::move(state.ioctx); - } - - struct 
List { - Pool *pool{nullptr}; - - struct Ctx { - bool initialized{false}; - librados::IoCtx ioctx; - librados::NObjectIterator iter; - RGWAccessListFilter *filter{nullptr}; - } ctx; - - List() {} - List(Pool *_pool) : pool(_pool) {} - - int init(const DoutPrefixProvider *dpp, const std::string& marker, RGWAccessListFilter *filter = nullptr); - int get_next(const DoutPrefixProvider *dpp, int max, - std::vector<std::string> *oids, - bool *is_truncated); - - int get_marker(std::string *marker); - }; - - List op() { - return List(this); - } - - friend List; - }; - - - struct rados_ref { - RGWSI_RADOS::Pool pool; - rgw_raw_obj obj; - }; - - class Obj { - friend class RGWSI_RADOS; - friend class Handle; - - RGWSI_RADOS *rados_svc{nullptr}; - rados_ref ref; - - void init(const rgw_raw_obj& obj); - - Obj(RGWSI_RADOS *_rados_svc, const rgw_raw_obj& _obj) - : rados_svc(_rados_svc) { - init(_obj); - } - - Obj(Pool& pool, const std::string& oid); - - public: - Obj() {} - - int open(const DoutPrefixProvider *dpp); - - int operate(const DoutPrefixProvider *dpp, librados::ObjectWriteOperation *op, optional_yield y, - int flags = 0); - int operate(const DoutPrefixProvider *dpp, librados::ObjectReadOperation *op, bufferlist *pbl, - optional_yield y, int flags = 0); - int aio_operate(librados::AioCompletion *c, librados::ObjectWriteOperation *op); - int aio_operate(librados::AioCompletion *c, librados::ObjectReadOperation *op, - bufferlist *pbl); - - int watch(uint64_t *handle, librados::WatchCtx2 *ctx); - int aio_watch(librados::AioCompletion *c, uint64_t *handle, librados::WatchCtx2 *ctx); - int unwatch(uint64_t handle); - int notify(const DoutPrefixProvider *dpp, bufferlist& bl, uint64_t timeout_ms, - bufferlist *pbl, optional_yield y); - void notify_ack(uint64_t notify_id, - uint64_t cookie, - bufferlist& bl); - - uint64_t get_last_version(); - - rados_ref& get_ref() { return ref; } - const rados_ref& get_ref() const { return ref; } - - const rgw_raw_obj& get_raw_obj() const { - return ref.obj; - } - }; - - class Handle { - friend class RGWSI_RADOS; - - RGWSI_RADOS *rados_svc{nullptr}; - - Handle(RGWSI_RADOS *_rados_svc) : rados_svc(_rados_svc) {} - public: - Obj obj(const rgw_raw_obj& o); - - Pool pool(const rgw_pool& p) { - return Pool(rados_svc, p); - } - - int watch_flush(); - - int mon_command(std::string cmd, - const bufferlist& inbl, - bufferlist *outbl, - std::string *outs); - }; - - Handle handle() { - return Handle(this); - } - - Obj obj(const rgw_raw_obj& o) { - return Obj(this, o); - } - - Obj obj(Pool& pool, const std::string& oid) { - return Obj(pool, oid); - } - - Pool pool() { - return Pool(this); - } - - Pool pool(const rgw_pool& p) { - return Pool(this, p); - } - - friend Obj; - friend Pool; - friend Pool::List; -}; - -using rgw_rados_ref = RGWSI_RADOS::rados_ref; - -inline std::ostream& operator<<(std::ostream& out, const RGWSI_RADOS::Obj& obj) { - return out << obj.get_raw_obj(); -} diff --git a/src/rgw/services/svc_sys_obj.cc b/src/rgw/services/svc_sys_obj.cc index 310e60514d2..38e7acdfe0b 100644 --- a/src/rgw/services/svc_sys_obj.cc +++ b/src/rgw/services/svc_sys_obj.cc @@ -3,7 +3,6 @@ #include "svc_sys_obj.h" #include "svc_sys_obj_core.h" -#include "svc_rados.h" #include "svc_zone.h" #include "rgw_zone.h" diff --git a/src/rgw/services/svc_sys_obj.h b/src/rgw/services/svc_sys_obj.h index 292340dcbe3..0bba2fbe01d 100644 --- a/src/rgw/services/svc_sys_obj.h +++ b/src/rgw/services/svc_sys_obj.h @@ -7,7 +7,6 @@ #include "rgw_service.h" -#include "svc_rados.h" #include 
"svc_sys_obj_types.h" #include "svc_sys_obj_core_types.h" @@ -246,12 +245,12 @@ public: friend class Pool::Op; protected: - RGWSI_RADOS *rados_svc{nullptr}; + librados::Rados* rados{nullptr}; RGWSI_SysObj_Core *core_svc{nullptr}; - void init(RGWSI_RADOS *_rados_svc, + void init(librados::Rados* rados_, RGWSI_SysObj_Core *_core_svc) { - rados_svc = _rados_svc; + rados = rados_; core_svc = _core_svc; } diff --git a/src/rgw/services/svc_sys_obj_cache.cc b/src/rgw/services/svc_sys_obj_cache.cc index d1b7a3dbb3e..cd4e2c405ab 100644 --- a/src/rgw/services/svc_sys_obj_cache.cc +++ b/src/rgw/services/svc_sys_obj_cache.cc @@ -1,4 +1,3 @@ - // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- // vim: ts=8 sw=2 smarttab ft=cpp diff --git a/src/rgw/services/svc_sys_obj_cache.h b/src/rgw/services/svc_sys_obj_cache.h index f7950843fa9..8e2f5845dc1 100644 --- a/src/rgw/services/svc_sys_obj_cache.h +++ b/src/rgw/services/svc_sys_obj_cache.h @@ -17,7 +17,7 @@ class RGWSI_SysObj_Cache_ASocketHook; class RGWSI_SysObj_Cache : public RGWSI_SysObj_Core { friend class RGWSI_SysObj_Cache_CB; - friend class RGWServices_Def; + friend RGWServices_Def; friend class ASocketHandler; RGWSI_Notify *notify_svc{nullptr}; @@ -27,10 +27,10 @@ class RGWSI_SysObj_Cache : public RGWSI_SysObj_Core void normalize_pool_and_obj(const rgw_pool& src_pool, const std::string& src_obj, rgw_pool& dst_pool, std::string& dst_obj); protected: - void init(RGWSI_RADOS *_rados_svc, + void init(librados::Rados* rados_, RGWSI_Zone *_zone_svc, RGWSI_Notify *_notify_svc) { - core_init(_rados_svc, _zone_svc); + core_init(rados_, _zone_svc); notify_svc = _notify_svc; } @@ -80,12 +80,12 @@ protected: real_time set_mtime, optional_yield y) override; - int write_data(const DoutPrefixProvider *dpp, + int write_data(const DoutPrefixProvider *dpp, const rgw_raw_obj& obj, const bufferlist& bl, bool exclusive, RGWObjVersionTracker *objv_tracker, - optional_yield y); + optional_yield y) override; int distribute_cache(const DoutPrefixProvider *dpp, const std::string& normal_name, const rgw_raw_obj& obj, ObjectCacheInfo& obj_info, int op, diff --git a/src/rgw/services/svc_sys_obj_core.cc b/src/rgw/services/svc_sys_obj_core.cc index 30308969131..397709c5d99 100644 --- a/src/rgw/services/svc_sys_obj_core.cc +++ b/src/rgw/services/svc_sys_obj_core.cc @@ -2,7 +2,6 @@ // vim: ts=8 sw=2 smarttab ft=cpp #include "svc_sys_obj_core.h" -#include "svc_rados.h" #include "svc_zone.h" #include "rgw_tools.h" @@ -12,10 +11,10 @@ using namespace std; int RGWSI_SysObj_Core_GetObjState::get_rados_obj(const DoutPrefixProvider *dpp, - RGWSI_RADOS *rados_svc, + librados::Rados* rados, RGWSI_Zone *zone_svc, const rgw_raw_obj& obj, - RGWSI_RADOS::Obj **pobj) + rgw_rados_ref** pobj) { if (!has_rados_obj) { if (obj.oid.empty()) { @@ -23,8 +22,7 @@ int RGWSI_SysObj_Core_GetObjState::get_rados_obj(const DoutPrefixProvider *dpp, return -EINVAL; } - rados_obj = rados_svc->obj(obj); - int r = rados_obj.open(dpp); + int r = rgw_get_rados_ref(dpp, rados, obj, &rados_obj); if (r < 0) { return r; } @@ -37,15 +35,14 @@ int RGWSI_SysObj_Core_GetObjState::get_rados_obj(const DoutPrefixProvider *dpp, int RGWSI_SysObj_Core::get_rados_obj(const DoutPrefixProvider *dpp, RGWSI_Zone *zone_svc, const rgw_raw_obj& obj, - RGWSI_RADOS::Obj *pobj) + rgw_rados_ref* pobj) { if (obj.oid.empty()) { ldpp_dout(dpp, 0) << "ERROR: obj.oid is empty" << dendl; return -EINVAL; } - *pobj = rados_svc->obj(obj); - int r = pobj->open(dpp); + int r = rgw_get_rados_ref(dpp, rados, obj, pobj); if (r < 0) { 
return r; } @@ -59,7 +56,7 @@ int RGWSI_SysObj_Core::raw_stat(const DoutPrefixProvider *dpp, const rgw_raw_obj RGWObjVersionTracker *objv_tracker, optional_yield y) { - RGWSI_RADOS::Obj rados_obj; + rgw_rados_ref rados_obj; int r = get_rados_obj(dpp, zone_svc, obj, &rados_obj); if (r < 0) { return r; @@ -172,7 +169,7 @@ int RGWSI_SysObj_Core::read(const DoutPrefixProvider *dpp, } } - RGWSI_RADOS::Obj rados_obj; + rgw_rados_ref rados_obj; int r = get_rados_obj(dpp, zone_svc, obj, &rados_obj); if (r < 0) { ldpp_dout(dpp, 20) << "get_rados_obj() on obj=" << obj << " returned " << r << dendl; @@ -185,7 +182,7 @@ int RGWSI_SysObj_Core::read(const DoutPrefixProvider *dpp, } ldpp_dout(dpp, 20) << "rados_obj.operate() r=" << r << " bl.length=" << bl->length() << dendl; - uint64_t op_ver = rados_obj.get_last_version(); + uint64_t op_ver = rados_obj.ioctx.get_last_version(); if (read_state.last_ver > 0 && read_state.last_ver != op_ver) { @@ -218,7 +215,7 @@ int RGWSI_SysObj_Core::get_attr(const DoutPrefixProvider *dpp, bufferlist *dest, optional_yield y) { - RGWSI_RADOS::Obj rados_obj; + rgw_rados_ref rados_obj; int r = get_rados_obj(dpp, zone_svc, obj, &rados_obj); if (r < 0) { ldpp_dout(dpp, 20) << "get_rados_obj() on obj=" << obj << " returned " << r << dendl; @@ -229,7 +226,7 @@ int RGWSI_SysObj_Core::get_attr(const DoutPrefixProvider *dpp, int rval; op.getxattr(name, dest, &rval); - + r = rados_obj.operate(dpp, &op, nullptr, y); if (r < 0) return r; @@ -244,7 +241,7 @@ int RGWSI_SysObj_Core::set_attrs(const DoutPrefixProvider *dpp, RGWObjVersionTracker *objv_tracker, bool exclusive, optional_yield y) { - RGWSI_RADOS::Obj rados_obj; + rgw_rados_ref rados_obj; int r = get_rados_obj(dpp, zone_svc, obj, &rados_obj); if (r < 0) { ldpp_dout(dpp, 20) << "get_rados_obj() on obj=" << obj << " returned " << r << dendl; @@ -301,7 +298,7 @@ int RGWSI_SysObj_Core::omap_get_vals(const DoutPrefixProvider *dpp, bool *pmore, optional_yield y) { - RGWSI_RADOS::Obj rados_obj; + rgw_rados_ref rados_obj; int r = get_rados_obj(dpp, zone_svc, obj, &rados_obj); if (r < 0) { ldpp_dout(dpp, 20) << "get_rados_obj() on obj=" << obj << " returned " << r << dendl; @@ -341,7 +338,7 @@ int RGWSI_SysObj_Core::omap_get_all(const DoutPrefixProvider *dpp, std::map<string, bufferlist> *m, optional_yield y) { - RGWSI_RADOS::Obj rados_obj; + rgw_rados_ref rados_obj; int r = get_rados_obj(dpp, zone_svc, obj, &rados_obj); if (r < 0) { ldpp_dout(dpp, 20) << "get_rados_obj() on obj=" << obj << " returned " << r << dendl; @@ -359,7 +356,7 @@ int RGWSI_SysObj_Core::omap_get_all(const DoutPrefixProvider *dpp, std::map<string, bufferlist> t; int rval; op.omap_get_vals2(start_after, count, &t, &more, &rval); - + r = rados_obj.operate(dpp, &op, nullptr, y); if (r < 0) { return r; @@ -377,7 +374,7 @@ int RGWSI_SysObj_Core::omap_set(const DoutPrefixProvider *dpp, const rgw_raw_obj bufferlist& bl, bool must_exist, optional_yield y) { - RGWSI_RADOS::Obj rados_obj; + rgw_rados_ref rados_obj; int r = get_rados_obj(dpp, zone_svc, obj, &rados_obj); if (r < 0) { ldpp_dout(dpp, 20) << "get_rados_obj() on obj=" << obj << " returned " << r << dendl; @@ -400,7 +397,7 @@ int RGWSI_SysObj_Core::omap_set(const DoutPrefixProvider *dpp, const rgw_raw_obj const std::map<std::string, bufferlist>& m, bool must_exist, optional_yield y) { - RGWSI_RADOS::Obj rados_obj; + rgw_rados_ref rados_obj; int r = get_rados_obj(dpp, zone_svc, obj, &rados_obj); if (r < 0) { ldpp_dout(dpp, 20) << "get_rados_obj() on obj=" << obj << " returned " << r << dendl; @@ -418,7 +415,7 @@ 
int RGWSI_SysObj_Core::omap_set(const DoutPrefixProvider *dpp, const rgw_raw_obj int RGWSI_SysObj_Core::omap_del(const DoutPrefixProvider *dpp, const rgw_raw_obj& obj, const std::string& key, optional_yield y) { - RGWSI_RADOS::Obj rados_obj; + rgw_rados_ref rados_obj; int r = get_rados_obj(dpp, zone_svc, obj, &rados_obj); if (r < 0) { ldpp_dout(dpp, 20) << "get_rados_obj() on obj=" << obj << " returned " << r << dendl; @@ -440,7 +437,7 @@ int RGWSI_SysObj_Core::notify(const DoutPrefixProvider *dpp, const rgw_raw_obj& uint64_t timeout_ms, bufferlist *pbl, optional_yield y) { - RGWSI_RADOS::Obj rados_obj; + rgw_rados_ref rados_obj; int r = get_rados_obj(dpp, zone_svc, obj, &rados_obj); if (r < 0) { ldpp_dout(dpp, 20) << "get_rados_obj() on obj=" << obj << " returned " << r << dendl; @@ -456,7 +453,7 @@ int RGWSI_SysObj_Core::remove(const DoutPrefixProvider *dpp, const rgw_raw_obj& obj, optional_yield y) { - RGWSI_RADOS::Obj rados_obj; + rgw_rados_ref rados_obj; int r = get_rados_obj(dpp, zone_svc, obj, &rados_obj); if (r < 0) { ldpp_dout(dpp, 20) << "get_rados_obj() on obj=" << obj << " returned " << r << dendl; @@ -487,7 +484,7 @@ int RGWSI_SysObj_Core::write(const DoutPrefixProvider *dpp, real_time set_mtime, optional_yield y) { - RGWSI_RADOS::Obj rados_obj; + rgw_rados_ref rados_obj; int r = get_rados_obj(dpp, zone_svc, obj, &rados_obj); if (r < 0) { ldpp_dout(dpp, 20) << "get_rados_obj() on obj=" << obj << " returned " << r << dendl; @@ -552,7 +549,7 @@ int RGWSI_SysObj_Core::write_data(const DoutPrefixProvider *dpp, RGWObjVersionTracker *objv_tracker, optional_yield y) { - RGWSI_RADOS::Obj rados_obj; + rgw_rados_ref rados_obj; int r = get_rados_obj(dpp, zone_svc, obj, &rados_obj); if (r < 0) { ldpp_dout(dpp, 20) << "get_rados_obj() on obj=" << obj << " returned " << r << dendl; @@ -585,21 +582,17 @@ int RGWSI_SysObj_Core::pool_list_prefixed_objs(const DoutPrefixProvider *dpp, { bool is_truncated; - auto rados_pool = rados_svc->pool(pool); - - auto op = rados_pool.op(); + librados::IoCtx rados_pool; + rgw_init_ioctx(dpp, rados, pool, rados_pool, true, false); - RGWAccessListFilterPrefix filter(prefix); - - int r = op.init(dpp, string(), &filter); - if (r < 0) { - return r; - } + auto filter{rgw::AccessListFilterPrefix(prefix)}; + std::string marker; do { vector<string> oids; -#define MAX_OBJS_DEFAULT 1000 - int r = op.get_next(dpp, MAX_OBJS_DEFAULT, &oids, &is_truncated); + static constexpr auto MAX_OBJS_DEFAULT = 1000u; + int r = rgw_list_pool(dpp, rados_pool, MAX_OBJS_DEFAULT, filter, marker, + &oids, &is_truncated); if (r < 0) { return r; } @@ -619,16 +612,13 @@ int RGWSI_SysObj_Core::pool_list_objects_init(const DoutPrefixProvider *dpp, const string& prefix, RGWSI_SysObj::Pool::ListCtx *_ctx) { - _ctx->impl.emplace<PoolListImplInfo>(prefix); + _ctx->impl.emplace<PoolListImplInfo>(prefix, marker); auto& ctx = static_cast<PoolListImplInfo&>(*_ctx->impl); - ctx.pool = rados_svc->pool(pool); - ctx.op = ctx.pool.op(); - - int r = ctx.op.init(dpp, marker, &ctx.filter); + int r = rgw_init_ioctx(dpp, rados, pool, ctx.pool, true, false); if (r < 0) { - ldpp_dout(dpp, 10) << "failed to list objects pool_iterate_begin() returned r=" << r << dendl; + ldpp_dout(dpp, 10) << "failed to create IoCtx returned r=" << r << dendl; return r; } return 0; @@ -644,7 +634,8 @@ int RGWSI_SysObj_Core::pool_list_objects_next(const DoutPrefixProvider *dpp, return -EINVAL; } auto& ctx = static_cast<PoolListImplInfo&>(*_ctx.impl); - int r = ctx.op.get_next(dpp, max, oids, is_truncated); + int r = 
rgw_list_pool(dpp, ctx.pool, max, ctx.filter, ctx.marker, oids, + is_truncated); if (r < 0) { if(r != -ENOENT) ldpp_dout(dpp, 10) << "failed to list objects pool_iterate returned r=" << r << dendl; @@ -662,5 +653,6 @@ int RGWSI_SysObj_Core::pool_list_objects_get_marker(RGWSI_SysObj::Pool::ListCtx& } auto& ctx = static_cast<PoolListImplInfo&>(*_ctx.impl); - return ctx.op.get_marker(marker); + *marker = ctx.marker; + return 0; } diff --git a/src/rgw/services/svc_sys_obj_core.h b/src/rgw/services/svc_sys_obj_core.h index d02a37eee8a..d9dfc02f01b 100644 --- a/src/rgw/services/svc_sys_obj_core.h +++ b/src/rgw/services/svc_sys_obj_core.h @@ -5,7 +5,6 @@ #include "rgw_service.h" -#include "svc_rados.h" #include "svc_sys_obj.h" #include "svc_sys_obj_core_types.h" @@ -16,22 +15,22 @@ struct rgw_cache_entry_info; class RGWSI_SysObj_Core : public RGWServiceInstance { - friend class RGWServices_Def; + friend struct RGWServices_Def; friend class RGWSI_SysObj; protected: - RGWSI_RADOS *rados_svc{nullptr}; + librados::Rados* rados{nullptr}; RGWSI_Zone *zone_svc{nullptr}; using GetObjState = RGWSI_SysObj_Core_GetObjState; using PoolListImplInfo = RGWSI_SysObj_Core_PoolListImplInfo; - void core_init(RGWSI_RADOS *_rados_svc, + void core_init(librados::Rados* rados_, RGWSI_Zone *_zone_svc) { - rados_svc = _rados_svc; + rados = rados_; zone_svc = _zone_svc; } - int get_rados_obj(const DoutPrefixProvider *dpp, RGWSI_Zone *zone_svc, const rgw_raw_obj& obj, RGWSI_RADOS::Obj *pobj); + int get_rados_obj(const DoutPrefixProvider *dpp, RGWSI_Zone *zone_svc, const rgw_raw_obj& obj, rgw_rados_ref* pobj); virtual int raw_stat(const DoutPrefixProvider *dpp, const rgw_raw_obj& obj, uint64_t *psize, real_time *pmtime, diff --git a/src/rgw/services/svc_sys_obj_core_types.h b/src/rgw/services/svc_sys_obj_core_types.h index 74f489d914e..a7b6aed0842 100644 --- a/src/rgw/services/svc_sys_obj_core_types.h +++ b/src/rgw/services/svc_sys_obj_core_types.h @@ -4,31 +4,33 @@ #pragma once +#include "rgw_tools.h" #include "rgw_service.h" -#include "svc_rados.h" #include "svc_sys_obj_types.h" struct RGWSI_SysObj_Core_GetObjState : public RGWSI_SysObj_Obj_GetObjState { - RGWSI_RADOS::Obj rados_obj; + rgw_rados_ref rados_obj; bool has_rados_obj{false}; uint64_t last_ver{0}; RGWSI_SysObj_Core_GetObjState() {} int get_rados_obj(const DoutPrefixProvider *dpp, - RGWSI_RADOS *rados_svc, + librados::Rados* rados_svc, RGWSI_Zone *zone_svc, const rgw_raw_obj& obj, - RGWSI_RADOS::Obj **pobj); + rgw_rados_ref** pobj); }; struct RGWSI_SysObj_Core_PoolListImplInfo : public RGWSI_SysObj_Pool_ListInfo { - RGWSI_RADOS::Pool pool; - RGWSI_RADOS::Pool::List op; - RGWAccessListFilterPrefix filter; + librados::IoCtx pool; + rgw::AccessListFilter filter; + std::string marker; - RGWSI_SysObj_Core_PoolListImplInfo(const std::string& prefix) : op(pool.op()), filter(prefix) {} + RGWSI_SysObj_Core_PoolListImplInfo(const std::string& prefix, + const std::string& marker) + : filter(rgw::AccessListFilterPrefix(prefix)), marker(marker) {} }; diff --git a/src/rgw/services/svc_tier_rados.cc b/src/rgw/services/svc_tier_rados.cc index ca87e8aceb9..86ccb5eca56 100644 --- a/src/rgw/services/svc_tier_rados.cc +++ b/src/rgw/services/svc_tier_rados.cc @@ -7,9 +7,7 @@ using namespace std; const std::string MP_META_SUFFIX = ".meta"; -MultipartMetaFilter::~MultipartMetaFilter() {} - -bool MultipartMetaFilter::filter(const string& name, string& key) { +bool MultipartMetaFilter(const string& name, string& key) { // the length of the suffix so we can skip past it static const 
size_t MP_META_SUFFIX_LEN = MP_META_SUFFIX.length(); @@ -32,5 +30,3 @@ bool MultipartMetaFilter::filter(const string& name, string& key) { return true; } - - diff --git a/src/rgw/services/svc_tier_rados.h b/src/rgw/services/svc_tier_rados.h index a2036b93347..0327e465881 100644 --- a/src/rgw/services/svc_tier_rados.h +++ b/src/rgw/services/svc_tier_rados.h @@ -20,8 +20,6 @@ #include "rgw_service.h" -#include "svc_rados.h" - extern const std::string MP_META_SUFFIX; class RGWMPObj { @@ -110,21 +108,14 @@ public: * the name provided is such. It will also extract the key used for * bucket index shard calculation from the adorned name. */ -class MultipartMetaFilter : public RGWAccessListFilter { -public: - MultipartMetaFilter() {} - - virtual ~MultipartMetaFilter() override; - - /** - * @param name [in] The object name as it appears in the bucket index. - * @param key [out] An output parameter that will contain the bucket - * index key if this entry is in the form of a multipart meta object. - * @return true if the name provided is in the form of a multipart meta - * object, false otherwise - */ - bool filter(const std::string& name, std::string& key) override; -}; +/** + * @param name [in] The object name as it appears in the bucket index. + * @param key [out] An output parameter that will contain the bucket + * index key if this entry is in the form of a multipart meta object. + * @return true if the name provided is in the form of a multipart meta + * object, false otherwise + */ +bool MultipartMetaFilter(const std::string& name, std::string& key); class RGWSI_Tier_RADOS : public RGWServiceInstance { diff --git a/src/rgw/services/svc_user.h b/src/rgw/services/svc_user.h index 1cb459d31cb..f53ec49f259 100644 --- a/src/rgw/services/svc_user.h +++ b/src/rgw/services/svc_user.h @@ -20,9 +20,9 @@ #include "svc_meta_be.h" #include "rgw_service.h" +#include "rgw_sal_fwd.h" class RGWUserBuckets; -class RGWGetUserStats_CB; class RGWSI_User : public RGWServiceInstance { @@ -122,6 +122,7 @@ public: optional_yield y) = 0; /* last time a stats update was done */ virtual int read_stats_async(const DoutPrefixProvider *dpp, - const rgw_user& user, RGWGetUserStats_CB *cb) = 0; + const rgw_user& user, + boost::intrusive_ptr<rgw::sal::ReadStatsCB> cb) = 0; }; diff --git a/src/rgw/services/svc_user_rados.cc b/src/rgw/services/svc_user_rados.cc index c99af935415..dd48bc4c01e 100644 --- a/src/rgw/services/svc_user_rados.cc +++ b/src/rgw/services/svc_user_rados.cc @@ -67,14 +67,14 @@ RGWSI_User_RADOS::RGWSI_User_RADOS(CephContext *cct): RGWSI_User(cct) { RGWSI_User_RADOS::~RGWSI_User_RADOS() { } -void RGWSI_User_RADOS::init(RGWSI_RADOS *_rados_svc, +void RGWSI_User_RADOS::init(librados::Rados* rados_, RGWSI_Zone *_zone_svc, RGWSI_SysObj *_sysobj_svc, RGWSI_SysObj_Cache *_cache_svc, RGWSI_Meta *_meta_svc, RGWSI_MetaBackend *_meta_be_svc, RGWSI_SyncModules *_sync_modules_svc) { svc.user = this; - svc.rados = _rados_svc; + rados = rados_; svc.zone = _zone_svc; svc.sysobj = _sysobj_svc; svc.cache = _cache_svc; @@ -602,8 +602,8 @@ int RGWSI_User_RADOS::get_user_info_by_access_key(RGWSI_MetaBackend::Context *ct int RGWSI_User_RADOS::cls_user_update_buckets(const DoutPrefixProvider *dpp, rgw_raw_obj& obj, list<cls_user_bucket_entry>& entries, bool add, optional_yield y) { - auto rados_obj = svc.rados->obj(obj); - int r = rados_obj.open(dpp); + rgw_rados_ref rados_obj; + int r = rgw_get_rados_ref(dpp, rados, obj, &rados_obj); if (r < 0) { return r; } @@ -628,8 +628,8 @@ int RGWSI_User_RADOS::cls_user_add_bucket(const 
DoutPrefixProvider *dpp, rgw_raw int RGWSI_User_RADOS::cls_user_remove_bucket(const DoutPrefixProvider *dpp, rgw_raw_obj& obj, const cls_user_bucket& bucket, optional_yield y) { - auto rados_obj = svc.rados->obj(obj); - int r = rados_obj.open(dpp); + rgw_rados_ref rados_obj; + int r = rgw_get_rados_ref(dpp, rados, obj, &rados_obj); if (r < 0) { return r; } @@ -716,8 +716,8 @@ int RGWSI_User_RADOS::cls_user_list_buckets(const DoutPrefixProvider *dpp, bool * const truncated, optional_yield y) { - auto rados_obj = svc.rados->obj(obj); - int r = rados_obj.open(dpp); + rgw_rados_ref rados_obj; + int r = rgw_get_rados_ref(dpp, rados, obj, &rados_obj); if (r < 0) { return r; } @@ -804,12 +804,14 @@ int RGWSI_User_RADOS::reset_bucket_stats(const DoutPrefixProvider *dpp, int RGWSI_User_RADOS::cls_user_reset_stats(const DoutPrefixProvider *dpp, const rgw_user& user, optional_yield y) { rgw_raw_obj obj = get_buckets_obj(user); - auto rados_obj = svc.rados->obj(obj); - int rval, r = rados_obj.open(dpp); + rgw_rados_ref rados_obj; + int r = rgw_get_rados_ref(dpp, rados, obj, &rados_obj); if (r < 0) { return r; } + int rval; + cls_user_reset_stats2_op call; cls_user_reset_stats2_ret ret; @@ -841,11 +843,12 @@ int RGWSI_User_RADOS::complete_flush_stats(const DoutPrefixProvider *dpp, const rgw_user& user, optional_yield y) { rgw_raw_obj obj = get_buckets_obj(user); - auto rados_obj = svc.rados->obj(obj); - int r = rados_obj.open(dpp); + rgw_rados_ref rados_obj; + int r = rgw_get_rados_ref(dpp, rados, obj, &rados_obj); if (r < 0) { return r; } + librados::ObjectWriteOperation op; ::cls_user_complete_stats_sync(op); return rados_obj.operate(dpp, &op, y); @@ -856,8 +859,8 @@ int RGWSI_User_RADOS::cls_user_get_header(const DoutPrefixProvider *dpp, optional_yield y) { rgw_raw_obj obj = get_buckets_obj(user); - auto rados_obj = svc.rados->obj(obj); - int r = rados_obj.open(dpp); + rgw_rados_ref rados_obj; + int r = rgw_get_rados_ref(dpp, rados, obj, &rados_obj); if (r < 0) { return r; } @@ -871,15 +874,13 @@ int RGWSI_User_RADOS::cls_user_get_header(const DoutPrefixProvider *dpp, int RGWSI_User_RADOS::cls_user_get_header_async(const DoutPrefixProvider *dpp, const string& user_str, RGWGetUserHeader_CB *cb) { rgw_raw_obj obj = get_buckets_obj(rgw_user(user_str)); - auto rados_obj = svc.rados->obj(obj); - int r = rados_obj.open(dpp); + rgw_rados_ref ref; + int r = rgw_get_rados_ref(dpp, rados, obj, &ref); if (r < 0) { return r; } - auto& ref = rados_obj.get_ref(); - - r = ::cls_user_get_header_async(ref.pool.ioctx(), ref.obj.oid, cb); + r = ::cls_user_get_header_async(ref.ioctx, ref.obj.oid, cb); if (r < 0) { return r; } @@ -927,36 +928,32 @@ int RGWSI_User_RADOS::read_stats(const DoutPrefixProvider *dpp, } class RGWGetUserStatsContext : public RGWGetUserHeader_CB { - RGWGetUserStats_CB *cb; + boost::intrusive_ptr<rgw::sal::ReadStatsCB> cb; public: - explicit RGWGetUserStatsContext(RGWGetUserStats_CB * const cb) - : cb(cb) {} + explicit RGWGetUserStatsContext(boost::intrusive_ptr<rgw::sal::ReadStatsCB> cb) + : cb(std::move(cb)) {} void handle_response(int r, cls_user_header& header) override { const cls_user_stats& hs = header.stats; - if (r >= 0) { - RGWStorageStats stats; - - stats.size = hs.total_bytes; - stats.size_rounded = hs.total_bytes_rounded; - stats.num_objects = hs.total_entries; - - cb->set_response(stats); - } + RGWStorageStats stats; - cb->handle_response(r); + stats.size = hs.total_bytes; + stats.size_rounded = hs.total_bytes_rounded; + stats.num_objects = hs.total_entries; - cb->put(); + 
cb->handle_response(r, stats); + cb.reset(); } }; int RGWSI_User_RADOS::read_stats_async(const DoutPrefixProvider *dpp, - const rgw_user& user, RGWGetUserStats_CB *_cb) + const rgw_user& user, + boost::intrusive_ptr<rgw::sal::ReadStatsCB> _cb) { string user_str = user.to_str(); - RGWGetUserStatsContext *cb = new RGWGetUserStatsContext(_cb); + RGWGetUserStatsContext *cb = new RGWGetUserStatsContext(std::move(_cb)); int r = cls_user_get_header_async(dpp, user_str, cb); if (r < 0) { delete cb; diff --git a/src/rgw/services/svc_user_rados.h b/src/rgw/services/svc_user_rados.h index 4712d1fdafd..6912327f0b1 100644 --- a/src/rgw/services/svc_user_rados.h +++ b/src/rgw/services/svc_user_rados.h @@ -23,7 +23,6 @@ #include "driver/rados/rgw_bucket.h" // FIXME: subclass dependency -class RGWSI_RADOS; class RGWSI_Zone; class RGWSI_SysObj; class RGWSI_SysObj_Cache; @@ -34,7 +33,6 @@ class RGWSI_MetaBackend_Handler; struct rgw_cache_entry_info; class RGWGetUserHeader_CB; -class RGWGetUserStats_CB; template <class T> class RGWChainedCacheImpl; @@ -97,9 +95,10 @@ class RGWSI_User_RADOS : public RGWSI_User int do_start(optional_yield, const DoutPrefixProvider *dpp) override; public: + librados::Rados* rados{nullptr}; + struct Svc { RGWSI_User_RADOS *user{nullptr}; - RGWSI_RADOS *rados{nullptr}; RGWSI_Zone *zone{nullptr}; RGWSI_SysObj *sysobj{nullptr}; RGWSI_SysObj_Cache *cache{nullptr}; @@ -111,7 +110,7 @@ public: RGWSI_User_RADOS(CephContext *cct); ~RGWSI_User_RADOS(); - void init(RGWSI_RADOS *_rados_svc, + void init(librados::Rados* rados_, RGWSI_Zone *_zone_svc, RGWSI_SysObj *_sysobj_svc, RGWSI_SysObj_Cache *_cache_svc, RGWSI_Meta *_meta_svc, RGWSI_MetaBackend *_meta_be_svc, @@ -207,6 +206,6 @@ public: optional_yield y) override; /* last time a stats update was done */ int read_stats_async(const DoutPrefixProvider *dpp, const rgw_user& user, - RGWGetUserStats_CB *cb) override; + boost::intrusive_ptr<rgw::sal::ReadStatsCB> cb) override; }; diff --git a/src/rgw/services/svc_zone.cc b/src/rgw/services/svc_zone.cc index 8bb55e4513d..70cf40eb6cb 100644 --- a/src/rgw/services/svc_zone.cc +++ b/src/rgw/services/svc_zone.cc @@ -2,10 +2,10 @@ // vim: ts=8 sw=2 smarttab ft=cpp #include "svc_zone.h" -#include "svc_rados.h" #include "svc_sys_obj.h" #include "svc_sync_modules.h" +#include "rgw_tools.h" #include "rgw_zone.h" #include "rgw_rest_conn.h" #include "rgw_bucket_sync.h" @@ -23,12 +23,12 @@ RGWSI_Zone::RGWSI_Zone(CephContext *cct) : RGWServiceInstance(cct) } void RGWSI_Zone::init(RGWSI_SysObj *_sysobj_svc, - RGWSI_RADOS * _rados_svc, + librados::Rados* rados_, RGWSI_SyncModules * _sync_modules_svc, RGWSI_Bucket_Sync *_bucket_sync_svc) { sysobj_svc = _sysobj_svc; - rados_svc = _rados_svc; + rados = rados_; sync_modules_svc = _sync_modules_svc; bucket_sync_svc = _bucket_sync_svc; @@ -134,11 +134,6 @@ int RGWSI_Zone::do_start(optional_yield y, const DoutPrefixProvider *dpp) assert(sysobj_svc->is_started()); /* if not then there's ordering issue */ - ret = rados_svc->start(y, dpp); - if (ret < 0) { - return ret; - } - ret = realm->init(dpp, cct, sysobj_svc, y); if (ret < 0 && ret != -ENOENT) { ldpp_dout(dpp, 0) << "failed reading realm info: ret "<< ret << " " << cpp_strerror(-ret) << dendl; diff --git a/src/rgw/services/svc_zone.h b/src/rgw/services/svc_zone.h index 3891b73ddb4..c4a3a28f0d7 100644 --- a/src/rgw/services/svc_zone.h +++ b/src/rgw/services/svc_zone.h @@ -6,7 +6,6 @@ #include "rgw_service.h" -class RGWSI_RADOS; class RGWSI_SysObj; class RGWSI_SyncModules; class RGWSI_Bucket_Sync; @@ -29,7 +28,7 @@ 
class RGWSI_Zone : public RGWServiceInstance friend struct RGWServices_Def; RGWSI_SysObj *sysobj_svc{nullptr}; - RGWSI_RADOS *rados_svc{nullptr}; + librados::Rados* rados{nullptr}; RGWSI_SyncModules *sync_modules_svc{nullptr}; RGWSI_Bucket_Sync *bucket_sync_svc{nullptr}; @@ -58,7 +57,7 @@ class RGWSI_Zone : public RGWServiceInstance std::unique_ptr<rgw_sync_policy_info> sync_policy; void init(RGWSI_SysObj *_sysobj_svc, - RGWSI_RADOS *_rados_svc, + librados::Rados* rados_, RGWSI_SyncModules *_sync_modules_svc, RGWSI_Bucket_Sync *_bucket_sync_svc); int do_start(optional_yield y, const DoutPrefixProvider *dpp) override; diff --git a/src/rgw/services/svc_zone_utils.cc b/src/rgw/services/svc_zone_utils.cc index 712bb97c9ba..c809974e6d5 100644 --- a/src/rgw/services/svc_zone_utils.cc +++ b/src/rgw/services/svc_zone_utils.cc @@ -2,9 +2,12 @@ // vim: ts=8 sw=2 smarttab ft=cpp #include "svc_zone_utils.h" -#include "svc_rados.h" #include "svc_zone.h" +#undef FMT_HEADER_ONLY +#define FMT_HEADER_ONLY 1 +#include <fmt/format.h> + #include "rgw_zone.h" using namespace std; @@ -18,26 +21,24 @@ int RGWSI_ZoneUtils::do_start(optional_yield, const DoutPrefixProvider *dpp) string RGWSI_ZoneUtils::gen_host_id() { /* uint64_t needs 16, two '-' separators and a trailing null */ - const string& zone_name = zone_svc->get_zone().name; - const string& zonegroup_name = zone_svc->get_zonegroup().get_name(); - char charbuf[16 + zone_name.size() + zonegroup_name.size() + 2 + 1]; - snprintf(charbuf, sizeof(charbuf), "%llx-%s-%s", (unsigned long long)rados_svc->instance_id(), zone_name.c_str(), zonegroup_name.c_str()); - return string(charbuf); + return fmt::format("{}-{}-{}", rados->get_instance_id(), + zone_svc->get_zone().name, + zone_svc->get_zonegroup().get_name()); } string RGWSI_ZoneUtils::unique_id(uint64_t unique_num) { - char buf[32]; - snprintf(buf, sizeof(buf), ".%llu.%llu", (unsigned long long)rados_svc->instance_id(), (unsigned long long)unique_num); - string s = zone_svc->get_zone_params().get_id() + buf; - return s; + return fmt::format("{}.{}.{}", + zone_svc->get_zone_params().get_id(), + rados->get_instance_id(), + unique_num); } void RGWSI_ZoneUtils::init_unique_trans_id_deps() { - char buf[16 + 2 + 1]; /* uint64_t needs 16, 2 hyphens add further 2 */ - - snprintf(buf, sizeof(buf), "-%llx-", (unsigned long long)rados_svc->instance_id()); - url_encode(string(buf) + zone_svc->get_zone().name, trans_id_suffix); + url_encode(fmt::format("-{}-{}", + rados->get_instance_id(), + zone_svc->get_zone().name), + trans_id_suffix); } /* In order to preserve compatibility with Swift API, transaction ID diff --git a/src/rgw/services/svc_zone_utils.h b/src/rgw/services/svc_zone_utils.h index 43e3fee8d93..41b9400335b 100644 --- a/src/rgw/services/svc_zone_utils.h +++ b/src/rgw/services/svc_zone_utils.h @@ -6,21 +6,20 @@ #include "rgw_service.h" -class RGWSI_RADOS; class RGWSI_Zone; class RGWSI_ZoneUtils : public RGWServiceInstance { friend struct RGWServices_Def; - RGWSI_RADOS *rados_svc{nullptr}; + librados::Rados* rados{nullptr}; RGWSI_Zone *zone_svc{nullptr}; std::string trans_id_suffix; - void init(RGWSI_RADOS *_rados_svc, + void init(librados::Rados* rados_, RGWSI_Zone *_zone_svc) { - rados_svc = _rados_svc; + rados = rados_; zone_svc = _zone_svc; } diff --git a/src/test/common/CMakeLists.txt b/src/test/common/CMakeLists.txt index c044daf662a..b2ed06ee306 100644 --- a/src/test/common/CMakeLists.txt +++ b/src/test/common/CMakeLists.txt @@ -390,6 +390,10 @@ target_link_libraries(unittest_blocked_completion 
Boost::system GTest::GTest) add_executable(unittest_allocate_unique test_allocate_unique.cc) add_ceph_unittest(unittest_allocate_unique) +add_executable(unittest_versioned_variant test_versioned_variant.cc) +add_ceph_unittest(unittest_versioned_variant) +target_link_libraries(unittest_versioned_variant common) + if(WITH_SYSTEMD) add_executable(unittest_journald_logger test_journald_logger.cc) target_link_libraries(unittest_journald_logger ceph-common) diff --git a/src/test/common/test_versioned_variant.cc b/src/test/common/test_versioned_variant.cc new file mode 100644 index 00000000000..81f12c23c2b --- /dev/null +++ b/src/test/common/test_versioned_variant.cc @@ -0,0 +1,341 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +/* + * Ceph - scalable distributed file system + * + * Copyright contributors to the Ceph project + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. + * + */ + +#include "common/versioned_variant.h" +#include <bitset> +#include <string> +#include <gtest/gtest.h> + +namespace { + +// type with custom encoding +struct custom_type { + void encode(bufferlist& bl) const { + ENCODE_START(0, 0, bl); + ENCODE_FINISH(bl); + } + void decode(bufferlist::const_iterator& bl) { + DECODE_START(0, bl); + DECODE_FINISH(bl); + } +}; +WRITE_CLASS_ENCODER(custom_type); + +} // anonymous namespace + +namespace ceph { + +TEST(VersionedVariant, Monostate) +{ + using Variant = std::variant<std::monostate>; + bufferlist bl; + { + Variant in; + versioned_variant::encode(in, bl); + } + { + Variant out; + auto p = bl.cbegin(); + ASSERT_NO_THROW(versioned_variant::decode(out, p)); + EXPECT_TRUE(std::holds_alternative<std::monostate>(out)); + } +} + +TEST(VersionedVariant, Custom) +{ + using Variant = std::variant<std::monostate, custom_type>; + bufferlist bl; + { + Variant in = custom_type{}; + versioned_variant::encode(in, bl); + } + { + Variant out; + auto p = bl.cbegin(); + ASSERT_NO_THROW(versioned_variant::decode(out, p)); + EXPECT_TRUE(std::holds_alternative<custom_type>(out)); + } +} + +TEST(VersionedVariant, DuplicateFirst) +{ + using Variant = std::variant<int, int>; + bufferlist bl; + { + Variant in; + in.emplace<0>(42); + versioned_variant::encode(in, bl); + } + { + Variant out; + auto p = bl.cbegin(); + ASSERT_NO_THROW(versioned_variant::decode(out, p)); + ASSERT_EQ(0, out.index()); + EXPECT_EQ(42, std::get<0>(out)); + } +} + +TEST(VersionedVariant, DuplicateSecond) +{ + using Variant = std::variant<int, int>; + bufferlist bl; + { + Variant in; + in.emplace<1>(42); + versioned_variant::encode(in, bl); + } + { + Variant out; + auto p = bl.cbegin(); + ASSERT_NO_THROW(versioned_variant::decode(out, p)); + ASSERT_EQ(1, out.index()); + EXPECT_EQ(42, std::get<1>(out)); + } +} + +TEST(VersionedVariant, EncodeOld) +{ + using V1 = std::variant<int>; + using V2 = std::variant<int, std::string>; + + bufferlist bl; + { + // use V1 to encode the initial type + V1 in = 42; + versioned_variant::encode(in, bl); + } + { + // can decode as V1 + V1 out; + auto p = bl.cbegin(); + ASSERT_NO_THROW(versioned_variant::decode(out, p)); + ASSERT_TRUE(std::holds_alternative<int>(out)); + EXPECT_EQ(42, std::get<int>(out)); + } + { + // can also decode as V2 + V2 out; + auto p = bl.cbegin(); + ASSERT_NO_THROW(versioned_variant::decode(out, p)); + ASSERT_TRUE(std::holds_alternative<int>(out)); + 
EXPECT_EQ(42, std::get<int>(out)); + } +} + +TEST(VersionedVariant, EncodeExisting) +{ + using V1 = std::variant<int>; + using V2 = std::variant<int, std::string>; + + bufferlist bl; + { + // use V2 to encode the type shared with V1 + V2 in = 42; + versioned_variant::encode(in, bl); + } + { + // can decode as V2 + V2 out; + auto p = bl.cbegin(); + ASSERT_NO_THROW(versioned_variant::decode(out, p)); + ASSERT_TRUE(std::holds_alternative<int>(out)); + EXPECT_EQ(42, std::get<int>(out)); + } + { + // can also decode as V1 + V1 out; + auto p = bl.cbegin(); + ASSERT_NO_THROW(versioned_variant::decode(out, p)); + ASSERT_TRUE(std::holds_alternative<int>(out)); + EXPECT_EQ(42, std::get<int>(out)); + } +} + +TEST(VersionedVariant, EncodeNew) +{ + using V1 = std::variant<int>; + using V2 = std::variant<int, std::string>; + + bufferlist bl; + { + // use V2 to encode the new string type + V2 in = "42"; + versioned_variant::encode(in, bl); + } + { + // can decode as V2 + V2 out; + auto p = bl.cbegin(); + ASSERT_NO_THROW(versioned_variant::decode(out, p)); + ASSERT_TRUE(std::holds_alternative<std::string>(out)); + EXPECT_EQ("42", std::get<std::string>(out)); + } + { + // can't decode as V1 + V1 out; + auto p = bl.cbegin(); + EXPECT_THROW(versioned_variant::decode(out, p), buffer::malformed_input); + } +} + + +TEST(ConvertedVariant, Custom) +{ + using Variant = std::variant<custom_type>; + bufferlist bl; + { + Variant in = custom_type{}; + converted_variant::encode(in, bl); + } + { + Variant out; + auto p = bl.cbegin(); + ASSERT_NO_THROW(converted_variant::decode(out, p)); + EXPECT_TRUE(std::holds_alternative<custom_type>(out)); + } +} + +TEST(ConvertedVariant, DuplicateFirst) +{ + using Variant = std::variant<custom_type, int, int>; + bufferlist bl; + { + Variant in; + in.emplace<1>(42); + converted_variant::encode(in, bl); + } + { + Variant out; + auto p = bl.cbegin(); + ASSERT_NO_THROW(converted_variant::decode(out, p)); + ASSERT_EQ(1, out.index()); + EXPECT_EQ(42, std::get<1>(out)); + } +} + +TEST(ConvertedVariant, DuplicateSecond) +{ + using Variant = std::variant<custom_type, int, int>; + bufferlist bl; + { + Variant in; + in.emplace<2>(42); + converted_variant::encode(in, bl); + } + { + Variant out; + auto p = bl.cbegin(); + ASSERT_NO_THROW(converted_variant::decode(out, p)); + ASSERT_EQ(2, out.index()); + EXPECT_EQ(42, std::get<2>(out)); + } +} + +TEST(ConvertedVariant, EncodeOld) +{ + using V1 = custom_type; + using V2 = std::variant<custom_type, int>; + + bufferlist bl; + { + // use V1 to encode the initial type + V1 in; + encode(in, bl); + } + { + // can decode as V1 + V1 out; + auto p = bl.cbegin(); + EXPECT_NO_THROW(decode(out, p)); + } + { + // can also decode as V2 + V2 out; + auto p = bl.cbegin(); + ASSERT_NO_THROW(converted_variant::decode(out, p)); + EXPECT_TRUE(std::holds_alternative<custom_type>(out)); + } +} + +TEST(ConvertedVariant, EncodeExisting) +{ + using V1 = custom_type; + using V2 = std::variant<custom_type, int>; + + bufferlist bl; + { + // use V2 to encode the type shared with V1 + V2 in; + converted_variant::encode(in, bl); + } + { + // can decode as V2 + V2 out; + auto p = bl.cbegin(); + ASSERT_NO_THROW(converted_variant::decode(out, p)); + EXPECT_TRUE(std::holds_alternative<custom_type>(out)); + } + { + // can also decode as V1 + V1 out; + auto p = bl.cbegin(); + EXPECT_NO_THROW(decode(out, p)); + } +} + +TEST(ConvertedVariant, EncodeNew) +{ + using V1 = custom_type; + using V2 = std::variant<custom_type, int>; + + bufferlist bl; + { + // use V2 to encode the new type + V2 
in = 42; + converted_variant::encode(in, bl); + } + { + // can decode as V2 + V2 out; + auto p = bl.cbegin(); + ASSERT_NO_THROW(converted_variant::decode(out, p)); + ASSERT_TRUE(std::holds_alternative<int>(out)); + EXPECT_EQ(42, std::get<int>(out)); + } + { + // can't decode as V1 + V1 out; + auto p = bl.cbegin(); + EXPECT_THROW(decode(out, p), buffer::malformed_input); + } +} + +TEST(Variant, GenerateTestInstances) +{ + using Variant = std::variant<int, bool, double>; + + std::bitset<std::variant_size_v<Variant>> bits; + ASSERT_TRUE(bits.none()); + + std::list<Variant> instances; + generate_test_instances(instances); + + for (const auto& v : instances) { + bits.set(v.index()); + } + + EXPECT_TRUE(bits.all()); +} + +} // namespace ceph diff --git a/src/test/librbd/CMakeLists.txt b/src/test/librbd/CMakeLists.txt index 0ae29b8bf18..c3f0edbea5d 100644 --- a/src/test/librbd/CMakeLists.txt +++ b/src/test/librbd/CMakeLists.txt @@ -19,7 +19,8 @@ set(librbd_test test_Operations.cc test_Trash.cc journal/test_Entries.cc - journal/test_Replay.cc) + journal/test_Replay.cc + journal/test_Stress.cc) add_library(rbd_test STATIC ${librbd_test}) target_link_libraries(rbd_test PRIVATE rbd_test_support diff --git a/src/test/librbd/io/test_mock_ImageRequest.cc b/src/test/librbd/io/test_mock_ImageRequest.cc index 9d6423d66c4..6ee67fe5f1c 100644 --- a/src/test/librbd/io/test_mock_ImageRequest.cc +++ b/src/test/librbd/io/test_mock_ImageRequest.cc @@ -16,12 +16,15 @@ namespace { struct MockTestImageCtx; struct MockTestJournal : public MockJournal { - MOCK_METHOD4(append_write_event, uint64_t(uint64_t, size_t, + MOCK_METHOD3(append_write_event, uint64_t(const io::Extents&, const bufferlist &, bool)); + MOCK_METHOD3(append_write_same_event, uint64_t(const io::Extents&, + const bufferlist &, bool)); MOCK_METHOD5(append_compare_and_write_event, uint64_t(uint64_t, size_t, const bufferlist &, const bufferlist &, bool)); + MOCK_METHOD3(append_discard_event, uint64_t(const io::Extents&, uint32_t, bool)); MOCK_METHOD5(append_io_event_mock, uint64_t(const journal::EventEntry&, uint64_t, size_t, bool, int)); uint64_t append_io_event(journal::EventEntry &&event_entry, @@ -119,9 +122,10 @@ struct TestMockIoImageRequest : public TestMockFixture { } } - void expect_journal_append_io_event(MockTestJournal &mock_journal, uint64_t journal_tid, - uint64_t offset, size_t length) { - EXPECT_CALL(mock_journal, append_io_event_mock(_, offset, length, _, _)) + void expect_journal_append_discard_event(MockTestJournal &mock_journal, + uint64_t journal_tid, + const io::Extents& extents) { + EXPECT_CALL(mock_journal, append_discard_event(extents, _, _)) .WillOnce(Return(journal_tid)); } @@ -386,8 +390,8 @@ TEST_F(TestMockIoImageRequest, PartialDiscardJournalAppendEnabled) { InSequence seq; expect_get_modify_timestamp(mock_image_ctx, false); expect_is_journal_appending(mock_journal, true); - expect_journal_append_io_event(mock_journal, 0, 16, 63); - expect_journal_append_io_event(mock_journal, 1, 84, 100); + expect_journal_append_discard_event(mock_journal, 0, + {{16, 63}, {84, 100}}); expect_object_discard_request(mock_image_ctx, 0, 16, 63, 0); expect_object_discard_request(mock_image_ctx, 0, 84, 100, 0); @@ -419,8 +423,8 @@ TEST_F(TestMockIoImageRequest, TailDiscardJournalAppendEnabled) { InSequence seq; expect_get_modify_timestamp(mock_image_ctx, false); expect_is_journal_appending(mock_journal, true); - expect_journal_append_io_event( - mock_journal, 0, ictx->layout.object_size - 1024, 1024); + expect_journal_append_discard_event( + 
mock_journal, 0, {{ictx->layout.object_size - 1024, 1024}}); expect_object_discard_request( mock_image_ctx, 0, ictx->layout.object_size - 1024, 1024, 0); @@ -452,7 +456,7 @@ TEST_F(TestMockIoImageRequest, PruneRequiredDiscardJournalAppendEnabled) { InSequence seq; expect_get_modify_timestamp(mock_image_ctx, false); expect_is_journal_appending(mock_journal, true); - EXPECT_CALL(mock_journal, append_io_event_mock(_, _, _, _, _)).Times(0); + EXPECT_CALL(mock_journal, append_discard_event(_, _, _)).Times(0); EXPECT_CALL(*mock_image_ctx.io_object_dispatcher, send(_)).Times(0); C_SaferCond aio_comp_ctx; @@ -482,7 +486,7 @@ TEST_F(TestMockIoImageRequest, LengthModifiedDiscardJournalAppendEnabled) { InSequence seq; expect_get_modify_timestamp(mock_image_ctx, false); expect_is_journal_appending(mock_journal, true); - expect_journal_append_io_event(mock_journal, 0, 32, 32); + expect_journal_append_discard_event(mock_journal, 0, {{32, 32}}); expect_object_discard_request(mock_image_ctx, 0, 32, 32, 0); C_SaferCond aio_comp_ctx; @@ -513,10 +517,9 @@ TEST_F(TestMockIoImageRequest, DiscardGranularityJournalAppendEnabled) { InSequence seq; expect_get_modify_timestamp(mock_image_ctx, false); expect_is_journal_appending(mock_journal, true); - expect_journal_append_io_event(mock_journal, 0, 32, 32); - expect_journal_append_io_event(mock_journal, 1, 96, 64); - expect_journal_append_io_event( - mock_journal, 2, ictx->layout.object_size - 32, 32); + expect_journal_append_discard_event( + mock_journal, 0, + {{32, 32}, {96, 64}, {ictx->layout.object_size - 32, 32}}); expect_object_discard_request(mock_image_ctx, 0, 32, 32, 0); expect_object_discard_request(mock_image_ctx, 0, 96, 64, 0); expect_object_discard_request( diff --git a/src/test/librbd/io/test_mock_ObjectRequest.cc b/src/test/librbd/io/test_mock_ObjectRequest.cc index 0690b7722a0..c20c825018b 100644 --- a/src/test/librbd/io/test_mock_ObjectRequest.cc +++ b/src/test/librbd/io/test_mock_ObjectRequest.cc @@ -1926,7 +1926,7 @@ TEST_F(TestMockIoObjectRequest, ListSnapsWholeObject) { ASSERT_EQ(0, open_image(m_image_name, &ictx)); MockTestImageCtx mock_image_ctx(*ictx); - mock_image_ctx.parent = &mock_image_ctx; + mock_image_ctx.snaps = {3}; InSequence seq; @@ -1937,13 +1937,120 @@ TEST_F(TestMockIoObjectRequest, ListSnapsWholeObject) { clone_info.cloneid = 3; clone_info.snaps = {3}; clone_info.overlap = std::vector<std::pair<uint64_t,uint64_t>>{{0, 1}}; - clone_info.size = 4194304; + clone_info.size = mock_image_ctx.layout.object_size; snap_set.clones.push_back(clone_info); clone_info.cloneid = CEPH_NOSNAP; clone_info.snaps = {}; clone_info.overlap = {}; - clone_info.size = 4194304; + clone_info.size = mock_image_ctx.layout.object_size; + snap_set.clones.push_back(clone_info); + + expect_list_snaps(mock_image_ctx, snap_set, 0); + + { + SnapshotDelta snapshot_delta; + C_SaferCond ctx; + auto req = MockObjectListSnapsRequest::create( + &mock_image_ctx, 0, {{0, mock_image_ctx.layout.object_size - 1}}, + {3, CEPH_NOSNAP}, 0, {}, &snapshot_delta, &ctx); + req->send(); + ASSERT_EQ(0, ctx.wait()); + + SnapshotDelta expected_snapshot_delta; + expected_snapshot_delta[{CEPH_NOSNAP,CEPH_NOSNAP}].insert( + 1, mock_image_ctx.layout.object_size - 2, + {SPARSE_EXTENT_STATE_DATA, mock_image_ctx.layout.object_size - 2}); + EXPECT_EQ(expected_snapshot_delta, snapshot_delta); + } + + expect_list_snaps(mock_image_ctx, snap_set, 0); + + { + SnapshotDelta snapshot_delta; + C_SaferCond ctx; + auto req = MockObjectListSnapsRequest::create( + &mock_image_ctx, 0, {{0, 
mock_image_ctx.layout.object_size - 1}}, + {3, CEPH_NOSNAP}, LIST_SNAPS_FLAG_WHOLE_OBJECT, {}, &snapshot_delta, + &ctx); + req->send(); + ASSERT_EQ(0, ctx.wait()); + + SnapshotDelta expected_snapshot_delta; + expected_snapshot_delta[{CEPH_NOSNAP,CEPH_NOSNAP}].insert( + 0, mock_image_ctx.layout.object_size - 1, + {SPARSE_EXTENT_STATE_DATA, mock_image_ctx.layout.object_size - 1}); + EXPECT_EQ(expected_snapshot_delta, snapshot_delta); + } +} + +TEST_F(TestMockIoObjectRequest, ListSnapsWholeObjectEndSize) { + librbd::ImageCtx *ictx; + ASSERT_EQ(0, open_image(m_image_name, &ictx)); + + MockTestImageCtx mock_image_ctx(*ictx); + mock_image_ctx.snaps = {3}; + + InSequence seq; + + librados::snap_set_t snap_set; + snap_set.seq = 3; + librados::clone_info_t clone_info; + + clone_info.cloneid = CEPH_NOSNAP; + clone_info.snaps = {}; + clone_info.overlap = {}; + // smaller than object extent (i.e. the op) to test end_size handling + clone_info.size = mock_image_ctx.layout.object_size - 2; + snap_set.clones.push_back(clone_info); + + expect_list_snaps(mock_image_ctx, snap_set, 0); + + { + SnapshotDelta snapshot_delta; + C_SaferCond ctx; + auto req = MockObjectListSnapsRequest::create( + &mock_image_ctx, 0, {{0, mock_image_ctx.layout.object_size - 1}}, + {4, CEPH_NOSNAP}, 0, {}, &snapshot_delta, &ctx); + req->send(); + ASSERT_EQ(0, ctx.wait()); + + EXPECT_TRUE(snapshot_delta.empty()); + } + + expect_list_snaps(mock_image_ctx, snap_set, 0); + + { + SnapshotDelta snapshot_delta; + C_SaferCond ctx; + auto req = MockObjectListSnapsRequest::create( + &mock_image_ctx, 0, {{0, mock_image_ctx.layout.object_size - 1}}, + {4, CEPH_NOSNAP}, LIST_SNAPS_FLAG_WHOLE_OBJECT, {}, &snapshot_delta, + &ctx); + req->send(); + ASSERT_EQ(0, ctx.wait()); + + EXPECT_TRUE(snapshot_delta.empty()); + } +} + +TEST_F(TestMockIoObjectRequest, ListSnapsNoSnapsInSnapSet) { + librbd::ImageCtx *ictx; + ASSERT_EQ(0, open_image(m_image_name, &ictx)); + + MockTestImageCtx mock_image_ctx(*ictx); + mock_image_ctx.snaps = {3}; + + InSequence seq; + + librados::snap_set_t snap_set; + snap_set.seq = 3; + librados::clone_info_t clone_info; + + clone_info.cloneid = 3; + clone_info.snaps = {}; + clone_info.overlap = {}; + clone_info.size = 0; snap_set.clones.push_back(clone_info); expect_list_snaps(mock_image_ctx, snap_set, 0); @@ -1960,7 +2067,7 @@ TEST_F(TestMockIoObjectRequest, ListSnapsWholeObject) { expected_snapshot_delta[{CEPH_NOSNAP,CEPH_NOSNAP}].insert( 0, mock_image_ctx.layout.object_size - 1, {SPARSE_EXTENT_STATE_DATA, mock_image_ctx.layout.object_size - 1}); - ASSERT_EQ(expected_snapshot_delta, snapshot_delta); + EXPECT_EQ(expected_snapshot_delta, snapshot_delta); } } // namespace io diff --git a/src/test/librbd/journal/test_Entries.cc b/src/test/librbd/journal/test_Entries.cc index c392fb9f88a..bb4b06c0368 100644 --- a/src/test/librbd/journal/test_Entries.cc +++ b/src/test/librbd/journal/test_Entries.cc @@ -196,6 +196,69 @@ TEST_F(TestJournalEntries, AioDiscard) { ASSERT_EQ(234U, aio_discard_event.length); } +TEST_F(TestJournalEntries, AioDiscardWithPrune) { + REQUIRE_FEATURE(RBD_FEATURE_JOURNALING); + + // The discard path can create multiple image extents (ImageRequest.cc) in the + // case where the discard request needs to be pruned and multiple objects are + // involved in the request. This test ensures that journal event entries are + // queued up for each image extent. + + // Create an image that is multiple objects so that we can force multiple + // image extents on the discard path. 
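// Worked example of the pruning this test relies on (assuming the default
// rbd_default_order of 22, i.e. 4 MiB objects, matching the image created
// below):
//   discard request: offset = object_size - 4096, length = 2 * object_size
//   with discard_granularity = 8192 the request is trimmed to whole
//   8192-byte chunks, i.e. [object_size, 3 * object_size - 8192), which
//   splits into the two image extents the loop below asserts on:
//     {object_size,     object_size}
//     {2 * object_size, object_size - 8192}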
+ CephContext* cct = reinterpret_cast<CephContext*>(_rados.cct()); + auto object_size = 1ull << cct->_conf.get_val<uint64_t>("rbd_default_order"); + auto image_size = 4 * object_size; + + auto image_name = get_temp_image_name(); + ASSERT_EQ(0, create_image_pp(m_rbd, m_ioctx, image_name, image_size)); + + librbd::ImageCtx *ictx; + ASSERT_EQ(0, open_image(image_name, &ictx)); + + ::journal::Journaler *journaler = create_journaler(ictx); + ASSERT_TRUE(journaler != NULL); + + C_SaferCond cond_ctx; + auto c = librbd::io::AioCompletion::create(&cond_ctx); + c->get(); + // We offset the discard by -4096 bytes and set discard granularity to 8192; + // this should cause two image extents to be formed in + // AbstractImageWriteRequest<I>::send_request(). + api::Io<>::aio_discard(*ictx, c, object_size - 4096, 2 * object_size, 8192, + true); + ASSERT_EQ(0, c->wait_for_complete()); + c->put(); + + for (uint64_t chunk = 0; chunk < 2; chunk++) { + auto offset = object_size; + auto size = object_size; + if (chunk == 1) { + offset = object_size * 2; + size = object_size - 8192; + } + + ::journal::ReplayEntry replay_entry; + if (!journaler->try_pop_front(&replay_entry)) { + ASSERT_TRUE(wait_for_entries_available(ictx)); + ASSERT_TRUE(journaler->try_pop_front(&replay_entry)); + } + + librbd::journal::EventEntry event_entry; + ASSERT_TRUE(get_event_entry(replay_entry, &event_entry)); + + ASSERT_EQ(librbd::journal::EVENT_TYPE_AIO_DISCARD, + event_entry.get_event_type()); + + librbd::journal::AioDiscardEvent aio_discard_event = + boost::get<librbd::journal::AioDiscardEvent>(event_entry.event); + ASSERT_EQ(offset, aio_discard_event.offset); + ASSERT_EQ(size, aio_discard_event.length); + + journaler->committed(replay_entry); + } +} + TEST_F(TestJournalEntries, AioFlush) { REQUIRE_FEATURE(RBD_FEATURE_JOURNALING); diff --git a/src/test/librbd/journal/test_Stress.cc b/src/test/librbd/journal/test_Stress.cc new file mode 100644 index 00000000000..d3df9147ae6 --- /dev/null +++ b/src/test/librbd/journal/test_Stress.cc @@ -0,0 +1,121 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "test/librados/test_cxx.h" +#include "test/librbd/test_fixture.h" +#include "test/librbd/test_support.h" +#include "cls/rbd/cls_rbd_types.h" +#include "cls/journal/cls_journal_types.h" +#include "cls/journal/cls_journal_client.h" +#include "journal/Journaler.h" +#include "librbd/ExclusiveLock.h" +#include "librbd/ImageCtx.h" +#include "librbd/ImageState.h" +#include "librbd/ImageWatcher.h" +#include "librbd/internal.h" +#include "librbd/Journal.h" +#include "librbd/Operations.h" +#include "librbd/api/Io.h" +#include "librbd/api/Snapshot.h" +#include "librbd/io/AioCompletion.h" +#include "librbd/io/ImageDispatchSpec.h" +#include "librbd/io/ImageRequest.h" +#include "librbd/io/ReadResult.h" +#include "librbd/journal/Types.h" +#include <boost/scope_exit.hpp> + +void register_test_journal_stress() { +} + +namespace librbd { +namespace journal { + +class TestJournalStress : public TestFixture { +}; + +TEST_F(TestJournalStress, DiscardWithPruneWriteOverlap) { + REQUIRE_FEATURE(RBD_FEATURE_JOURNALING); + + // Overlap discards and writes while discard pruning is occurring. This tests + // the conditions under which https://tracker.ceph.com/issues/63422 occurred. + + // Create an image that is multiple objects so that we can force multiple + // image extents on the discard path. 
+ int order = 22; + auto object_size = uint64_t{1} << order; + auto image_size = 4 * object_size; + + // Write-around cache required for overlapping I/O delays. + std::map<std::string, std::string> config; + config["rbd_cache"] = "true"; + config["rbd_cache_policy"] = "writearound"; + config["rbd_cache_max_dirty"] = std::to_string(image_size); + config["rbd_cache_writethrough_until_flush"] = "false"; + // XXX: Work around https://tracker.ceph.com/issues/63681, which this test + // exposes when run under Valgrind. + config["librados_thread_count"] = "15"; + + librados::Rados rados; + ASSERT_EQ("", connect_cluster_pp(rados, config)); + + librados::IoCtx ioctx; + ASSERT_EQ(0, rados.ioctx_create(_pool_name.c_str(), ioctx)); + + uint64_t features; + ASSERT_TRUE(::get_features(&features)); + auto image_name = get_temp_image_name(); + ASSERT_EQ(0, create_image_full_pp(m_rbd, ioctx, image_name, image_size, + features, false, &order)); + + auto ictx = new librbd::ImageCtx(image_name, "", nullptr, ioctx, false); + ASSERT_EQ(0, ictx->state->open(0)); + BOOST_SCOPE_EXIT(ictx) { + ictx->state->close(); + } BOOST_SCOPE_EXIT_END; + + std::thread write_thread( + [ictx, object_size]() { + std::string payload(object_size, '1'); + + for (auto i = 0; i < 200; i++) { + // Alternate overlaps with the two objects that the discard below + // touches. + for (auto offset = object_size; + offset < object_size * 3; + offset += object_size) { + bufferlist payload_bl; + payload_bl.append(payload); + auto aio_comp = new librbd::io::AioCompletion(); + api::Io<>::aio_write(*ictx, aio_comp, offset, payload.size(), + std::move(payload_bl), 0, true); + ASSERT_EQ(0, aio_comp->wait_for_complete()); + aio_comp->release(); + } + } + } + ); + + auto discard_exit = false; + std::thread discard_thread( + [ictx, object_size, &discard_exit]() { + while (!discard_exit) { + // We offset the discard by -4096 bytes and set discard granularity to + // 8192; this should cause two image extents to be formed in + // AbstractImageWriteRequest<I>::send_request() on objects 1 and 2, + // overlapping with the writes above. 
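// The pruned discard below covers objects 1 and 2 (offsets object_size and
// 2 * object_size), the same two objects the write thread above cycles over,
// so the two threads continually race on overlapping extents.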
+ auto aio_comp = new librbd::io::AioCompletion(); + api::Io<>::aio_discard(*ictx, aio_comp, object_size - 4096, + 2 * object_size, 8192, true); + ASSERT_EQ(0, aio_comp->wait_for_complete()); + aio_comp->release(); + } + } + ); + + write_thread.join(); + discard_exit = true; + discard_thread.join(); +} + +} // namespace journal +} // namespace librbd diff --git a/src/test/librbd/test_librbd.cc b/src/test/librbd/test_librbd.cc index f4bb74fe397..9d17c4d85cf 100644 --- a/src/test/librbd/test_librbd.cc +++ b/src/test/librbd/test_librbd.cc @@ -7358,61 +7358,6 @@ interval_set<uint64_t> round_diff_interval(const interval_set<uint64_t>& diff, return rounded_diff; } -TEST_F(TestLibRBD, SnapDiff) -{ - REQUIRE_FEATURE(RBD_FEATURE_FAST_DIFF); - - rados_ioctx_t ioctx; - rados_ioctx_create(_cluster, m_pool_name.c_str(), &ioctx); - - rbd_image_t image; - int order = 0; - std::string image_name = get_temp_image_name(); - uint64_t size = 100 << 20; - ASSERT_EQ(0, create_image(ioctx, image_name.c_str(), size, &order)); - ASSERT_EQ(0, rbd_open(ioctx, image_name.c_str(), &image, nullptr)); - - char test_data[TEST_IO_SIZE + 1]; - for (size_t i = 0; i < TEST_IO_SIZE; ++i) { - test_data[i] = (char) (rand() % (126 - 33) + 33); - } - test_data[TEST_IO_SIZE] = '\0'; - - ASSERT_PASSED(write_test_data, image, test_data, 0, - TEST_IO_SIZE, LIBRADOS_OP_FLAG_FADVISE_NOCACHE); - - interval_set<uint64_t> diff; - ASSERT_EQ(0, rbd_diff_iterate2(image, nullptr, 0, size, true, true, - iterate_cb, &diff)); - EXPECT_EQ(1 << order, diff.size()); - - ASSERT_EQ(0, rbd_snap_create(image, "snap1")); - ASSERT_EQ(0, rbd_snap_create(image, "snap2")); - - diff.clear(); - ASSERT_EQ(0, rbd_diff_iterate2(image, nullptr, 0, size, true, true, - iterate_cb, &diff)); - EXPECT_EQ(1 << order, diff.size()); - - diff.clear(); - ASSERT_EQ(0, rbd_diff_iterate2(image, "snap1", 0, size, true, true, - iterate_cb, &diff)); - EXPECT_EQ(0, diff.size()); - - diff.clear(); - ASSERT_EQ(0, rbd_diff_iterate2(image, "snap2", 0, size, true, true, - iterate_cb, &diff)); - EXPECT_EQ(0, diff.size()); - - ASSERT_EQ(0, rbd_snap_remove(image, "snap1")); - ASSERT_EQ(0, rbd_snap_remove(image, "snap2")); - - ASSERT_EQ(0, rbd_close(image)); - ASSERT_EQ(0, rbd_remove(ioctx, image_name.c_str())); - - rados_ioctx_destroy(ioctx); -} - template <typename T> class DiffIterateTest : public TestLibRBD { public: @@ -7501,12 +7446,276 @@ ostream& operator<<(ostream & o, const diff_extent& e) { int vector_iterate_cb(uint64_t off, size_t len, int exists, void *arg) { - cout << "iterate_cb " << off << "~" << len << std::endl; + //cout << "iterate_cb " << off << "~" << len << std::endl; vector<diff_extent> *diff = static_cast<vector<diff_extent> *>(arg); diff->push_back(diff_extent(off, len, exists, 0)); return 0; } +TYPED_TEST(DiffIterateTest, DiffIterateDeterministic) +{ + REQUIRE(!is_feature_enabled(RBD_FEATURE_STRIPINGV2)); + + rados_ioctx_t ioctx; + ASSERT_EQ(0, rados_ioctx_create(this->_cluster, this->m_pool_name.c_str(), + &ioctx)); + + rbd_image_t image; + int order = 22; + std::string name = this->get_temp_image_name(); + uint64_t size = 20 << 20; + + ASSERT_EQ(0, create_image(ioctx, name.c_str(), size, &order)); + ASSERT_EQ(0, rbd_open(ioctx, name.c_str(), &image, NULL)); + + uint64_t object_size = 0; + if (this->whole_object) { + object_size = 1 << order; + } + + std::vector<diff_extent> extents; + ASSERT_EQ(0, rbd_diff_iterate2(image, NULL, 0, size, true, this->whole_object, + vector_iterate_cb, &extents)); + ASSERT_EQ(0u, extents.size()); + + ASSERT_EQ(0, 
rbd_snap_create(image, "snap1")); + + std::string buf(256, '1'); + ASSERT_EQ(256, rbd_write(image, 0, 256, buf.data())); + ASSERT_EQ(0, rbd_diff_iterate2(image, NULL, 0, size, true, this->whole_object, + vector_iterate_cb, &extents)); + ASSERT_EQ(1u, extents.size()); + ASSERT_EQ(diff_extent(0, 256, true, object_size), extents[0]); + extents.clear(); + + ASSERT_EQ(0, rbd_snap_create(image, "snap2")); + + ASSERT_EQ(256, rbd_write(image, 1 << order, 256, buf.data())); + ASSERT_EQ(0, rbd_diff_iterate2(image, NULL, 0, size, true, this->whole_object, + vector_iterate_cb, &extents)); + ASSERT_EQ(2u, extents.size()); + ASSERT_EQ(diff_extent(0, 256, true, object_size), extents[0]); + ASSERT_EQ(diff_extent(1 << order, 256, true, object_size), extents[1]); + extents.clear(); + + ASSERT_EQ(0, rbd_snap_create(image, "snap3")); + + // 1. beginning of time -> HEAD + ASSERT_EQ(0, rbd_diff_iterate2(image, NULL, 0, size, true, this->whole_object, + vector_iterate_cb, &extents)); + ASSERT_EQ(2u, extents.size()); + ASSERT_EQ(diff_extent(0, 256, true, object_size), extents[0]); + ASSERT_EQ(diff_extent(1 << order, 256, true, object_size), extents[1]); + extents.clear(); + + // 2. snap1 -> HEAD + ASSERT_EQ(0, rbd_diff_iterate2(image, "snap1", 0, size, true, this->whole_object, + vector_iterate_cb, &extents)); + ASSERT_EQ(2u, extents.size()); + ASSERT_EQ(diff_extent(0, 256, true, object_size), extents[0]); + ASSERT_EQ(diff_extent(1 << order, 256, true, object_size), extents[1]); + extents.clear(); + + // 3. snap2 -> HEAD + ASSERT_EQ(0, rbd_diff_iterate2(image, "snap2", 0, size, true, this->whole_object, + vector_iterate_cb, &extents)); + ASSERT_EQ(1u, extents.size()); + ASSERT_EQ(diff_extent(1 << order, 256, true, object_size), extents[0]); + extents.clear(); + + // 4. snap3 -> HEAD + ASSERT_EQ(0, rbd_diff_iterate2(image, "snap3", 0, size, true, this->whole_object, + vector_iterate_cb, &extents)); + ASSERT_EQ(0u, extents.size()); + + ASSERT_PASSED(this->validate_object_map, image); + ASSERT_EQ(0, rbd_snap_set(image, "snap3")); + + // 5. beginning of time -> snap3 + ASSERT_EQ(0, rbd_diff_iterate2(image, NULL, 0, size, true, this->whole_object, + vector_iterate_cb, &extents)); + ASSERT_EQ(2u, extents.size()); + ASSERT_EQ(diff_extent(0, 256, true, object_size), extents[0]); + ASSERT_EQ(diff_extent(1 << order, 256, true, object_size), extents[1]); + extents.clear(); + + // 6. snap1 -> snap3 + ASSERT_EQ(0, rbd_diff_iterate2(image, "snap1", 0, size, true, this->whole_object, + vector_iterate_cb, &extents)); + ASSERT_EQ(2u, extents.size()); + ASSERT_EQ(diff_extent(0, 256, true, object_size), extents[0]); + ASSERT_EQ(diff_extent(1 << order, 256, true, object_size), extents[1]); + extents.clear(); + + // 7. snap2 -> snap3 + ASSERT_EQ(0, rbd_diff_iterate2(image, "snap2", 0, size, true, this->whole_object, + vector_iterate_cb, &extents)); + ASSERT_EQ(1u, extents.size()); + ASSERT_EQ(diff_extent(1 << order, 256, true, object_size), extents[0]); + extents.clear(); + + ASSERT_PASSED(this->validate_object_map, image); + ASSERT_EQ(0, rbd_snap_set(image, "snap2")); + + // 8. beginning of time -> snap2 + ASSERT_EQ(0, rbd_diff_iterate2(image, NULL, 0, size, true, this->whole_object, + vector_iterate_cb, &extents)); + ASSERT_EQ(1u, extents.size()); + ASSERT_EQ(diff_extent(0, 256, true, object_size), extents[0]); + extents.clear(); + + // 9. 
snap1 -> snap2 + ASSERT_EQ(0, rbd_diff_iterate2(image, "snap1", 0, size, true, this->whole_object, + vector_iterate_cb, &extents)); + ASSERT_EQ(1u, extents.size()); + ASSERT_EQ(diff_extent(0, 256, true, object_size), extents[0]); + extents.clear(); + + ASSERT_PASSED(this->validate_object_map, image); + ASSERT_EQ(0, rbd_snap_set(image, "snap1")); + + // 10. beginning of time -> snap1 + ASSERT_EQ(0, rbd_diff_iterate2(image, NULL, 0, size, true, this->whole_object, + vector_iterate_cb, &extents)); + ASSERT_EQ(0u, extents.size()); + + ASSERT_PASSED(this->validate_object_map, image); + + ASSERT_EQ(0, rbd_close(image)); + rados_ioctx_destroy(ioctx); +} + +TYPED_TEST(DiffIterateTest, DiffIterateDeterministicPP) +{ + REQUIRE(!is_feature_enabled(RBD_FEATURE_STRIPINGV2)); + + librados::IoCtx ioctx; + ASSERT_EQ(0, this->_rados.ioctx_create(this->m_pool_name.c_str(), ioctx)); + + librbd::RBD rbd; + librbd::Image image; + int order = 22; + std::string name = this->get_temp_image_name(); + uint64_t size = 20 << 20; + + ASSERT_EQ(0, create_image_pp(rbd, ioctx, name.c_str(), size, &order)); + ASSERT_EQ(0, rbd.open(ioctx, image, name.c_str(), NULL)); + + uint64_t object_size = 0; + if (this->whole_object) { + object_size = 1 << order; + } + + std::vector<diff_extent> extents; + ASSERT_EQ(0, image.diff_iterate2(NULL, 0, size, true, this->whole_object, + vector_iterate_cb, &extents)); + ASSERT_EQ(0u, extents.size()); + + ASSERT_EQ(0, image.snap_create("snap1")); + + ceph::bufferlist bl; + bl.append(std::string(256, '1')); + ASSERT_EQ(256, image.write(0, 256, bl)); + ASSERT_EQ(0, image.diff_iterate2(NULL, 0, size, true, this->whole_object, + vector_iterate_cb, &extents)); + ASSERT_EQ(1u, extents.size()); + ASSERT_EQ(diff_extent(0, 256, true, object_size), extents[0]); + extents.clear(); + + ASSERT_EQ(0, image.snap_create("snap2")); + + ASSERT_EQ(256, image.write(1 << order, 256, bl)); + ASSERT_EQ(0, image.diff_iterate2(NULL, 0, size, true, this->whole_object, + vector_iterate_cb, &extents)); + ASSERT_EQ(2u, extents.size()); + ASSERT_EQ(diff_extent(0, 256, true, object_size), extents[0]); + ASSERT_EQ(diff_extent(1 << order, 256, true, object_size), extents[1]); + extents.clear(); + + ASSERT_EQ(0, image.snap_create("snap3")); + + // 1. beginning of time -> HEAD + ASSERT_EQ(0, image.diff_iterate2(NULL, 0, size, true, this->whole_object, + vector_iterate_cb, &extents)); + ASSERT_EQ(2u, extents.size()); + ASSERT_EQ(diff_extent(0, 256, true, object_size), extents[0]); + ASSERT_EQ(diff_extent(1 << order, 256, true, object_size), extents[1]); + extents.clear(); + + // 2. snap1 -> HEAD + ASSERT_EQ(0, image.diff_iterate2("snap1", 0, size, true, this->whole_object, + vector_iterate_cb, &extents)); + ASSERT_EQ(2u, extents.size()); + ASSERT_EQ(diff_extent(0, 256, true, object_size), extents[0]); + ASSERT_EQ(diff_extent(1 << order, 256, true, object_size), extents[1]); + extents.clear(); + + // 3. snap2 -> HEAD + ASSERT_EQ(0, image.diff_iterate2("snap2", 0, size, true, this->whole_object, + vector_iterate_cb, &extents)); + ASSERT_EQ(1u, extents.size()); + ASSERT_EQ(diff_extent(1 << order, 256, true, object_size), extents[0]); + extents.clear(); + + // 4. snap3 -> HEAD + ASSERT_EQ(0, image.diff_iterate2("snap3", 0, size, true, this->whole_object, + vector_iterate_cb, &extents)); + ASSERT_EQ(0u, extents.size()); + + ASSERT_PASSED(this->validate_object_map, image); + ASSERT_EQ(0, image.snap_set("snap3")); + + // 5. 
beginning of time -> snap3 + ASSERT_EQ(0, image.diff_iterate2(NULL, 0, size, true, this->whole_object, + vector_iterate_cb, &extents)); + ASSERT_EQ(2u, extents.size()); + ASSERT_EQ(diff_extent(0, 256, true, object_size), extents[0]); + ASSERT_EQ(diff_extent(1 << order, 256, true, object_size), extents[1]); + extents.clear(); + + // 6. snap1 -> snap3 + ASSERT_EQ(0, image.diff_iterate2("snap1", 0, size, true, this->whole_object, + vector_iterate_cb, &extents)); + ASSERT_EQ(2u, extents.size()); + ASSERT_EQ(diff_extent(0, 256, true, object_size), extents[0]); + ASSERT_EQ(diff_extent(1 << order, 256, true, object_size), extents[1]); + extents.clear(); + + // 7. snap2 -> snap3 + ASSERT_EQ(0, image.diff_iterate2("snap2", 0, size, true, this->whole_object, + vector_iterate_cb, &extents)); + ASSERT_EQ(1u, extents.size()); + ASSERT_EQ(diff_extent(1 << order, 256, true, object_size), extents[0]); + extents.clear(); + + ASSERT_PASSED(this->validate_object_map, image); + ASSERT_EQ(0, image.snap_set("snap2")); + + // 8. beginning of time -> snap2 + ASSERT_EQ(0, image.diff_iterate2(NULL, 0, size, true, this->whole_object, + vector_iterate_cb, &extents)); + ASSERT_EQ(1u, extents.size()); + ASSERT_EQ(diff_extent(0, 256, true, object_size), extents[0]); + extents.clear(); + + // 9. snap1 -> snap2 + ASSERT_EQ(0, image.diff_iterate2("snap1", 0, size, true, this->whole_object, + vector_iterate_cb, &extents)); + ASSERT_EQ(1u, extents.size()); + ASSERT_EQ(diff_extent(0, 256, true, object_size), extents[0]); + extents.clear(); + + ASSERT_PASSED(this->validate_object_map, image); + ASSERT_EQ(0, image.snap_set("snap1")); + + // 10. beginning of time -> snap1 + ASSERT_EQ(0, image.diff_iterate2(NULL, 0, size, true, this->whole_object, + vector_iterate_cb, &extents)); + ASSERT_EQ(0u, extents.size()); + + ASSERT_PASSED(this->validate_object_map, image); +} + TYPED_TEST(DiffIterateTest, DiffIterateDiscard) { librados::IoCtx ioctx; @@ -7655,50 +7864,6 @@ TYPED_TEST(DiffIterateTest, DiffIterateStress) ASSERT_PASSED(this->validate_object_map, image); } -TYPED_TEST(DiffIterateTest, DiffIterateRegression6926) -{ - librados::IoCtx ioctx; - ASSERT_EQ(0, this->_rados.ioctx_create(this->m_pool_name.c_str(), ioctx)); - - librbd::RBD rbd; - librbd::Image image; - int order = 0; - std::string name = this->get_temp_image_name(); - uint64_t size = 20 << 20; - - ASSERT_EQ(0, create_image_pp(rbd, ioctx, name.c_str(), size, &order)); - ASSERT_EQ(0, rbd.open(ioctx, image, name.c_str(), NULL)); - - uint64_t object_size = 0; - if (this->whole_object) { - object_size = 1 << order; - } - vector<diff_extent> extents; - ceph::bufferlist bl; - - ASSERT_EQ(0, image.diff_iterate2(NULL, 0, size, true, this->whole_object, - vector_iterate_cb, (void *) &extents)); - ASSERT_EQ(0u, extents.size()); - - ASSERT_EQ(0, image.snap_create("snap1")); - char data[256]; - memset(data, 1, sizeof(data)); - bl.append(data, 256); - ASSERT_EQ(256, image.write(0, 256, bl)); - - extents.clear(); - ASSERT_EQ(0, image.diff_iterate2(NULL, 0, size, true, this->whole_object, - vector_iterate_cb, (void *) &extents)); - ASSERT_EQ(1u, extents.size()); - ASSERT_EQ(diff_extent(0, 256, true, object_size), extents[0]); - - ASSERT_EQ(0, image.snap_set("snap1")); - extents.clear(); - ASSERT_EQ(0, image.diff_iterate2(NULL, 0, size, true, this->whole_object, - vector_iterate_cb, (void *) &extents)); - ASSERT_EQ(static_cast<size_t>(0), extents.size()); -} - TYPED_TEST(DiffIterateTest, DiffIterateParent) { REQUIRE_FEATURE(RBD_FEATURE_LAYERING); diff --git 
a/src/test/librbd/test_main.cc b/src/test/librbd/test_main.cc index 2ff9f69dea9..82b72b1ef7e 100644 --- a/src/test/librbd/test_main.cc +++ b/src/test/librbd/test_main.cc @@ -17,6 +17,7 @@ extern void register_test_image_watcher(); extern void register_test_internal(); extern void register_test_journal_entries(); extern void register_test_journal_replay(); +extern void register_test_journal_stress(); extern void register_test_migration(); extern void register_test_mirroring(); extern void register_test_mirroring_watcher(); @@ -37,6 +38,7 @@ int main(int argc, char **argv) register_test_internal(); register_test_journal_entries(); register_test_journal_replay(); + register_test_journal_stress(); register_test_migration(); register_test_mirroring(); register_test_mirroring_watcher(); diff --git a/src/test/librbd/test_mock_Journal.cc b/src/test/librbd/test_mock_Journal.cc index 2fe74d2fe46..589695c50b3 100644 --- a/src/test/librbd/test_mock_Journal.cc +++ b/src/test/librbd/test_mock_Journal.cc @@ -460,7 +460,7 @@ public: bl.append_zero(length); std::shared_lock owner_locker{mock_image_ctx.owner_lock}; - return mock_journal->append_write_event(0, length, bl, false); + return mock_journal->append_write_event({{0, length}}, bl, false); } uint64_t when_append_compare_and_write_event( diff --git a/src/test/objectstore/Allocator_test.cc b/src/test/objectstore/Allocator_test.cc index 8204179b531..0e76c479002 100644 --- a/src/test/objectstore/Allocator_test.cc +++ b/src/test/objectstore/Allocator_test.cc @@ -587,8 +587,7 @@ TEST_P(AllocTest, test_alloc_47883) PExtentVector extents; auto need = 0x3f980000; auto got = alloc->allocate(need, 0x10000, 0, (int64_t)0, &extents); - EXPECT_GT(got, 0); - EXPECT_EQ(got, 0x630000); + EXPECT_GE(got, 0x630000); } TEST_P(AllocTest, test_alloc_50656_best_fit) diff --git a/src/test/objectstore/fastbmap_allocator_test.cc b/src/test/objectstore/fastbmap_allocator_test.cc index c5953198505..710b3798f7a 100644 --- a/src/test/objectstore/fastbmap_allocator_test.cc +++ b/src/test/objectstore/fastbmap_allocator_test.cc @@ -625,6 +625,8 @@ TEST(TestAllocatorLevel01, test_l2_contiguous_alignment) ASSERT_EQ(bins_overall[cbits(num_chunks / 2) - 1], 1u); { + // Original free space disposition (start chunk, count): + // <NC/2, NC/2> size_t to_release = 2 * _1m + 0x1000; // release 2M + 4K at the beginning interval_vector_t r; @@ -637,6 +639,8 @@ TEST(TestAllocatorLevel01, test_l2_contiguous_alignment) ASSERT_EQ(bins_overall[cbits(num_chunks / 2) - 1], 1u); } { + // Original free space disposition (start chunk, count): + // <0, 513>, <NC / 2, NC / 2> // allocate 4K within the deallocated range uint64_t allocated4 = 0; interval_vector_t a4; @@ -652,79 +656,91 @@ TEST(TestAllocatorLevel01, test_l2_contiguous_alignment) ASSERT_EQ(bins_overall[cbits(num_chunks / 2) - 1], 1u); } { - // allocate 1M - should go to the second 1M chunk + // Original free space disposition (start chunk, count): + // <1, 512>, <NC / 2, NC / 2> + // allocate 1M - should go to offset 4096 uint64_t allocated4 = 0; interval_vector_t a4; al2.allocate_l2(_1m, _1m, &allocated4, &a4); ASSERT_EQ(a4.size(), 1u); ASSERT_EQ(allocated4, _1m); - ASSERT_EQ(a4[0].offset, _1m); + ASSERT_EQ(a4[0].offset, 4096); ASSERT_EQ(a4[0].length, _1m); bins_overall.clear(); al2.collect_stats(bins_overall); - ASSERT_EQ(bins_overall.size(), 3u); - ASSERT_EQ(bins_overall[0], 1u); - ASSERT_EQ(bins_overall[cbits((_1m - 0x1000) / 0x1000) - 1], 1u); + ASSERT_EQ(bins_overall.size(), 2u); + ASSERT_EQ(bins_overall[cbits(_1m / 0x1000) - 1], 1u); 
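    // Illustrative aside, not part of the change: the bin assertions assume
    // that a free extent of N chunks is accounted in bin cbits(N) - 1, with
    // cbits(N) taken to be the number of significant bits in N. The leftover
    // from the low region after the 1M allocation above is
    // _1m / 0x1000 = 256 chunks, and cbits(256) = 9, so it lands in bin 8;
    // the untouched <NC / 2, NC / 2> run is checked the same way just below.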
ASSERT_EQ(bins_overall[cbits(num_chunks / 2) - 1], 1u); } { + // Original free space disposition (start chunk, count): + // <257, 256>, <NC / 2, NC / 2> // and allocate yet another 8K within the deallocated range uint64_t allocated4 = 0; interval_vector_t a4; al2.allocate_l2(0x2000, 0x1000, &allocated4, &a4); ASSERT_EQ(a4.size(), 1u); ASSERT_EQ(allocated4, 0x2000u); - ASSERT_EQ(a4[0].offset, 0x1000u); + ASSERT_EQ(a4[0].offset, _1m + 0x1000u); ASSERT_EQ(a4[0].length, 0x2000u); bins_overall.clear(); al2.collect_stats(bins_overall); - ASSERT_EQ(bins_overall[0], 1u); - ASSERT_EQ(bins_overall[cbits((_1m - 0x3000) / 0x1000) - 1], 1u); + ASSERT_EQ(bins_overall.size(), 2u); + ASSERT_EQ(bins_overall[cbits((_1m - 0x2000) / 0x1000) - 1], 1u); ASSERT_EQ(bins_overall[cbits(num_chunks / 2) - 1], 1u); } { - // release just allocated 1M + // Original free space disposition (start chunk, count): + // <259, 254>, <NC / 2, NC / 2> + // release 4K~1M interval_vector_t r; - r.emplace_back(_1m, _1m); + r.emplace_back(0x1000, _1m); al2.free_l2(r); bins_overall.clear(); al2.collect_stats(bins_overall); - ASSERT_EQ(bins_overall.size(), 2u); - ASSERT_EQ(bins_overall[cbits((2 * _1m - 0x3000) / 0x1000) - 1], 1u); + ASSERT_EQ(bins_overall.size(), 3u); + //ASSERT_EQ(bins_overall[cbits((2 * _1m - 0x3000) / 0x1000) - 1], 1u); + ASSERT_EQ(bins_overall[cbits(_1m / 0x1000) - 1], 1u); + ASSERT_EQ(bins_overall[cbits((_1m - 0x2000) / 0x1000) - 1], 1u); ASSERT_EQ(bins_overall[cbits(num_chunks / 2) - 1], 1u); } { - // allocate 3M - should go to the second 1M chunk and @capacity/2 + // Original free space disposition (start chunk, count): + // <1, 257>, <259, 254>, <NC / 2, NC / 2> + // allocate 3M - should go to the first 1M chunk and @capacity/2 uint64_t allocated4 = 0; interval_vector_t a4; al2.allocate_l2(3 * _1m, _1m, &allocated4, &a4); ASSERT_EQ(a4.size(), 2u); ASSERT_EQ(allocated4, 3 * _1m); - ASSERT_EQ(a4[0].offset, _1m); + ASSERT_EQ(a4[0].offset, 0x1000); ASSERT_EQ(a4[0].length, _1m); ASSERT_EQ(a4[1].offset, capacity / 2); ASSERT_EQ(a4[1].length, 2 * _1m); bins_overall.clear(); al2.collect_stats(bins_overall); - ASSERT_EQ(bins_overall.size(), 3u); - ASSERT_EQ(bins_overall[0], 1u); - ASSERT_EQ(bins_overall[cbits((_1m - 0x3000) / 0x1000) - 1], 1u); + ASSERT_EQ(bins_overall.size(), 2u); + ASSERT_EQ(bins_overall[cbits((_1m - 0x2000) / 0x1000) - 1], 1u); ASSERT_EQ(bins_overall[cbits((num_chunks - 512) / 2) - 1], 1u); } { - // release allocated 1M in the second meg chunk except + // Original free space disposition (start chunk, count): + // <259, 254>, <NC / 2 - 512, NC / 2 - 512> + // release allocated 1M in the first meg chunk except // the first 4K chunk interval_vector_t r; - r.emplace_back(_1m + 0x1000, _1m); + r.emplace_back(0x1000, _1m); al2.free_l2(r); bins_overall.clear(); al2.collect_stats(bins_overall); ASSERT_EQ(bins_overall.size(), 3u); ASSERT_EQ(bins_overall[cbits(_1m / 0x1000) - 1], 1u); - ASSERT_EQ(bins_overall[cbits((_1m - 0x3000) / 0x1000) - 1], 1u); + ASSERT_EQ(bins_overall[cbits((_1m - 0x2000) / 0x1000) - 1], 1u); ASSERT_EQ(bins_overall[cbits((num_chunks - 512) / 2) - 1], 1u); } { + // Original free space disposition (start chunk, count): + // <1, 256>, <259, 254>, <NC / 2 - 512, NC / 2 - 512> // release 2M @(capacity / 2) interval_vector_t r; r.emplace_back(capacity / 2, 2 * _1m); @@ -733,10 +749,12 @@ TEST(TestAllocatorLevel01, test_l2_contiguous_alignment) al2.collect_stats(bins_overall); ASSERT_EQ(bins_overall.size(), 3u); ASSERT_EQ(bins_overall[cbits(_1m / 0x1000) - 1], 1u); - 
ASSERT_EQ(bins_overall[cbits((_1m - 0x3000) / 0x1000) - 1], 1u); + ASSERT_EQ(bins_overall[cbits((_1m - 0x2000) / 0x1000) - 1], 1u); ASSERT_EQ(bins_overall[cbits((num_chunks) / 2) - 1], 1u); } { + // Original free space disposition (start chunk, count): + // <1, 256>, <259, 254>, <NC / 2, NC / 2> // allocate 4x512K - should go to the second halves of // the first and second 1M chunks and @(capacity / 2) uint64_t allocated4 = 0; @@ -744,51 +762,54 @@ TEST(TestAllocatorLevel01, test_l2_contiguous_alignment) al2.allocate_l2(2 * _1m, _1m / 2, &allocated4, &a4); ASSERT_EQ(a4.size(), 3u); ASSERT_EQ(allocated4, 2 * _1m); - ASSERT_EQ(a4[0].offset, _1m / 2); + ASSERT_EQ(a4[1].offset, 0x1000); + ASSERT_EQ(a4[1].length, _1m); + ASSERT_EQ(a4[0].offset, _1m + 0x3000); ASSERT_EQ(a4[0].length, _1m / 2); - ASSERT_EQ(a4[1].offset, _1m + _1m / 2); - ASSERT_EQ(a4[1].length, _1m / 2); ASSERT_EQ(a4[2].offset, capacity / 2); - ASSERT_EQ(a4[2].length, _1m); + ASSERT_EQ(a4[2].length, _1m / 2); bins_overall.clear(); al2.collect_stats(bins_overall); - ASSERT_EQ(bins_overall.size(), 3u); - ASSERT_EQ(bins_overall[0], 1u); - // below we have 512K - 4K & 512K - 12K chunks which both fit into - // the same bin = 6 - ASSERT_EQ(bins_overall[6], 2u); + ASSERT_EQ(bins_overall.size(), 2u); + ASSERT_EQ(bins_overall[cbits((_1m - 0x2000 - 0x80000) / 0x1000) - 1], 1u); ASSERT_EQ(bins_overall[cbits((num_chunks - 256) / 2) - 1], 1u); } { - // cleanup first 2M except except the last 4K chunk + // Original free space disposition (start chunk, count): + // <387, 126>, <NC / 2 + 128, NC / 2 - 128> + // cleanup first 1536K except the last 4K chunk interval_vector_t r; - r.emplace_back(0, 2 * _1m - 0x1000); + r.emplace_back(0, _1m + _1m / 2 - 0x1000); al2.free_l2(r); bins_overall.clear(); al2.collect_stats(bins_overall); ASSERT_EQ(bins_overall.size(), 3u); - ASSERT_EQ(bins_overall[0], 1u); - ASSERT_EQ(bins_overall[cbits((_2m - 0x1000) / 0x1000) - 1], 1u); + ASSERT_EQ(bins_overall[cbits((_1m + _1m / 2 - 0x1000) / 0x1000) - 1], 1u); + ASSERT_EQ(bins_overall[cbits((_1m - 0x2000 - 0x80000) / 0x1000) - 1], 1u); ASSERT_EQ(bins_overall[cbits((num_chunks - 256) / 2) - 1], 1u); } { - // release 2M @(capacity / 2) + // Original free space disposition (start chunk, count): + // <0, 383> <387, 126>, <NC / 2 + 128, NC / 2 - 128> + // release 512K @(capacity / 2) interval_vector_t r; - r.emplace_back(capacity / 2, 2 * _1m); + r.emplace_back(capacity / 2, _1m / 2); al2.free_l2(r); bins_overall.clear(); al2.collect_stats(bins_overall); ASSERT_EQ(bins_overall.size(), 3u); - ASSERT_EQ(bins_overall[0], 1u); - ASSERT_EQ(bins_overall[cbits((_2m - 0x1000) / 0x1000) - 1], 1u); + ASSERT_EQ(bins_overall[cbits((_1m + _1m / 2 - 0x1000) / 0x1000) - 1], 1u); + ASSERT_EQ(bins_overall[cbits((_1m - 0x2000 - 0x80000) / 0x1000) - 1], 1u); ASSERT_EQ(bins_overall[cbits(num_chunks / 2) - 1], 1u); } { - // allocate 132M using 4M granularity should go to (capacity / 2) + // Original free space disposition (start chunk, count): + // <0, 383> <387, 126>, <NC / 2, NC / 2> + // allocate 132M (=33792*4096) = using 4M granularity should go to (capacity / 2) uint64_t allocated4 = 0; interval_vector_t a4; al2.allocate_l2(132 * _1m, 4 * _1m , &allocated4, &a4); @@ -799,24 +820,40 @@ TEST(TestAllocatorLevel01, test_l2_contiguous_alignment) bins_overall.clear(); al2.collect_stats(bins_overall); ASSERT_EQ(bins_overall.size(), 3u); + ASSERT_EQ(bins_overall[cbits((_1m + _1m / 2 - 0x1000) / 0x1000) - 1], 1u); + ASSERT_EQ(bins_overall[cbits((_1m - 0x2000 - 0x80000) / 0x1000) - 1], 1u); + 
ASSERT_EQ(bins_overall[cbits(num_chunks / 2 - 33792) - 1], 1u); } { - // cleanup left 4K chunk in the first 2M + // Original free space disposition (start chunk, count): + // <0, 383> <387, 126>, <NC / 2 + 33792, NC / 2 - 33792> + // cleanup remaining 4*4K chunks in the first 2M interval_vector_t r; - r.emplace_back(2 * _1m - 0x1000, 0x1000); + r.emplace_back(383 * 4096, 4 * 0x1000); al2.free_l2(r); bins_overall.clear(); al2.collect_stats(bins_overall); ASSERT_EQ(bins_overall.size(), 2u); + ASSERT_EQ(bins_overall[cbits((2 * _1m + 0x1000) / 0x1000) - 1], 1u); + ASSERT_EQ(bins_overall[cbits(num_chunks / 2 - 33792) - 1], 1u); } { + // Original free space disposition (start chunk, count): + // <0, 513>, <NC / 2 + 33792, NC / 2 - 33792> // release 132M @(capacity / 2) interval_vector_t r; r.emplace_back(capacity / 2, 132 * _1m); al2.free_l2(r); + bins_overall.clear(); + al2.collect_stats(bins_overall); + ASSERT_EQ(bins_overall.size(), 2u); + ASSERT_EQ(bins_overall[cbits((2 * _1m + 0x1000) / 0x1000) - 1], 1u); + ASSERT_EQ(bins_overall[cbits(num_chunks / 2) - 1], 1u); } { + // Original free space disposition (start chunk, count): + // <0, 513>, <NC / 2, NC / 2> // allocate 132M using 2M granularity should go to the first chunk and to // (capacity / 2) uint64_t allocated4 = 0; @@ -827,14 +864,31 @@ TEST(TestAllocatorLevel01, test_l2_contiguous_alignment) ASSERT_EQ(a4[0].length, 2 * _1m); ASSERT_EQ(a4[1].offset, capacity / 2); ASSERT_EQ(a4[1].length, 130 * _1m); + + bins_overall.clear(); + al2.collect_stats(bins_overall); + + ASSERT_EQ(bins_overall.size(), 2u); + ASSERT_EQ(bins_overall[cbits(0)], 1u); + ASSERT_EQ(bins_overall[cbits(num_chunks / 2 - 33792) - 1], 1u); } { + // Original free space disposition (start chunk, count): + // <512, 1>, <NC / 2 + 33792, NC / 2 - 33792> // release 130M @(capacity / 2) interval_vector_t r; r.emplace_back(capacity / 2, 132 * _1m); al2.free_l2(r); + bins_overall.clear(); + al2.collect_stats(bins_overall); + + ASSERT_EQ(bins_overall.size(), 2u); + ASSERT_EQ(bins_overall[cbits(0)], 1u); + ASSERT_EQ(bins_overall[cbits(num_chunks / 2) - 1], 1u); } { + // Original free space disposition (start chunk, count): + // <512,1>, <NC / 2, NC / 2> // release 4K~16K // release 28K~32K // release 68K~24K @@ -843,21 +897,46 @@ TEST(TestAllocatorLevel01, test_l2_contiguous_alignment) r.emplace_back(0x7000, 0x8000); r.emplace_back(0x11000, 0x6000); al2.free_l2(r); + + bins_overall.clear(); + al2.collect_stats(bins_overall); + + ASSERT_EQ(bins_overall.size(), 4u); + ASSERT_EQ(bins_overall[cbits(0)], 1u); + ASSERT_EQ(bins_overall[cbits(0x4000 / 0x1000) - 1], 2u); // accounts both 0x4000 & 0x6000 + ASSERT_EQ(bins_overall[cbits(0x8000 / 0x1000) - 1], 1u); + ASSERT_EQ(bins_overall[cbits(num_chunks / 2) - 1], 1u); } { - // allocate 32K using 16K granularity - should bypass the first - // unaligned extent, use the second free extent partially given - // the 16K alignment and then fallback to capacity / 2 + // Original free space disposition (start chunk, count): + // <1, 4>, <7, 8>, <17, 6> <512,1>, <NC / 2, NC / 2> + // allocate 80K using 16K granularity uint64_t allocated4 = 0; interval_vector_t a4; - al2.allocate_l2(0x8000, 0x4000, &allocated4, &a4); - ASSERT_EQ(a4.size(), 2u); - ASSERT_EQ(a4[0].offset, 0x8000u); - ASSERT_EQ(a4[0].length, 0x4000u); - ASSERT_EQ(a4[1].offset, capacity / 2); + al2.allocate_l2(0x14000, 0x4000, &allocated4, &a4); + + ASSERT_EQ(a4.size(), 4); + ASSERT_EQ(a4[1].offset, 0x1000u); ASSERT_EQ(a4[1].length, 0x4000u); - } + ASSERT_EQ(a4[0].offset, 0x7000u); + 
ASSERT_EQ(a4[0].length, 0x8000u); + ASSERT_EQ(a4[2].offset, 0x11000u); + ASSERT_EQ(a4[2].length, 0x4000u); + ASSERT_EQ(a4[3].offset, capacity / 2); + ASSERT_EQ(a4[3].length, 0x4000u); + + bins_overall.clear(); + al2.collect_stats(bins_overall); + ASSERT_EQ(bins_overall.size(), 3u); + ASSERT_EQ(bins_overall[cbits(0)], 1u); + ASSERT_EQ(bins_overall[cbits(0x2000 / 0x1000) - 1], 1u); + ASSERT_EQ(bins_overall[cbits(num_chunks / 2 - 1) - 1], 1u); + } + { + // Original free space disposition (start chunk, count): + // <21, 2> <512,1>, <NC / 2 + 1, NC / 2 - 1> + } } std::cout << "Done L2 cont aligned" << std::endl; } @@ -913,7 +992,7 @@ TEST(TestAllocatorLevel01, test_4G_alloc_bug2) al2.allocate_l2(0x3e000000, _1m, &allocated4, &a4); ASSERT_EQ(a4.size(), 2u); ASSERT_EQ(allocated4, 0x3e000000u); - ASSERT_EQ(a4[0].offset, 0x5fed00000u); + ASSERT_EQ(a4[0].offset, 0x5fec30000u); ASSERT_EQ(a4[0].length, 0x1300000u); ASSERT_EQ(a4[1].offset, 0x628000000u); ASSERT_EQ(a4[1].length, 0x3cd00000u); diff --git a/src/test/objectstore/store_test.cc b/src/test/objectstore/store_test.cc index 03dc1a87e1f..25e1721c403 100644 --- a/src/test/objectstore/store_test.cc +++ b/src/test/objectstore/store_test.cc @@ -92,7 +92,23 @@ static bool bl_eq(bufferlist& expected, bufferlist& actual) return false; } +void dump_bluefs_stats() +{ + AdminSocket* admin_socket = g_ceph_context->get_admin_socket(); + ceph_assert(admin_socket); + + ceph::bufferlist in, out; + ostringstream err; + auto r = admin_socket->execute_command( + { "{\"prefix\": \"bluefs stats\"}" }, + in, err, &out); + if (r != 0) { + cerr << "failure querying: " << cpp_strerror(r) << std::endl; + } else { + std::cout << std::string(out.c_str(), out.length()) << std::endl; + } +} template <typename T> int queue_transaction( @@ -9607,9 +9623,9 @@ TEST_P(StoreTestSpecificAUSize, BluestoreRepairSharedBlobTest) { string key; _key_encode_u64(1, &key); bluestore_shared_blob_t sb(1); - sb.ref_map.get(0x2000, block_size); - sb.ref_map.get(0x4000, block_size); - sb.ref_map.get(0x4000, block_size); + sb.ref_map.get(0x822000, block_size); + sb.ref_map.get(0x824000, block_size); + sb.ref_map.get(0x824000, block_size); bufferlist bl; encode(sb, bl); bstore->inject_broken_shared_blob_key(key, bl); @@ -10462,24 +10478,11 @@ void doManySetAttr(ObjectStore* store, std::cout << "done" << std::endl; do_check_fn(store); - AdminSocket* admin_socket = g_ceph_context->get_admin_socket(); - ceph_assert(admin_socket); - - ceph::bufferlist in, out; - ostringstream err; - - auto r = admin_socket->execute_command( - { "{\"prefix\": \"bluefs stats\"}" }, - in, err, &out); - if (r != 0) { - cerr << "failure querying: " << cpp_strerror(r) << std::endl; - } else { - std::cout << std::string(out.c_str(), out.length()) << std::endl; - } + dump_bluefs_stats(); test_obj.shutdown(); } -TEST_P(StoreTestSpecificAUSize, SpilloverTest) { +TEST_P(StoreTestSpecificAUSize, SpilloverLegacyTest) { if (string(GetParam()) != "bluestore") return; if (smr) { @@ -10509,24 +10512,12 @@ TEST_P(StoreTestSpecificAUSize, SpilloverTest) { ceph_assert(bstore); bstore->compact(); const PerfCounters* logger = bstore->get_bluefs_perf_counters(); - //experimentally it was discovered that this case results in 400+MB spillover - //using lower 300MB threshold just to be safe enough - std::cout << "DB used:" << logger->get(l_bluefs_db_used_bytes) << std::endl; - std::cout << "SLOW used:" << logger->get(l_bluefs_slow_used_bytes) << std::endl; - ASSERT_GE(logger->get(l_bluefs_slow_used_bytes), 16 * 1024 * 1024); - - struct 
store_statfs_t statfs; - osd_alert_list_t alerts; - int r = store->statfs(&statfs, &alerts); - ASSERT_EQ(r, 0); - ASSERT_EQ(alerts.count("BLUEFS_SPILLOVER"), 1); - std::cout << "spillover_alert:" << alerts.find("BLUEFS_SPILLOVER")->second - << std::endl; + ASSERT_GT(logger->get(l_bluefs_slow_used_bytes), 0); } ); } -TEST_P(StoreTestSpecificAUSize, SpilloverFixedTest) { +TEST_P(StoreTestSpecificAUSize, SpilloverLegacyFixedByFitToFastTest) { if (string(GetParam()) != "bluestore") return; if (smr) { @@ -10536,8 +10527,15 @@ TEST_P(StoreTestSpecificAUSize, SpilloverFixedTest) { SetVal(g_conf(), "bluestore_block_db_create", "true"); SetVal(g_conf(), "bluestore_block_db_size", "3221225472"); - SetVal(g_conf(), "bluestore_volume_selection_policy", "use_some_extra"); - SetVal(g_conf(), "bluestore_volume_selection_reserved", "1"); // just use non-zero to enable + SetVal(g_conf(), "bluestore_volume_selection_policy", "fit_to_fast"); + // original RocksDB settings used before https://github.com/ceph/ceph/pull/47221/ + // which enable BlueFS spillover. + SetVal(g_conf(), "bluestore_rocksdb_options", + "compression=kNoCompression,max_write_buffer_number=4," + "min_write_buffer_number_to_merge=1,recycle_log_file_num=4," + "write_buffer_size=268435456,writable_file_max_buffer_size=0," + "compaction_readahead_size=2097152,max_background_compactions=2," + "max_total_wal_size=1073741824"); g_conf().apply_changes(nullptr); @@ -10549,12 +10547,28 @@ TEST_P(StoreTestSpecificAUSize, SpilloverFixedTest) { ceph_assert(bstore); bstore->compact(); const PerfCounters* logger = bstore->get_bluefs_perf_counters(); - ASSERT_EQ(0, logger->get(l_bluefs_slow_used_bytes)); + ASSERT_EQ(logger->get(l_bluefs_slow_used_bytes), 0); } ); } -TEST_P(StoreTestSpecificAUSize, SpilloverFixed2Test) { +void do_bluefs_write(BlueFS* _fs, + const char* dirname, + const char* filename, + uint64_t to_write) +{ + BlueFS::FileWriter* h; + ASSERT_EQ(0, _fs->open_for_write(dirname, filename, &h, false)); + uint64_t buf_size = 1ull << 20; + string buf(buf_size, 'a'); + for (uint64_t w = 0; w < to_write; w += buf_size) { + h->append(buf.c_str(), buf_size); + _fs->fsync(h); + } + _fs->close_writer(h); +} + +TEST_P(StoreTestSpecificAUSize, SpilloverTest) { if (string(GetParam()) != "bluestore") return; if (smr) { @@ -10564,27 +10578,31 @@ TEST_P(StoreTestSpecificAUSize, SpilloverFixed2Test) { SetVal(g_conf(), "bluestore_block_db_create", "true"); SetVal(g_conf(), "bluestore_block_db_size", "3221225472"); - SetVal(g_conf(), "bluestore_volume_selection_policy", "use_some_extra"); - //default 2.0 factor results in too high threshold, using less value - // that results in less but still present spillover. 
- SetVal(g_conf(), "bluestore_volume_selection_reserved_factor", "0.5"); + SetVal(g_conf(), "bluestore_volume_selection_policy", "rocksdb_original"); g_conf().apply_changes(nullptr); StartDeferred(65536); - doManySetAttr(store.get(), - [&](ObjectStore* _store) { + BlueStore* bstore = dynamic_cast<BlueStore*> (store.get()); + ceph_assert(bstore); + BlueFS* fs = bstore->get_bluefs(); + do_bluefs_write(fs, "db", "file1", 1ull << 30); // 1GB + do_bluefs_write(fs, "db.slow", "file2", 1ull << 30); // 1 GB - BlueStore* bstore = dynamic_cast<BlueStore*> (_store); - ceph_assert(bstore); - bstore->compact(); - const PerfCounters* logger = bstore->get_bluefs_perf_counters(); - ASSERT_LE(logger->get(l_bluefs_slow_used_bytes), 300 * 1024 * 1024); // see SpilloverTest for 300MB choice rationale - } - ); + dump_bluefs_stats(); + const PerfCounters* logger = bstore->get_bluefs_perf_counters(); + ASSERT_EQ(1ull << 30, logger->get(l_bluefs_slow_used_bytes)); + + struct store_statfs_t statfs; + osd_alert_list_t alerts; + int r = store->statfs(&statfs, &alerts); + ASSERT_EQ(r, 0); + ASSERT_EQ(alerts.count("BLUEFS_SPILLOVER"), 1); + std::cout << "spillover_alert:" << alerts.find("BLUEFS_SPILLOVER")->second + << std::endl; } -TEST_P(StoreTestSpecificAUSize, SpilloverFixed3Test) { +TEST_P(StoreTestSpecificAUSize, SpilloverFixedCompletelyTest) { if (string(GetParam()) != "bluestore") return; if (smr) { @@ -10594,21 +10612,60 @@ TEST_P(StoreTestSpecificAUSize, SpilloverFixed3Test) { SetVal(g_conf(), "bluestore_block_db_create", "true"); SetVal(g_conf(), "bluestore_block_db_size", "3221225472"); - SetVal(g_conf(), "bluestore_volume_selection_policy", "fit_to_fast"); + SetVal(g_conf(), "bluestore_volume_selection_policy", "use_some_extra"); + SetVal(g_conf(), "bluestore_volume_selection_reserved", "1"); // just use non-zero to enable g_conf().apply_changes(nullptr); StartDeferred(65536); - doManySetAttr(store.get(), - [&](ObjectStore* _store) { + BlueStore* bstore = dynamic_cast<BlueStore*> (store.get()); + ceph_assert(bstore); + BlueFS* fs = bstore->get_bluefs(); + do_bluefs_write(fs, "db", "file1", 1ull << 30); // 1GB + do_bluefs_write(fs, "db.slow", "file2", 1ull << 30); // 1 GB - BlueStore* bstore = dynamic_cast<BlueStore*> (_store); - ceph_assert(bstore); - bstore->compact(); - const PerfCounters* logger = bstore->get_bluefs_perf_counters(); - ASSERT_EQ(logger->get(l_bluefs_slow_used_bytes), 0); // reffering to SpilloverFixedTest - } - ); + dump_bluefs_stats(); + const PerfCounters* logger = bstore->get_bluefs_perf_counters(); + ASSERT_EQ(0, logger->get(l_bluefs_slow_used_bytes)); +} + +TEST_P(StoreTestSpecificAUSize, SpilloverFixedPartialTest) { + if (string(GetParam()) != "bluestore") + return; + if (smr) { + cout << "SKIP: (FIXME?) adjust me for smr at some point?" << std::endl; + return; + } + + SetVal(g_conf(), "bluestore_block_db_create", "true"); + SetVal(g_conf(), "bluestore_block_db_size", stringify(3ull << 30).c_str()); + SetVal(g_conf(), "bluestore_volume_selection_policy", "use_some_extra"); + //default 2.0 factor results in too high threshold, using less value + // that results in a reduced but existing spillover. 
+ // + SetVal(g_conf(), "bluestore_volume_selection_reserved_factor", "1"); + + g_conf().apply_changes(nullptr); + + StartDeferred(65536); + BlueStore* bstore = dynamic_cast<BlueStore*> (store.get()); + ceph_assert(bstore); + BlueFS* fs = bstore->get_bluefs(); + do_bluefs_write(fs, "db", "file1", 1ull << 30); // 1 GB + do_bluefs_write(fs, "db.slow", "file2", 1ull << 30); // 1 GB + + dump_bluefs_stats(); + const PerfCounters* logger = bstore->get_bluefs_perf_counters(); + ASSERT_LT(100ull << 20, logger->get(l_bluefs_slow_used_bytes)); + ASSERT_GT(1ull << 30, logger->get(l_bluefs_slow_used_bytes)); + + struct store_statfs_t statfs; + osd_alert_list_t alerts; + int r = store->statfs(&statfs, &alerts); + ASSERT_EQ(r, 0); + ASSERT_EQ(alerts.count("BLUEFS_SPILLOVER"), 1); + std::cout << "spillover_alert:" << alerts.find("BLUEFS_SPILLOVER")->second + << std::endl; } TEST_P(StoreTestSpecificAUSize, Ticket45195Repro) { @@ -10783,19 +10840,7 @@ TEST_P(StoreTestSpecificAUSize, BluefsWriteInSingleDiskEnvTest) { bstore->inject_bluefs_file("db.wal", "store_test_injection_wal", 1 << 20ul); bstore->inject_bluefs_file("db", "store_test_injection_wal", 1 << 20ul); - AdminSocket* admin_socket = g_ceph_context->get_admin_socket(); - ceph_assert(admin_socket); - - ceph::bufferlist in, out; - ostringstream err; - auto r = admin_socket->execute_command( - { "{\"prefix\": \"bluefs stats\"}" }, - in, err, &out); - if (r != 0) { - cerr << "failure querying: " << cpp_strerror(r) << std::endl; - } else { - std::cout << std::string(out.c_str(), out.length()) << std::endl; - } + dump_bluefs_stats(); } TEST_P(StoreTestSpecificAUSize, BluefsWriteInNoWalDiskEnvTest) { @@ -10816,20 +10861,7 @@ TEST_P(StoreTestSpecificAUSize, BluefsWriteInNoWalDiskEnvTest) { bstore->inject_bluefs_file("db.wal", "store_test_injection_wal", 1 << 20ul); bstore->inject_bluefs_file("db", "store_test_injection_wal", 1 << 20ul); - AdminSocket* admin_socket = g_ceph_context->get_admin_socket(); - ceph_assert(admin_socket); - - ceph::bufferlist in, out; - ostringstream err; - auto r = admin_socket->execute_command( - { "{\"prefix\": \"bluefs stats\"}" }, - in, err, &out); - if (r != 0) { - cerr << "failure querying: " << cpp_strerror(r) << std::endl; - } - else { - std::cout << std::string(out.c_str(), out.length()) << std::endl; - } + dump_bluefs_stats(); } TEST_P(StoreTestOmapUpgrade, NoOmapHeader) { @@ -11005,6 +11037,8 @@ int main(int argc, char **argv) { g_ceph_context->_conf.set_val_or_die("bluestore_debug_randomize_serial_transaction", "10"); + g_ceph_context->_conf.set_val_or_die("bluefs_check_volume_selector_on_umount", "true"); + g_ceph_context->_conf.set_val_or_die("bdev_debug_aio", "true"); // specify device size diff --git a/src/test/pybind/test_rbd.py b/src/test/pybind/test_rbd.py index 7b5f31b577a..0ce3c0dd90c 100644 --- a/src/test/pybind/test_rbd.py +++ b/src/test/pybind/test_rbd.py @@ -415,6 +415,18 @@ def test_remove_canceled(tmp_image): assert_raises(OperationCanceled, RBD().remove, ioctx, image_name, on_progress=progress_cb) +def test_remove_with_progress_except(): + create_image() + d = {'received_callback': False} + def progress_cb(current, total): + d['received_callback'] = True + raise Exception() + + # exception is logged and ignored with a Cython warning: + # Exception ignored in: 'rbd.progress_callback' + RBD().remove(ioctx, image_name, on_progress=progress_cb) + eq(True, d['received_callback']) + def test_rename(tmp_image): rbd = RBD() image_name2 = get_temp_image_name() @@ -1251,6 +1263,16 @@ class TestImage(object): 
assert(comp.get_return_value() < 0) eq(sys.getrefcount(comp), 2) + # test3: except case + def cbex(_, buf): + raise KeyError() + + def test3(): + comp = self.image.aio_read(IMG_SIZE, 20, cbex) + comp.wait_for_complete_and_cb() + + assert_raises(KeyError, test3) + def test_aio_write(self): retval = [None] def cb(comp): diff --git a/src/test/rgw/test_rgw_lc.cc b/src/test/rgw/test_rgw_lc.cc index 83a4cac676d..d10b482cbfc 100644 --- a/src/test/rgw/test_rgw_lc.cc +++ b/src/test/rgw/test_rgw_lc.cc @@ -5,7 +5,6 @@ #include "rgw_lc.h" #include "rgw_lc_s3.h" #include <gtest/gtest.h> -//#include <spawn/spawn.hpp> #include <string> #include <vector> #include <stdexcept> @@ -107,3 +106,239 @@ TEST(TestLCFilterInvalidAnd, XMLDoc3) /* check our flags */ ASSERT_EQ(filter.get_flags(), uint32_t(LCFlagType::none)); } + +struct LCWorkTimeTests : ::testing::Test +{ + CephContext* cct; + std::unique_ptr<RGWLC::LCWorker> worker; + + // expects input in the form of "%m/%d/%y %H:%M:%S"; e.g., "01/15/23 23:59:01" + utime_t get_utime_by_date_time_string(const std::string& date_time_str) + { + struct tm tm{}; + struct timespec ts = {0}; + + strptime(date_time_str.c_str(), "%m/%d/%y %H:%M:%S", &tm); + ts.tv_sec = mktime(&tm); + + return utime_t(ts); + } + + // expects a map from input value (date & time string) to expected result (boolean) + void run_should_work_test(const auto& test_values_to_expectations_map) { + for (const auto& [date_time_str, expected_value] : test_values_to_expectations_map) { + auto ut = get_utime_by_date_time_string(date_time_str); + auto should_work = worker->should_work(ut); + + ASSERT_EQ(should_work, expected_value) + << "input time: " << ut + << " expected: " << expected_value + << " should_work: " << should_work + << " work-time-window: " << cct->_conf->rgw_lifecycle_work_time << std::endl; + } + } + + // expects a map from input value (a tuple of date & time strings) to expected result (seconds) + void run_schedule_next_start_time_test(const auto& test_values_to_expectations_map) { + for (const auto& [date_time_str_tuple, expected_value] : test_values_to_expectations_map) { + auto work_started_at = get_utime_by_date_time_string(std::get<0>(date_time_str_tuple)); + auto work_completed_at = get_utime_by_date_time_string(std::get<1>(date_time_str_tuple)); + auto wait_secs_till_next_start = worker->schedule_next_start_time(work_started_at, work_completed_at); + + ASSERT_EQ(wait_secs_till_next_start, expected_value) + << "work_started_at: " << work_started_at + << " work_completed_at: " << work_completed_at + << " expected: " << expected_value + << " wait_secs_till_next_start: " << wait_secs_till_next_start + << " work-time-window: " << cct->_conf->rgw_lifecycle_work_time << std::endl; + } + } + +protected: + + void SetUp() override { + cct = (new CephContext(CEPH_ENTITY_TYPE_ANY))->get(); + + cct->_conf->set_value("rgw_lc_max_wp_worker", 0, 0); // no need to create a real workpool + worker = std::make_unique<RGWLC::LCWorker>(nullptr, cct, nullptr, 0); + } + + void TearDown() override { + worker.reset(); + cct->put(); + } +}; + +TEST_F(LCWorkTimeTests, ShouldWorkDefaultWorkTime) +{ + std::unordered_map<std::string, bool> test_values_to_expectations = { + {"01/01/23 00:00:00", true}, + {"01/01/24 00:00:00", true}, // date is not relevant, but only the time-window + {"01/01/23 00:00:01", true}, + {"01/01/23 03:00:00", true}, + {"01/01/23 05:59:59", true}, + {"01/01/23 06:00:00", true}, + {"01/01/23 06:00:59", true}, // seconds don't matter, but only hours and minutes + {"01/01/23 06:01:00", 
false}, + {"01/01/23 23:59:59", false}, + {"01/02/23 23:59:59", false}, + {"01/01/23 12:00:00", false}, + {"01/01/23 14:00:00", false} + }; + + run_should_work_test(test_values_to_expectations); +} + +TEST_F(LCWorkTimeTests, ShouldWorkCustomWorkTimeEndTimeInTheSameDay) +{ + cct->_conf->rgw_lifecycle_work_time = "14:00-16:00"; + + std::unordered_map<std::string, bool> test_values_to_expectations = { + {"01/01/23 00:00:00", false}, + {"01/01/23 12:00:00", false}, + {"01/01/24 13:59:59", false}, + {"01/01/23 14:00:00", true}, + {"01/01/23 16:00:00", true}, + {"01/01/23 16:00:59", true}, + {"01/01/23 16:01:00", false}, + {"01/01/23 17:00:00", false}, + {"01/01/23 23:59:59", false}, + }; + + run_should_work_test(test_values_to_expectations); +} + +TEST_F(LCWorkTimeTests, ShouldWorkCustomWorkTimeEndTimeInTheSameDay24Hours) +{ + cct->_conf->rgw_lifecycle_work_time = "00:00-23:59"; + + std::unordered_map<std::string, bool> test_values_to_expectations = { + {"01/01/23 23:59:00", true}, + {"01/01/23 23:59:59", true}, + {"01/01/23 00:00:00", true}, + {"01/01/23 00:00:01", true}, + {"01/01/23 00:01:00", true}, + {"01/01/23 01:00:00", true}, + {"01/01/23 12:00:00", true}, + {"01/01/23 17:00:00", true}, + {"01/01/23 23:00:00", true} + }; + + run_should_work_test(test_values_to_expectations); +} + + +TEST_F(LCWorkTimeTests, ShouldWorkCustomWorkTimeEndTimeInTheNextDay) +{ + cct->_conf->rgw_lifecycle_work_time = "14:00-01:00"; + + std::unordered_map<std::string, bool> test_values_to_expectations = { + {"01/01/23 13:59:00", false}, + {"01/01/23 13:59:59", false}, + {"01/01/24 14:00:00", true}, // used-to-fail + {"01/01/24 17:00:00", true}, // used-to-fail + {"01/01/24 23:59:59", true}, // used-to-fail + {"01/01/23 00:00:00", true}, // used-to-fail + {"01/01/23 00:59:59", true}, // used-to-fail + {"01/01/23 01:00:00", true}, // used-to-fail + {"01/01/23 01:00:59", true}, // used-to-fail + {"01/01/23 01:01:00", false}, + {"01/01/23 05:00:00", false}, + {"01/01/23 12:00:00", false}, + {"01/01/23 13:00:00", false} + }; + + run_should_work_test(test_values_to_expectations); +} + +TEST_F(LCWorkTimeTests, ShouldWorkCustomWorkTimeEndTimeInTheNextDay24Hours) +{ + cct->_conf->rgw_lifecycle_work_time = "14:00-13:59"; + + // all of the below cases used-to-fail + std::unordered_map<std::string, bool> test_values_to_expectations = { + {"01/01/23 00:00:00", true}, + {"01/01/23 00:00:01", true}, + {"01/01/23 00:01:00", true}, + {"01/01/24 01:00:00", true}, + {"01/01/24 12:00:00", true}, + {"01/01/24 13:00:00", true}, + {"01/01/24 13:59:00", true}, + {"01/01/24 13:59:59", true}, + {"01/01/23 14:00:00", true}, + {"01/01/23 14:00:01", true}, + {"01/01/23 14:01:00", true}, + {"01/01/23 16:00:00", true}, + {"01/01/23 23:59:00", true}, + {"01/01/23 23:59:59", true}, + }; + + run_should_work_test(test_values_to_expectations); +} + +TEST_F(LCWorkTimeTests, ShouldWorkCustomWorkTimeEndTimeInTheNextDayIrregularMins) +{ + cct->_conf->rgw_lifecycle_work_time = "22:15-03:33"; + + std::unordered_map<std::string, bool> test_values_to_expectations = { + {"01/01/23 22:14:59", false}, + {"01/01/23 22:15:00", true}, // used-to-fail + {"01/01/24 00:00:00", true}, // used-to-fail + {"01/01/24 01:00:00", true}, // used-to-fail + {"01/01/24 02:00:00", true}, // used-to-fail + {"01/01/23 03:33:00", true}, // used-to-fail + {"01/01/23 03:33:59", true}, // used-to-fail + {"01/01/23 03:34:00", false}, + {"01/01/23 04:00:00", false}, + {"01/01/23 12:00:00", false}, + {"01/01/23 22:00:00", false}, + }; + + 
run_should_work_test(test_values_to_expectations); +} + +TEST_F(LCWorkTimeTests, ShouldWorkCustomWorkTimeStartEndSameHour) +{ + cct->_conf->rgw_lifecycle_work_time = "22:15-22:45"; + + std::unordered_map<std::string, bool> test_values_to_expectations = { + {"01/01/23 22:14:59", false}, + {"01/01/23 22:15:00", true}, + {"01/01/24 22:44:59", true}, + {"01/01/24 22:45:59", true}, + {"01/01/24 22:46:00", false}, + {"01/01/23 23:00:00", false}, + {"01/01/23 00:00:00", false}, + {"01/01/23 12:00:00", false}, + {"01/01/23 21:00:00", false}, + }; + + run_should_work_test(test_values_to_expectations); +} + +TEST_F(LCWorkTimeTests, ScheduleNextStartTime) +{ + cct->_conf->rgw_lifecycle_work_time = "22:15-03:33"; + + // items of the map: [ (work_started_time, work_completed_time), expected_value (seconds) ] + // + // expected_value is the difference between configured start time (i.e, 22:15:00) and + // the second item of the tuple (i.e., work_completed_time). + // + // Note that "seconds" of work completion time is taken into account but date is not relevant. + // e.g., the first testcase: 75713 == 01:13:07 - 22:15:00 (https://tinyurl.com/ydm86752) + std::map<std::tuple<std::string, std::string>, int> test_values_to_expectations = { + {{"01/01/23 22:15:05", "01/01/23 01:13:07"}, 75713}, + {{"01/01/23 22:15:05", "01/02/23 01:13:07"}, 75713}, + {{"01/01/23 22:15:05", "01/01/23 22:17:07"}, 86273}, + {{"01/01/23 22:15:05", "01/02/23 22:17:07"}, 86273}, + {{"01/01/23 22:15:05", "01/01/23 22:14:00"}, 60}, + {{"01/01/23 22:15:05", "01/02/23 22:14:00"}, 60}, + {{"01/01/23 22:15:05", "01/01/23 22:15:00"}, 24 * 60 * 60}, + {{"01/01/23 22:15:05", "01/02/23 22:15:00"}, 24 * 60 * 60}, + {{"01/01/23 22:15:05", "01/01/23 22:15:01"}, 24 * 60 * 60 - 1}, + {{"01/01/23 22:15:05", "01/02/23 22:15:01"}, 24 * 60 * 60 - 1}, + }; + + run_schedule_next_start_time_test(test_values_to_expectations); +} diff --git a/src/test/rgw/test_rgw_lua.cc b/src/test/rgw/test_rgw_lua.cc index 07f8521c231..0485e71ede3 100644 --- a/src/test/rgw/test_rgw_lua.cc +++ b/src/test/rgw/test_rgw_lua.cc @@ -86,7 +86,7 @@ public: return 0; } - virtual int read_stats_async(const DoutPrefixProvider *dpp, RGWGetUserStats_CB *cb) override { + virtual int read_stats_async(const DoutPrefixProvider *dpp, boost::intrusive_ptr<sal::ReadStatsCB> cb) override { return 0; } @@ -635,8 +635,12 @@ TEST(TestRGWLua, Acl) function print_grant(k, g) print("Grant Key: " .. tostring(k)) print("Grant Type: " .. g.Type) - print("Grant Group Type: " .. g.GroupType) - print("Grant Referer: " .. g.Referer) + if (g.GroupType) then + print("Grant Group Type: " .. g.GroupType) + end + if (g.Referer) then + print("Grant Referer: " .. g.Referer) + end if (g.User) then print("Grant User.Tenant: " .. g.User.Tenant) print("Grant User.Id: " .. 
g.User.Id) @@ -662,11 +666,11 @@ TEST(TestRGWLua, Acl) )"; DEFINE_REQ_STATE; - ACLOwner owner; - owner.set_id(rgw_user("jack", "black")); - owner.set_name("jack black"); - s.user_acl.reset(new RGWAccessControlPolicy(g_cct)); - s.user_acl->set_owner(owner); + const ACLOwner owner{ + .id = rgw_user("jack", "black"), + .display_name = "jack black" + }; + s.user_acl.set_owner(owner); ACLGrant grant1, grant2, grant3, grant4, grant5, grant6_1, grant6_2; grant1.set_canon(rgw_user("jane", "doe"), "her grant", 1); grant2.set_group(ACL_GROUP_ALL_USERS ,2); @@ -675,13 +679,13 @@ TEST(TestRGWLua, Acl) grant5.set_group(ACL_GROUP_AUTHENTICATED_USERS, 5); grant6_1.set_canon(rgw_user("kill", "bill"), "his grant", 6); grant6_2.set_canon(rgw_user("kill", "bill"), "her grant", 7); - s.user_acl->get_acl().add_grant(&grant1); - s.user_acl->get_acl().add_grant(&grant2); - s.user_acl->get_acl().add_grant(&grant3); - s.user_acl->get_acl().add_grant(&grant4); - s.user_acl->get_acl().add_grant(&grant5); - s.user_acl->get_acl().add_grant(&grant6_1); - s.user_acl->get_acl().add_grant(&grant6_2); + s.user_acl.get_acl().add_grant(grant1); + s.user_acl.get_acl().add_grant(grant2); + s.user_acl.get_acl().add_grant(grant3); + s.user_acl.get_acl().add_grant(grant4); + s.user_acl.get_acl().add_grant(grant5); + s.user_acl.get_acl().add_grant(grant6_1); + s.user_acl.get_acl().add_grant(grant6_2); const auto rc = lua::request::execute(nullptr, nullptr, nullptr, &s, nullptr, script); ASSERT_EQ(rc, 0); } @@ -730,17 +734,14 @@ TEST(TestRGWLua, UseFunction) )"; DEFINE_REQ_STATE; - s.owner.set_name("user two"); - s.owner.set_id(rgw_user("tenant2", "user2")); - s.user_acl.reset(new RGWAccessControlPolicy()); - s.user_acl->get_owner().set_name("user three"); - s.user_acl->get_owner().set_id(rgw_user("tenant3", "user3")); - s.bucket_acl.reset(new RGWAccessControlPolicy()); - s.bucket_acl->get_owner().set_name("user four"); - s.bucket_acl->get_owner().set_id(rgw_user("tenant4", "user4")); - s.object_acl.reset(new RGWAccessControlPolicy()); - s.object_acl->get_owner().set_name("user five"); - s.object_acl->get_owner().set_id(rgw_user("tenant5", "user5")); + s.owner.display_name = "user two"; + s.owner.id = rgw_user("tenant2", "user2"); + s.user_acl.get_owner().display_name = "user three"; + s.user_acl.get_owner().id = rgw_user("tenant3", "user3"); + s.bucket_acl.get_owner().display_name = "user four"; + s.bucket_acl.get_owner().id = rgw_user("tenant4", "user4"); + s.object_acl.get_owner().display_name = "user five"; + s.object_acl.get_owner().id = rgw_user("tenant5", "user5"); const auto rc = lua::request::execute(nullptr, nullptr, nullptr, &s, nullptr, script); ASSERT_EQ(rc, 0); diff --git a/src/test/test_c2c.cc b/src/test/test_c2c.cc index 5fe1ac2f1fc..1569be305e5 100644 --- a/src/test/test_c2c.cc +++ b/src/test/test_c2c.cc @@ -70,7 +70,7 @@ int main(int argc, const char **argv) while(1) { size_t i; if (sharding) { - i = mempool::pool_t::pick_a_shard_int(); + i = mempool::pick_a_shard_int(); } else { i = 0; } diff --git a/src/test/test_mempool.cc b/src/test/test_mempool.cc index 9dd96682f36..a8e20a41747 100644 --- a/src/test/test_mempool.cc +++ b/src/test/test_mempool.cc @@ -412,7 +412,7 @@ TEST(mempool, check_shard_select) for (size_t i = 0; i < samples; i++) { workers.push_back( std::thread([&](){ - size_t i = mempool::pool_t::pick_a_shard_int(); + size_t i = mempool::pick_a_shard_int(); shards[i]++; })); } diff --git a/src/test/test_rgw_admin_meta.cc b/src/test/test_rgw_admin_meta.cc index b1d5fad0600..00c43d10b54 100644 --- 
a/src/test/test_rgw_admin_meta.cc +++ b/src/test/test_rgw_admin_meta.cc @@ -460,7 +460,7 @@ int compare_access_keys(RGWAccessKey& k1, RGWAccessKey& k2) { int compare_user_info(RGWUserInfo& i1, RGWUserInfo& i2) { int rv; - if ((rv = i1.user_id.compare(i2.user_id)) != 0) + if ((rv = i1.user_id.id.compare(i2.user_id.id)) != 0) return rv; if ((rv = i1.display_name.compare(i2.display_name)) != 0) return rv; diff --git a/src/tools/ceph-dencoder/common_types.h b/src/tools/ceph-dencoder/common_types.h index 3180e3476be..e853321645b 100644 --- a/src/tools/ceph-dencoder/common_types.h +++ b/src/tools/ceph-dencoder/common_types.h @@ -23,12 +23,29 @@ TYPE(filepath) #include "include/fs_types.h" TYPE_FEATUREFUL(file_layout_t) +TYPE(inodeno_t) #include "include/util.h" TYPE(ceph_data_stats) +#include "include/object.h" +TYPE(object_t) +TYPE(sobject_t) + +#include "include/frag.h" +TYPE(fragtree_t) +TYPE(frag_t) + +#include "include/types.h" +TYPE(shard_id_t) +TYPE(inline_data_t) +TYPE(sha256_digest_t) +TYPE(errorcode32_t) +TYPE(client_t) + #include "common/bit_vector.hpp" TYPE(BitVector<2>) +TYPE(ceph::BitVector<2>) #include "common/bloom_filter.hpp" TYPE(bloom_filter) @@ -37,6 +54,9 @@ TYPE(compressible_bloom_filter) #include "common/DecayCounter.h" TYPE(DecayCounter) +#include "common/entity_name.h" +TYPE(EntityName) + #include "common/histogram.h" TYPE(pow2_hist_t) @@ -71,11 +91,17 @@ TYPE(cls_cas_chunk_put_ref_op) #include "cls/cas/cls_cas_internal.h" TYPE(chunk_refs_t) +TYPE(chunk_refs_count_t) +TYPE(chunk_refs_by_object_t) #include "cls/lock/cls_lock_types.h" TYPE(rados::cls::lock::locker_id_t) TYPE_FEATUREFUL(rados::cls::lock::locker_info_t) TYPE_FEATUREFUL(rados::cls::lock::lock_info_t) +using namespace rados::cls::lock; +TYPE(locker_id_t) +TYPE_FEATUREFUL(locker_info_t) +TYPE_FEATUREFUL(lock_info_t) #include "cls/lock/cls_lock_ops.h" TYPE(cls_lock_lock_op) @@ -98,6 +124,64 @@ TYPE(obj_refcount) #include "cls/timeindex/cls_timeindex_types.h" TYPE(cls_timeindex_entry) +#include "cls/timeindex/cls_timeindex_ops.h" +TYPE(cls_timeindex_list_op) +TYPE(cls_timeindex_list_ret) + +#include "cls/queue/cls_queue_types.h" +TYPE(cls_queue_entry) +TYPE(cls_queue_marker) +TYPE(cls_queue_head) + +#include "cls/queue/cls_queue_ops.h" +TYPE(cls_queue_get_capacity_ret) +TYPE(cls_queue_remove_op) +TYPE(cls_queue_enqueue_op) +TYPE(cls_queue_list_op) +TYPE(cls_queue_list_ret) +TYPE(cls_queue_init_op) + +#include "cls/2pc_queue/cls_2pc_queue_ops.h" +TYPE(cls_2pc_queue_abort_op) +TYPE(cls_2pc_queue_commit_op) +TYPE(cls_2pc_queue_expire_op) +TYPE_NONDETERMINISTIC(cls_2pc_queue_reservations_ret) +TYPE(cls_2pc_queue_reserve_op) +TYPE(cls_2pc_queue_reserve_ret) +TYPE(cls_queue_init_op) + +#include "cls/2pc_queue/cls_2pc_queue_types.h" +TYPE(cls_2pc_reservation) +TYPE_NONDETERMINISTIC(cls_2pc_urgent_data) + +#include "cls/log/cls_log_types.h" +TYPE(cls_log_header) + +#include "cls/log/cls_log_ops.h" +TYPE(cls_log_info_op) +TYPE(cls_log_list_op) +TYPE(cls_log_list_ret) +TYPE(cls_log_trim_op) + +#include "cls/version/cls_version_ops.h" +TYPE(cls_version_check_op) +TYPE(cls_version_read_ret) +TYPE(cls_version_inc_op) +TYPE(cls_version_set_op) + + +#include "cls/fifo/cls_fifo_ops.h" +using namespace rados::cls::fifo::op; +TYPE(create_meta) +TYPE(get_meta) +TYPE(get_meta_reply) + +#include "cls/fifo/cls_fifo_types.h" +using namespace rados::cls::fifo; +TYPE(data_params) +TYPE(objv) +TYPE(info) + #include "journal/Entry.h" TYPE(journal::Entry) @@ -125,9 +209,12 @@ MESSAGE(MClientReconnect) #include "messages/MClientReply.h" 
diff --git a/src/tools/ceph-dencoder/common_types.h b/src/tools/ceph-dencoder/common_types.h
index 3180e3476be..e853321645b 100644
--- a/src/tools/ceph-dencoder/common_types.h
+++ b/src/tools/ceph-dencoder/common_types.h
@@ -23,12 +23,29 @@ TYPE(filepath)

 #include "include/fs_types.h"
 TYPE_FEATUREFUL(file_layout_t)
+TYPE(inodeno_t)

 #include "include/util.h"
 TYPE(ceph_data_stats)

+#include "include/object.h"
+TYPE(object_t)
+TYPE(sobject_t)
+
+#include "include/frag.h"
+TYPE(fragtree_t)
+TYPE(frag_t)
+
+#include "include/types.h"
+TYPE(shard_id_t)
+TYPE(inline_data_t)
+TYPE(sha256_digest_t)
+TYPE(errorcode32_t)
+TYPE(client_t)
+
 #include "common/bit_vector.hpp"
 TYPE(BitVector<2>)
+TYPE(ceph::BitVector<2>)

 #include "common/bloom_filter.hpp"
 TYPE(bloom_filter)
@@ -37,6 +54,9 @@ TYPE(compressible_bloom_filter)

 #include "common/DecayCounter.h"
 TYPE(DecayCounter)

+#include "common/entity_name.h"
+TYPE(EntityName)
+
 #include "common/histogram.h"
 TYPE(pow2_hist_t)
@@ -71,11 +91,17 @@ TYPE(cls_cas_chunk_put_ref_op)

 #include "cls/cas/cls_cas_internal.h"
 TYPE(chunk_refs_t)
+TYPE(chunk_refs_count_t)
+TYPE(chunk_refs_by_object_t)

 #include "cls/lock/cls_lock_types.h"
 TYPE(rados::cls::lock::locker_id_t)
 TYPE_FEATUREFUL(rados::cls::lock::locker_info_t)
 TYPE_FEATUREFUL(rados::cls::lock::lock_info_t)
+using namespace rados::cls::lock;
+TYPE(locker_id_t)
+TYPE_FEATUREFUL(locker_info_t)
+TYPE_FEATUREFUL(lock_info_t)

 #include "cls/lock/cls_lock_ops.h"
 TYPE(cls_lock_lock_op)
@@ -98,6 +124,64 @@ TYPE(obj_refcount)

 #include "cls/timeindex/cls_timeindex_types.h"
 TYPE(cls_timeindex_entry)

+#include "cls/timeindex/cls_timeindex_ops.h"
+TYPE(cls_timeindex_list_op)
+TYPE(cls_timeindex_list_ret)
+
+#include "cls/queue/cls_queue_types.h"
+TYPE(cls_queue_entry)
+TYPE(cls_queue_marker)
+TYPE(cls_queue_head)
+
+#include "cls/queue/cls_queue_ops.h"
+TYPE(cls_queue_get_capacity_ret)
+TYPE(cls_queue_remove_op)
+TYPE(cls_queue_enqueue_op)
+TYPE(cls_queue_list_op)
+TYPE(cls_queue_list_ret)
+TYPE(cls_queue_init_op)
+
+#include "cls/2pc_queue/cls_2pc_queue_ops.h"
+TYPE(cls_2pc_queue_abort_op)
+TYPE(cls_2pc_queue_commit_op)
+TYPE(cls_2pc_queue_expire_op)
+TYPE_NONDETERMINISTIC(cls_2pc_queue_reservations_ret)
+TYPE(cls_2pc_queue_reserve_op)
+TYPE(cls_2pc_queue_reserve_ret)
+TYPE(cls_queue_init_op)
+
+#include "cls/2pc_queue/cls_2pc_queue_types.h"
+TYPE(cls_2pc_reservation)
+TYPE_NONDETERMINISTIC(cls_2pc_urgent_data)
+
+#include "cls/log/cls_log_types.h"
+TYPE(cls_log_header)
+
+#include "cls/log/cls_log_ops.h"
+TYPE(cls_log_info_op)
+TYPE(cls_log_list_op)
+TYPE(cls_log_list_ret)
+TYPE(cls_log_trim_op)
+
+#include "cls/version/cls_version_ops.h"
+TYPE(cls_version_check_op)
+TYPE(cls_version_read_ret)
+TYPE(cls_version_inc_op)
+TYPE(cls_version_set_op)
+
+
+#include "cls/fifo/cls_fifo_ops.h"
+using namespace rados::cls::fifo::op;
+TYPE(create_meta)
+TYPE(get_meta)
+TYPE(get_meta_reply)
+
+#include "cls/fifo/cls_fifo_types.h"
+using namespace rados::cls::fifo;
+TYPE(data_params)
+TYPE(objv)
+TYPE(info)
+
 #include "journal/Entry.h"
 TYPE(journal::Entry)
@@ -125,9 +209,12 @@ MESSAGE(MClientReconnect)

 #include "messages/MClientReply.h"
 MESSAGE(MClientReply)
+TYPE(openc_response_t)

 #include "messages/MClientRequest.h"
 MESSAGE(MClientRequest)
+TYPE(SnapPayload)
+TYPE(MClientRequest::Release)

 #include "messages/MClientRequestForward.h"
 MESSAGE(MClientRequestForward)
@@ -251,6 +338,9 @@ MESSAGE(MMDSBeacon)

 #include "messages/MMDSCacheRejoin.h"
 MESSAGE(MMDSCacheRejoin)
+TYPE(MMDSCacheRejoin::dirfrag_strong)
+TYPE(MMDSCacheRejoin::dn_strong)
+TYPE(MMDSCacheRejoin::inode_strong)

 #include "messages/MMDSFindIno.h"
 MESSAGE(MMDSFindIno)
@@ -265,10 +355,14 @@ MESSAGE(MMDSFragmentNotify)
 MESSAGE(MMDSLoadTargets)

 #include "messages/MMDSMap.h"
-MESSAGE(MMDSMap)
+MESSAGE(MMDSMap)
+
+#include "messages/MMgrBeacon.h"
+MESSAGE(MMgrBeacon)

 #include "messages/MMgrReport.h"
 MESSAGE(MMgrReport)
+TYPE(PerfCounterType)

 #include "messages/MMDSResolve.h"
 MESSAGE(MMDSResolve)
@@ -321,6 +415,9 @@ MESSAGE(MMonGetVersionReply)

 #include "messages/MMonGlobalID.h"
 MESSAGE(MMonGlobalID)

+#include "messages/MMonHealth.h"
+MESSAGE(MMonHealth)
+
 #include "messages/MMonJoin.h"
 MESSAGE(MMonJoin)
@@ -452,3 +549,74 @@ MESSAGE(MWatchNotify)

 #include "messages/MMgrUpdate.h"
 MESSAGE(MMgrUpdate)
+
+#include "messages/MOSDECSubOpRead.h"
+MESSAGE(MOSDECSubOpRead)
+
+#include "messages/MOSDECSubOpReadReply.h"
+MESSAGE(MOSDECSubOpReadReply)
+
+#include "messages/MOSDECSubOpWrite.h"
+MESSAGE(MOSDECSubOpWrite)
+
+#include "messages/MOSDECSubOpWriteReply.h"
+MESSAGE(MOSDECSubOpWriteReply)
+
+#include "messages/MOSDMarkMeDown.h"
+MESSAGE(MOSDMarkMeDown)
+
+#include "messages/MOSDPGCreated.h"
+MESSAGE(MOSDPGCreated)
+
+#include "messages/MOSDPGPush.h"
+MESSAGE(MOSDPGPush)
+
+#include "messages/MOSDPGPushReply.h"
+MESSAGE(MOSDPGPushReply)
+
+#include "messages/MOSDPGUpdateLogMissing.h"
+MESSAGE(MOSDPGUpdateLogMissing)
+
+#include "messages/MOSDPGUpdateLogMissingReply.h"
+MESSAGE(MOSDPGUpdateLogMissingReply)
+
+#include "messages/MOSDRepOp.h"
+MESSAGE(MOSDRepOp)
+
+#include "messages/MOSDRepOpReply.h"
+MESSAGE(MOSDRepOpReply)
+
+#include "messages/MRecoveryReserve.h"
+MESSAGE(MRecoveryReserve)
+
+
+#include "auth/cephx/CephxProtocol.h"
+TYPE(CephXAuthenticate)
+TYPE(CephXAuthorize)
+TYPE(CephXAuthorizeChallenge)
+TYPE(CephXAuthorizeReply)
+TYPE(CephXChallengeBlob)
+TYPE(CephXRequestHeader)
+TYPE(CephXResponseHeader)
+TYPE(CephXServerChallenge)
+TYPE(CephXServiceTicket)
+TYPE(CephXServiceTicketInfo)
+TYPE(CephXServiceTicketRequest)
+TYPE(CephXTicketBlob)
+
+#include "auth/cephx/CephxKeyServer.h"
+TYPE(KeyServerData)
+TYPE(KeyServerData::Incremental)
+
+#include "auth/Auth.h"
+TYPE(RotatingSecrets)
+TYPE(ExpiringCryptoKey)
+TYPE(AuthCapsInfo)
+TYPE(AuthTicket)
+TYPE(EntityAuth)
+
+#include "auth/Crypto.h"
+TYPE(CryptoKey)
+
+#include "common/ceph_json.h"
+TYPE(JSONFormattable)
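The vstart.sh changes that follow add an --rgw_store switch so a development cluster can put radosgw on the dbstore or posix backends instead of RADOS, with do_rgw_dbstore_conf() refusing more than one radosgw because dbstore is not distributed. Typical invocations from a build directory would look roughly like this (the MON/OSD/MGR/RGW environment variables and -n/-d are the usual vstart conventions; the exact daemon counts are illustrative):

    # default: radosgw backed by RADOS
    MON=1 OSD=1 MGR=1 RGW=1 ../src/vstart.sh -n -d
    # radosgw on dbstore (single RGW instance only)
    MON=1 OSD=1 MGR=1 RGW=1 ../src/vstart.sh -n -d --rgw_store dbstore
    # dbstore with the posix filter layered on top of it
    MON=1 OSD=1 MGR=1 RGW=1 ../src/vstart.sh -n -d --rgw_store posix

The guarded auth get-or-create at the end of the diff also suggests these backends are meant to work in monitor-less runs (CEPH_NUM_MON=0), in which case the keyring step is simply skipped.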
diff --git a/src/vstart.sh b/src/vstart.sh
index 634fb74ac87..1187e86b9ed 100755
--- a/src/vstart.sh
+++ b/src/vstart.sh
@@ -168,6 +168,7 @@ fi
 ceph_osd=ceph-osd
 rgw_frontend="beast"
 rgw_compression=""
+rgw_store="rados"
 lockdep=${LOCKDEP:-1}
 spdk_enabled=0 # disable SPDK by default
 pmem_enabled=0
@@ -229,6 +230,7 @@ options:
     --rgw_frontend specify the rgw frontend configuration
     --rgw_arrow_flight start arrow flight frontend
     --rgw_compression specify the rgw compression plugin
+    --rgw_store storage backend: rados|dbstore|posix
     --seastore use seastore as crimson osd backend
     -b, --bluestore use bluestore as the osd objectstore backend (default)
     -K, --kstore use kstore as the osd objectstore backend
@@ -461,6 +463,10 @@ case $1 in
        rgw_compression=$2
        shift
        ;;
+    --rgw_store)
+       rgw_store=$2
+       shift
+       ;;
     --kstore_path)
        kstore_path=$2
        shift
@@ -692,6 +698,22 @@ done
 }

+do_rgw_dbstore_conf() {
+    if [ $CEPH_NUM_RGW -gt 1 ]; then
+        echo "dbstore is not distributed so only works with CEPH_NUM_RGW=1"
+        exit 1
+    fi
+
+    prun mkdir -p "$CEPH_DEV_DIR/rgw/dbstore"
+    wconf <<EOF
+        rgw backend store = dbstore
+        rgw config store = dbstore
+        dbstore db dir = $CEPH_DEV_DIR/rgw/dbstore
+        dbstore_config_uri = file://$CEPH_DEV_DIR/rgw/dbstore/config.db
+
+EOF
+}
+
 format_conf() {
     local opts=$1
     local indent="        "
@@ -861,6 +883,20 @@ $CCLIENTDEBUG
        ; rgw lc debug interval = 10
 $(format_conf "${extra_conf}")
 EOF
+    if [ "$rgw_store" == "dbstore" ] ; then
+        do_rgw_dbstore_conf
+    elif [ "$rgw_store" == "posix" ] ; then
+        # use dbstore as the backend and posix as the filter
+        do_rgw_dbstore_conf
+        posix_dir="$CEPH_DEV_DIR/rgw/posix"
+        prun mkdir -p $posix_dir/root $posix_dir/lmdb
+        wconf <<EOF
+        rgw filter = posix
+        rgw posix base path = $posix_dir/root
+        rgw posix database root = $posix_dir/lmdb
+
+EOF
+    fi
     do_rgw_conf
     wconf << EOF
 [mds]
@@ -1785,11 +1821,13 @@ do_rgw()
     for n in $(seq 1 $CEPH_NUM_RGW); do
        rgw_name="client.rgw.${current_port}"

-       ceph_adm auth get-or-create $rgw_name \
-           mon 'allow rw' \
-           osd 'allow rwx' \
-           mgr 'allow rw' \
-           >> "$keyring_fn"
+       if [ "$CEPH_NUM_MON" -gt 0 ]; then
+           ceph_adm auth get-or-create $rgw_name \
+               mon 'allow rw' \
+               osd 'allow rwx' \
+               mgr 'allow rw' \
+               >> "$keyring_fn"
+       fi

        debug echo start rgw on http${CEPH_RGW_HTTPS}://localhost:${current_port}
        run 'rgw' $current_port $RGWSUDO $CEPH_BIN/radosgw -c $conf_fn \