diff options
Diffstat (limited to 'src')
198 files changed, 1625 insertions, 665 deletions
diff --git a/src/ceph-volume/ceph_volume/objectstore/lvmbluestore.py b/src/ceph-volume/ceph_volume/objectstore/lvmbluestore.py index ba3719cd3f3..aa11d553723 100644 --- a/src/ceph-volume/ceph_volume/objectstore/lvmbluestore.py +++ b/src/ceph-volume/ceph_volume/objectstore/lvmbluestore.py @@ -367,7 +367,7 @@ class LvmBlueStore(BlueStore): if is_encrypted: osd_lv_path = '/dev/mapper/%s' % osd_block_lv.__dict__['lv_uuid'] lockbox_secret = osd_block_lv.tags['ceph.cephx_lockbox_secret'] - self.with_tpm = bool(osd_block_lv.tags.get('ceph.with_tpm', 0)) + self.with_tpm = osd_block_lv.tags.get('ceph.with_tpm') == '1' if not self.with_tpm: encryption_utils.write_lockbox_keyring(osd_id, osd_fsid, diff --git a/src/ceph_mds.cc b/src/ceph_mds.cc index 5a917fa807c..ba8726a2be3 100644 --- a/src/ceph_mds.cc +++ b/src/ceph_mds.cc @@ -81,7 +81,7 @@ static void handle_mds_signal(int signum) int main(int argc, const char **argv) { - ceph_pthread_setname(pthread_self(), "ceph-mds"); + ceph_pthread_setname("ceph-mds"); auto args = argv_to_vec(argc, argv); if (args.empty()) { diff --git a/src/ceph_mgr.cc b/src/ceph_mgr.cc index 67bda0c51be..bd2c643bc6b 100644 --- a/src/ceph_mgr.cc +++ b/src/ceph_mgr.cc @@ -41,7 +41,7 @@ static void usage() */ int main(int argc, const char **argv) { - ceph_pthread_setname(pthread_self(), "ceph-mgr"); + ceph_pthread_setname("ceph-mgr"); auto args = argv_to_vec(argc, argv); if (args.empty()) { diff --git a/src/ceph_mon.cc b/src/ceph_mon.cc index 279fdb20ccb..63eb252e38f 100644 --- a/src/ceph_mon.cc +++ b/src/ceph_mon.cc @@ -250,7 +250,7 @@ int main(int argc, const char **argv) { // reset our process name, in case we did a respawn, so that it's not // left as "exe". 
- ceph_pthread_setname(pthread_self(), "ceph-mon"); + ceph_pthread_setname("ceph-mon"); int err; diff --git a/src/ceph_nvmeof_monitor_client.cc b/src/ceph_nvmeof_monitor_client.cc index 05457998cb8..fa41bed08ad 100644 --- a/src/ceph_nvmeof_monitor_client.cc +++ b/src/ceph_nvmeof_monitor_client.cc @@ -45,7 +45,7 @@ static void usage() */ int main(int argc, const char **argv) { - ceph_pthread_setname(pthread_self(), "ceph-nvmeof-monitor-client"); + ceph_pthread_setname("ceph-nvmeof-monitor-client"); auto args = argv_to_vec(argc, argv); if (args.empty()) { diff --git a/src/cephadm/cephadm.py b/src/cephadm/cephadm.py index f2172bf9083..d2ddf564116 100755 --- a/src/cephadm/cephadm.py +++ b/src/cephadm/cephadm.py @@ -57,6 +57,7 @@ from cephadmlib.constants import ( LOG_DIR_MODE, SYSCTL_DIR, UNIT_DIR, + DAEMON_FAILED_ERROR, ) from cephadmlib.context import CephadmContext from cephadmlib.context_getters import ( @@ -72,6 +73,7 @@ from cephadmlib.exceptions import ( ClusterAlreadyExists, Error, UnauthorizedRegistryError, + DaemonStartException, ) from cephadmlib.exe_utils import find_executable, find_program from cephadmlib.call_wrappers import ( @@ -1246,7 +1248,11 @@ def deploy_daemon_units( call_throws(ctx, ['systemctl', 'enable', unit_name]) if start: clean_cgroup(ctx, ident.fsid, unit_name) - call_throws(ctx, ['systemctl', 'start', unit_name]) + try: + call_throws(ctx, ['systemctl', 'start', unit_name]) + except Exception as e: + logger.error(f'systemctl start failed for {unit_name}: {str(e)}') + raise DaemonStartException() def _osd_unit_run_commands( @@ -3050,7 +3056,10 @@ def get_deployment_type( @deprecated_command def command_deploy(ctx): # type: (CephadmContext) -> None - _common_deploy(ctx) + try: + _common_deploy(ctx) + except DaemonStartException: + sys.exit(DAEMON_FAILED_ERROR) def apply_deploy_config_to_ctx( @@ -3093,7 +3102,10 @@ def command_deploy_from(ctx: CephadmContext) -> None: config_data = read_configuration_source(ctx) logger.debug('Loaded deploy 
configuration: %r', config_data) apply_deploy_config_to_ctx(config_data, ctx) - _common_deploy(ctx) + try: + _common_deploy(ctx) + except DaemonStartException: + sys.exit(DAEMON_FAILED_ERROR) def _common_deploy(ctx: CephadmContext) -> None: @@ -4489,8 +4501,9 @@ def _rm_cluster(ctx: CephadmContext, keep_logs: bool, zap_osds: bool) -> None: ################################## -def check_time_sync(ctx, enabler=None): - # type: (CephadmContext, Optional[Packager]) -> bool +def check_time_sync( + ctx: CephadmContext, enabler: Optional[Packager] = None +) -> bool: units = [ 'chrony.service', # 18.04 (at least) 'chronyd.service', # el / opensuse diff --git a/src/cephadm/cephadmlib/call_wrappers.py b/src/cephadm/cephadmlib/call_wrappers.py index 3fe2171e99d..d3d327c218c 100644 --- a/src/cephadm/cephadmlib/call_wrappers.py +++ b/src/cephadm/cephadmlib/call_wrappers.py @@ -311,14 +311,14 @@ def call_throws( return out, err, ret -def call_timeout(ctx, command, timeout): - # type: (CephadmContext, List[str], int) -> int +def call_timeout( + ctx: CephadmContext, command: List[str], timeout: int +) -> int: logger.debug( 'Running command (timeout=%s): %s' % (timeout, ' '.join(command)) ) - def raise_timeout(command, timeout): - # type: (List[str], int) -> NoReturn + def raise_timeout(command: List[str], timeout: int) -> NoReturn: msg = 'Command `%s` timed out after %s seconds' % (command, timeout) logger.debug(msg) raise TimeoutExpired(msg) diff --git a/src/cephadm/cephadmlib/constants.py b/src/cephadm/cephadmlib/constants.py index 215d207729d..1df46353fb3 100644 --- a/src/cephadm/cephadmlib/constants.py +++ b/src/cephadm/cephadmlib/constants.py @@ -36,3 +36,4 @@ QUIET_LOG_LEVEL = 9 # DEBUG is 10, so using 9 to be lower level than DEBUG NO_DEPRECATED = False UID_NOBODY = 65534 GID_NOGROUP = 65534 +DAEMON_FAILED_ERROR = 17 diff --git a/src/cephadm/cephadmlib/daemon_identity.py b/src/cephadm/cephadmlib/daemon_identity.py index 52a18092bf0..bfe1a855186 100644 --- 
a/src/cephadm/cephadmlib/daemon_identity.py +++ b/src/cephadm/cephadmlib/daemon_identity.py @@ -157,7 +157,7 @@ class DaemonSubIdentity(DaemonIdentity): ) def sidecar_script(self, base_data_dir: Union[str, os.PathLike]) -> str: - sname = f'sidecar-{ self.subcomponent }.run' + sname = f'sidecar-{self.subcomponent}.run' return str(pathlib.Path(self.data_dir(base_data_dir)) / sname) @property diff --git a/src/cephadm/cephadmlib/daemons/ingress.py b/src/cephadm/cephadmlib/daemons/ingress.py index 8f4f6b08991..c88e39ac025 100644 --- a/src/cephadm/cephadmlib/daemons/ingress.py +++ b/src/cephadm/cephadmlib/daemons/ingress.py @@ -82,8 +82,7 @@ class HAproxy(ContainerDaemonForm): def get_daemon_args(self) -> List[str]: return ['haproxy', '-f', '/var/lib/haproxy/haproxy.cfg'] - def validate(self): - # type: () -> None + def validate(self) -> None: if not is_fsid(self.fsid): raise Error('not an fsid: %s' % self.fsid) if not self.daemon_id: @@ -99,12 +98,10 @@ class HAproxy(ContainerDaemonForm): 'required file missing from config-json: %s' % fname ) - def get_daemon_name(self): - # type: () -> str + def get_daemon_name(self) -> str: return '%s.%s' % (self.daemon_type, self.daemon_id) - def get_container_name(self, desc=None): - # type: (Optional[str]) -> str + def get_container_name(self, desc: Optional[str] = None) -> str: cname = 'ceph-%s-%s' % (self.fsid, self.get_daemon_name()) if desc: cname = '%s-%s' % (cname, desc) @@ -212,8 +209,7 @@ class Keepalived(ContainerDaemonForm): # populate files from the config-json populate_files(data_dir, self.files, uid, gid) - def validate(self): - # type: () -> None + def validate(self) -> None: if not is_fsid(self.fsid): raise Error('not an fsid: %s' % self.fsid) if not self.daemon_id: @@ -229,20 +225,17 @@ class Keepalived(ContainerDaemonForm): 'required file missing from config-json: %s' % fname ) - def get_daemon_name(self): - # type: () -> str + def get_daemon_name(self) -> str: return '%s.%s' % (self.daemon_type, self.daemon_id) - 
def get_container_name(self, desc=None): - # type: (Optional[str]) -> str + def get_container_name(self, desc: Optional[str] = None) -> str: cname = 'ceph-%s-%s' % (self.fsid, self.get_daemon_name()) if desc: cname = '%s-%s' % (cname, desc) return cname @staticmethod - def get_container_envs(): - # type: () -> List[str] + def get_container_envs() -> List[str]: envs = [ 'KEEPALIVED_AUTOCONF=false', 'KEEPALIVED_CONF=/etc/keepalived/keepalived.conf', diff --git a/src/cephadm/cephadmlib/daemons/nfs.py b/src/cephadm/cephadmlib/daemons/nfs.py index f09374d5f46..70ccea65b5b 100644 --- a/src/cephadm/cephadmlib/daemons/nfs.py +++ b/src/cephadm/cephadmlib/daemons/nfs.py @@ -42,9 +42,13 @@ class NFSGanesha(ContainerDaemonForm): return cls.daemon_type == daemon_type def __init__( - self, ctx, fsid, daemon_id, config_json, image=DEFAULT_IMAGE - ): - # type: (CephadmContext, str, Union[int, str], Dict, str) -> None + self, + ctx: CephadmContext, + fsid: str, + daemon_id: Union[int, str], + config_json: Dict, + image: str = DEFAULT_IMAGE, + ) -> None: self.ctx = ctx self.fsid = fsid self.daemon_id = daemon_id @@ -62,8 +66,9 @@ class NFSGanesha(ContainerDaemonForm): self.validate() @classmethod - def init(cls, ctx, fsid, daemon_id): - # type: (CephadmContext, str, Union[int, str]) -> NFSGanesha + def init( + cls, ctx: CephadmContext, fsid: str, daemon_id: Union[int, str] + ) -> 'NFSGanesha': return cls(ctx, fsid, daemon_id, fetch_configs(ctx), ctx.image) @classmethod diff --git a/src/cephadm/cephadmlib/daemons/nvmeof.py b/src/cephadm/cephadmlib/daemons/nvmeof.py index 2c20a900f45..d916c7e6391 100644 --- a/src/cephadm/cephadmlib/daemons/nvmeof.py +++ b/src/cephadm/cephadmlib/daemons/nvmeof.py @@ -33,9 +33,13 @@ class CephNvmeof(ContainerDaemonForm): return cls.daemon_type == daemon_type def __init__( - self, ctx, fsid, daemon_id, config_json, image=DEFAULT_NVMEOF_IMAGE - ): - # type: (CephadmContext, str, Union[int, str], Dict, str) -> None + self, + ctx: CephadmContext, + fsid: 
str, + daemon_id: Union[int, str], + config_json: Dict, + image: str = DEFAULT_NVMEOF_IMAGE, + ) -> None: self.ctx = ctx self.fsid = fsid self.daemon_id = daemon_id @@ -48,8 +52,9 @@ class CephNvmeof(ContainerDaemonForm): self.validate() @classmethod - def init(cls, ctx, fsid, daemon_id): - # type: (CephadmContext, str, Union[int, str]) -> CephNvmeof + def init( + cls, ctx: CephadmContext, fsid: str, daemon_id: Union[int, str] + ) -> 'CephNvmeof': return cls(ctx, fsid, daemon_id, fetch_configs(ctx), ctx.image) @classmethod @@ -73,13 +78,18 @@ class CephNvmeof(ContainerDaemonForm): os.path.join(data_dir, 'ceph-nvmeof.conf') ] = '/src/ceph-nvmeof.conf:z' mounts[os.path.join(data_dir, 'configfs')] = '/sys/kernel/config' - mounts['/dev/hugepages'] = '/dev/hugepages' - mounts['/dev/vfio/vfio'] = '/dev/vfio/vfio' mounts[log_dir] = '/var/log/ceph:z' if mtls_dir: mounts[mtls_dir] = '/src/mtls:z' return mounts + def _get_huge_pages_mounts(self, files: Dict[str, str]) -> Dict[str, str]: + mounts = dict() + if 'spdk_mem_size' not in files: + mounts['/dev/hugepages'] = '/dev/hugepages' + mounts['/dev/vfio/vfio'] = '/dev/vfio/vfio' + return mounts + def _get_tls_cert_key_mounts( self, data_dir: str, files: Dict[str, str] ) -> Dict[str, str]: @@ -111,6 +121,7 @@ class CephNvmeof(ContainerDaemonForm): ) else: mounts.update(self._get_container_mounts(data_dir, log_dir)) + mounts.update(self._get_huge_pages_mounts(self.files)) mounts.update(self._get_tls_cert_key_mounts(data_dir, self.files)) def customize_container_binds( @@ -198,11 +209,13 @@ class CephNvmeof(ContainerDaemonForm): ) return cmd.split() - @staticmethod - def get_sysctl_settings() -> List[str]: - return [ - 'vm.nr_hugepages = 4096', - ] + def get_sysctl_settings(self) -> List[str]: + if 'spdk_mem_size' not in self.files: + return [ + 'vm.nr_hugepages = 4096', + ] + else: + return [] def container(self, ctx: CephadmContext) -> CephContainer: ctr = daemon_to_container(ctx, self) @@ -222,4 +235,6 @@ class 
CephNvmeof(ContainerDaemonForm): args.append(ctx.container_engine.unlimited_pids_option) args.extend(['--ulimit', 'memlock=-1:-1']) args.extend(['--ulimit', 'nofile=10240']) - args.extend(['--cap-add=SYS_ADMIN', '--cap-add=CAP_SYS_NICE']) + args.extend(['--cap-add=CAP_SYS_NICE']) + if 'spdk_mem_size' not in self.files: + args.extend(['--cap-add=SYS_ADMIN']) diff --git a/src/cephadm/cephadmlib/data_utils.py b/src/cephadm/cephadmlib/data_utils.py index 0ab8b38d2b5..9caef3f72e5 100644 --- a/src/cephadm/cephadmlib/data_utils.py +++ b/src/cephadm/cephadmlib/data_utils.py @@ -189,8 +189,9 @@ def normalize_image_digest(digest: str) -> str: return digest -def get_legacy_config_fsid(cluster, legacy_dir=None): - # type: (str, Optional[str]) -> Optional[str] +def get_legacy_config_fsid( + cluster: str, legacy_dir: Optional[str] = None +) -> Optional[str]: config_file = '/etc/ceph/%s.conf' % cluster if legacy_dir is not None: config_file = os.path.abspath(legacy_dir + config_file) diff --git a/src/cephadm/cephadmlib/exceptions.py b/src/cephadm/cephadmlib/exceptions.py index 0d215fdd332..762ce782127 100644 --- a/src/cephadm/cephadmlib/exceptions.py +++ b/src/cephadm/cephadmlib/exceptions.py @@ -19,3 +19,16 @@ class UnauthorizedRegistryError(Error): class PortOccupiedError(Error): pass + + +class DaemonStartException(Exception): + """ + Special exception type we raise when the + systemctl start command fails during daemon + deployment. 
Necessary because the cephadm mgr module + needs to handle this case differently than a failure + earlier in the deploy process where no attempt was made + to actually start the daemon + """ + + pass diff --git a/src/cephadm/cephadmlib/file_utils.py b/src/cephadm/cephadmlib/file_utils.py index 399729f2dcc..27e70e31756 100644 --- a/src/cephadm/cephadmlib/file_utils.py +++ b/src/cephadm/cephadmlib/file_utils.py @@ -52,8 +52,9 @@ def write_new( os.rename(tempname, destination) -def populate_files(config_dir, config_files, uid, gid): - # type: (str, Dict, int, int) -> None +def populate_files( + config_dir: str, config_files: Dict, uid: int, gid: int +) -> None: """create config files for different services""" for fname in config_files: config_file = os.path.join(config_dir, fname) @@ -71,8 +72,7 @@ def touch( os.chown(file_path, uid, gid) -def write_tmp(s, uid, gid): - # type: (str, int, int) -> IO[str] +def write_tmp(s: str, uid: int, gid: int) -> IO[str]: tmp_f = tempfile.NamedTemporaryFile(mode='w', prefix='ceph-tmp') os.fchown(tmp_f.fileno(), uid, gid) tmp_f.write(s) @@ -97,8 +97,7 @@ def recursive_chown(path: str, uid: int, gid: int) -> None: os.chown(os.path.join(dirpath, filename), uid, gid) -def read_file(path_list, file_name=''): - # type: (List[str], str) -> str +def read_file(path_list: List[str], file_name: str = '') -> str: """Returns the content of the first file found within the `path_list` :param path_list: list of file paths to search @@ -123,14 +122,12 @@ def read_file(path_list, file_name=''): return 'Unknown' -def pathify(p): - # type: (str) -> str +def pathify(p: str) -> str: p = os.path.expanduser(p) return os.path.abspath(p) -def get_file_timestamp(fn): - # type: (str) -> Optional[str] +def get_file_timestamp(fn: str) -> Optional[str]: try: mt = os.path.getmtime(fn) return datetime.datetime.fromtimestamp( diff --git a/src/cephadm/cephadmlib/systemd.py b/src/cephadm/cephadmlib/systemd.py index a07757eccad..1956957d457 100644 --- 
a/src/cephadm/cephadmlib/systemd.py +++ b/src/cephadm/cephadmlib/systemd.py @@ -11,8 +11,7 @@ from .packagers import Packager logger = logging.getLogger() -def check_unit(ctx, unit_name): - # type: (CephadmContext, str) -> Tuple[bool, str, bool] +def check_unit(ctx: CephadmContext, unit_name: str) -> Tuple[bool, str, bool]: # NOTE: we ignore the exit code here because systemctl outputs # various exit codes based on the state of the service, but the # string result is more explicit (and sufficient). @@ -56,8 +55,9 @@ def check_unit(ctx, unit_name): return (enabled, state, installed) -def check_units(ctx, units, enabler=None): - # type: (CephadmContext, List[str], Optional[Packager]) -> bool +def check_units( + ctx: CephadmContext, units: List[str], enabler: Optional[Packager] = None +) -> bool: for u in units: (enabled, state, installed) = check_unit(ctx, u) if enabled and state == 'running': diff --git a/src/cephadm/tests/test_agent.py b/src/cephadm/tests/test_agent.py index 52cce74e1fb..8e453e3ac3c 100644 --- a/src/cephadm/tests/test_agent.py +++ b/src/cephadm/tests/test_agent.py @@ -668,7 +668,7 @@ def test_mgr_listener_run(_load_cert_chain, _load_verify_locations, _handle_json agent.mgr_listener.run() # verify payload was correctly extracted - assert _handle_json_payload.called_with(json.loads(payload)) + _handle_json_payload.assert_called_with(json.loads(payload)) FakeConn.send.assert_called_once_with(b'ACK') # second run, with bad json data received diff --git a/src/cephadm/tests/test_cephadm.py b/src/cephadm/tests/test_cephadm.py index f27b9bcd362..bbaaf2d39f8 100644 --- a/src/cephadm/tests/test_cephadm.py +++ b/src/cephadm/tests/test_cephadm.py @@ -1,5 +1,6 @@ # type: ignore +import contextlib import copy import errno import json @@ -38,6 +39,13 @@ def get_ceph_conf( mon_host = {mon_host} ''' +@contextlib.contextmanager +def bootstrap_test_ctx(*args, **kwargs): + with with_cephadm_ctx(*args, **kwargs) as ctx: + ctx.no_cleanup_on_failure = True + yield ctx + 
+ class TestCephAdm(object): @mock.patch('cephadm.logger') @@ -1432,13 +1440,13 @@ class TestBootstrap(object): '--config', conf_file, ) - with with_cephadm_ctx(cmd) as ctx: + with bootstrap_test_ctx(cmd) as ctx: msg = r'No such file or directory' with pytest.raises(_cephadm.Error, match=msg): _cephadm.command_bootstrap(ctx) cephadm_fs.create_file(conf_file) - with with_cephadm_ctx(cmd) as ctx: + with bootstrap_test_ctx(cmd) as ctx: retval = _cephadm.command_bootstrap(ctx) assert retval == 0 @@ -1446,7 +1454,7 @@ class TestBootstrap(object): funkypatch.patch('cephadmlib.systemd.call') cmd = self._get_cmd() - with with_cephadm_ctx(cmd) as ctx: + with bootstrap_test_ctx(cmd) as ctx: msg = r'must specify --mon-ip or --mon-addrv' with pytest.raises(_cephadm.Error, match=msg): _cephadm.command_bootstrap(ctx) @@ -1455,13 +1463,13 @@ class TestBootstrap(object): funkypatch.patch('cephadmlib.systemd.call') cmd = self._get_cmd('--mon-ip', '192.168.1.1') - with with_cephadm_ctx(cmd, list_networks={}) as ctx: + with bootstrap_test_ctx(cmd, list_networks={}) as ctx: msg = r'--skip-mon-network' with pytest.raises(_cephadm.Error, match=msg): _cephadm.command_bootstrap(ctx) cmd += ['--skip-mon-network'] - with with_cephadm_ctx(cmd, list_networks={}) as ctx: + with bootstrap_test_ctx(cmd, list_networks={}) as ctx: retval = _cephadm.command_bootstrap(ctx) assert retval == 0 @@ -1540,12 +1548,12 @@ class TestBootstrap(object): cmd = self._get_cmd('--mon-ip', mon_ip) if not result: - with with_cephadm_ctx(cmd, list_networks=list_networks) as ctx: + with bootstrap_test_ctx(cmd, list_networks=list_networks) as ctx: msg = r'--skip-mon-network' with pytest.raises(_cephadm.Error, match=msg): _cephadm.command_bootstrap(ctx) else: - with with_cephadm_ctx(cmd, list_networks=list_networks) as ctx: + with bootstrap_test_ctx(cmd, list_networks=list_networks) as ctx: retval = _cephadm.command_bootstrap(ctx) assert retval == 0 @@ -1604,11 +1612,11 @@ class TestBootstrap(object): cmd = 
self._get_cmd('--mon-addrv', mon_addrv) if err: - with with_cephadm_ctx(cmd, list_networks=list_networks) as ctx: + with bootstrap_test_ctx(cmd, list_networks=list_networks) as ctx: with pytest.raises(_cephadm.Error, match=err): _cephadm.command_bootstrap(ctx) else: - with with_cephadm_ctx(cmd, list_networks=list_networks) as ctx: + with bootstrap_test_ctx(cmd, list_networks=list_networks) as ctx: retval = _cephadm.command_bootstrap(ctx) assert retval == 0 @@ -1621,13 +1629,13 @@ class TestBootstrap(object): '--skip-mon-network', ) - with with_cephadm_ctx(cmd, hostname=hostname) as ctx: + with bootstrap_test_ctx(cmd, hostname=hostname) as ctx: msg = r'--allow-fqdn-hostname' with pytest.raises(_cephadm.Error, match=msg): _cephadm.command_bootstrap(ctx) cmd += ['--allow-fqdn-hostname'] - with with_cephadm_ctx(cmd, hostname=hostname) as ctx: + with bootstrap_test_ctx(cmd, hostname=hostname) as ctx: retval = _cephadm.command_bootstrap(ctx) assert retval == 0 @@ -1646,7 +1654,7 @@ class TestBootstrap(object): '--fsid', fsid, ) - with with_cephadm_ctx(cmd) as ctx: + with bootstrap_test_ctx(cmd) as ctx: if err: with pytest.raises(_cephadm.Error, match=err): _cephadm.command_bootstrap(ctx) @@ -1661,7 +1669,7 @@ class TestShell(object): fsid = '00000000-0000-0000-0000-0000deadbeef' cmd = ['shell', '--fsid', fsid] - with with_cephadm_ctx(cmd) as ctx: + with bootstrap_test_ctx(cmd) as ctx: retval = _cephadm.command_shell(ctx) assert retval == 0 assert ctx.fsid == fsid diff --git a/src/cephadm/tests/test_deploy.py b/src/cephadm/tests/test_deploy.py index 58f212beff6..c5094db335f 100644 --- a/src/cephadm/tests/test_deploy.py +++ b/src/cephadm/tests/test_deploy.py @@ -495,6 +495,7 @@ def test_deploy_ceph_exporter_container(cephadm_fs, funkypatch): def test_deploy_and_rm_iscsi(cephadm_fs, funkypatch): # Test that the deploy and remove paths for iscsi (which has sidecar container) # create and remove the correct unit files. 
+ funkypatch.patch('shutil.rmtree') # fakefs + shutil.rmtree breaks on py3.12 mocks = _common_patches(funkypatch) _firewalld = mocks['Firewalld'] fsid = 'b01dbeef-701d-9abe-0000-e1e5a47004a7' diff --git a/src/cephadm/tox.ini b/src/cephadm/tox.ini index b999a0f552b..d643b1ba74f 100644 --- a/src/cephadm/tox.ini +++ b/src/cephadm/tox.ini @@ -12,14 +12,14 @@ skipsdist = true max-line-length = 100 inline-quotes = ' ignore = - E501, \ + E501, W503, exclude = - .tox, \ - .vagrant, \ - __pycache__, \ - *.pyc, \ - templates, \ + .tox, + .vagrant, + __pycache__, + *.pyc, + templates, .eggs statistics = True @@ -53,7 +53,7 @@ commands = mypy --config-file ../mypy.ini {posargs:cephadm.py cephadmlib} [testenv:flake8] allowlist_externals = bash deps = - flake8 == 5.0.4 + flake8 flake8-quotes commands = flake8 --config=tox.ini {posargs:cephadm.py cephadmlib} diff --git a/src/client/Client.cc b/src/client/Client.cc index 21555d0d07c..c404057b929 100644 --- a/src/client/Client.cc +++ b/src/client/Client.cc @@ -3843,6 +3843,7 @@ void Client::send_cap(Inode *in, MetaSession *session, Cap *cap, want, flush, cap->mseq, + cap->issue_seq, cap_epoch_barrier); /* * Since the setattr will check the cephx mds auth access before @@ -3856,7 +3857,6 @@ void Client::send_cap(Inode *in, MetaSession *session, Cap *cap, m->caller_uid = -1; m->caller_gid = -1; - m->head.issue_seq = cap->issue_seq; m->set_tid(flush_tid); m->head.uid = in->uid; @@ -5521,10 +5521,10 @@ void Client::handle_cap_export(MetaSession *session, Inode *in, const MConstRef< if (it != in->caps.end()) { Cap &tcap = it->second; if (tcap.cap_id == m->peer.cap_id && - ceph_seq_cmp(tcap.seq, m->peer.seq) < 0) { + ceph_seq_cmp(tcap.seq, m->peer.issue_seq) < 0) { tcap.cap_id = m->peer.cap_id; - tcap.seq = m->peer.seq - 1; - tcap.issue_seq = tcap.seq; + tcap.seq = m->peer.issue_seq - 1; + tcap.issue_seq = tcap.issue_seq; tcap.issued |= cap.issued; tcap.implemented |= cap.issued; if (&cap == in->auth_cap) @@ -5534,7 +5534,7 @@ void 
Client::handle_cap_export(MetaSession *session, Inode *in, const MConstRef< } } else { add_update_cap(in, tsession.get(), m->peer.cap_id, cap.issued, 0, - m->peer.seq - 1, m->peer.mseq, (uint64_t)-1, + m->peer.issue_seq - 1, m->peer.mseq, (uint64_t)-1, &cap == in->auth_cap ? CEPH_CAP_FLAG_AUTH : 0, cap.latest_perms); } diff --git a/src/client/MetaSession.cc b/src/client/MetaSession.cc index b5160a84331..3baa833851f 100644 --- a/src/client/MetaSession.cc +++ b/src/client/MetaSession.cc @@ -56,7 +56,7 @@ void MetaSession::enqueue_cap_release(inodeno_t ino, uint64_t cap_id, ceph_seq_t ceph_mds_cap_item i; i.ino = ino; i.cap_id = cap_id; - i.seq = iseq; + i.issue_seq = iseq; i.migrate_seq = mseq; release->caps.push_back(i); } diff --git a/src/client/SyntheticClient.cc b/src/client/SyntheticClient.cc index 3b408dd3f2d..6b315d2dee3 100644 --- a/src/client/SyntheticClient.cc +++ b/src/client/SyntheticClient.cc @@ -290,6 +290,7 @@ SyntheticClient::SyntheticClient(StandaloneClient *client, int w) void *synthetic_client_thread_entry(void *ptr) { + ceph_pthread_setname("client"); SyntheticClient *sc = static_cast<SyntheticClient*>(ptr); //int r = sc->run(); @@ -945,7 +946,6 @@ int SyntheticClient::start_thread() pthread_create(&thread_id, NULL, synthetic_client_thread_entry, this); ceph_assert(thread_id); - ceph_pthread_setname(thread_id, "client"); return 0; } diff --git a/src/common/DecayCounter.h b/src/common/DecayCounter.h index 9455ecc5a33..30570c72a30 100644 --- a/src/common/DecayCounter.h +++ b/src/common/DecayCounter.h @@ -16,7 +16,6 @@ #define CEPH_DECAYCOUNTER_H #include "include/buffer.h" -#include "common/Formatter.h" #include "common/StackStringStream.h" #include "common/ceph_time.h" @@ -24,6 +23,8 @@ #include <list> #include <sstream> +namespace ceph { class Formatter; } + /** * * TODO: normalize value based on some function of half_life, diff --git a/src/common/Graylog.cc b/src/common/Graylog.cc index cbd63fab25f..099acacd803 100644 --- a/src/common/Graylog.cc 
+++ b/src/common/Graylog.cc @@ -2,6 +2,9 @@ // vim: ts=8 sw=2 smarttab #include "Graylog.h" + +#include <iostream> // for std::cerr + #include "common/Formatter.h" #include "common/LogEntry.h" #include "log/Entry.h" diff --git a/src/common/Journald.cc b/src/common/Journald.cc index 164b65834a6..12e1a97e998 100644 --- a/src/common/Journald.cc +++ b/src/common/Journald.cc @@ -14,6 +14,9 @@ #include <sys/un.h> #include <syslog.h> #include <unistd.h> + +#include <iostream> // for std::cerr + #include <fmt/format.h> #include <fmt/ostream.h> @@ -23,7 +26,6 @@ #include "log/SubsystemMap.h" #include "msg/msg_fmt.h" - namespace ceph::logging { namespace { diff --git a/src/common/StackStringStream.h b/src/common/StackStringStream.h index 8cb48ff6fcd..6a144fb938a 100644 --- a/src/common/StackStringStream.h +++ b/src/common/StackStringStream.h @@ -18,10 +18,9 @@ #include <boost/container/small_vector.hpp> #include <algorithm> -#include <iostream> #include <memory> #include <ostream> -#include <sstream> +#include <string> #include <string_view> #include <vector> diff --git a/src/common/Thread.cc b/src/common/Thread.cc index 3903e8c0ed7..c714aa0aa87 100644 --- a/src/common/Thread.cc +++ b/src/common/Thread.cc @@ -83,7 +83,7 @@ void *Thread::entry_wrapper() if (pid && cpuid >= 0) _set_affinity(cpuid); - ceph_pthread_setname(pthread_self(), Thread::thread_name.c_str()); + ceph_pthread_setname(thread_name.c_str()); return entry(); } @@ -154,7 +154,7 @@ int Thread::try_create(size_t stacksize) void Thread::create(const char *name, size_t stacksize) { ceph_assert(strlen(name) < 16); - Thread::thread_name = name; + thread_name = name; int ret = try_create(stacksize); if (ret != 0) { @@ -203,24 +203,6 @@ int Thread::set_affinity(int id) // Functions for std::thread // ========================= -void set_thread_name(std::thread& t, const std::string& s) { - int r = ceph_pthread_setname(t.native_handle(), s.c_str()); - if (r != 0) { - throw std::system_error(r, std::generic_category()); 
- } -} -std::string get_thread_name(const std::thread& t) { - std::string s(256, '\0'); - - int r = ceph_pthread_getname(const_cast<std::thread&>(t).native_handle(), - s.data(), s.length()); - if (r != 0) { - throw std::system_error(r, std::generic_category()); - } - s.resize(std::strlen(s.data())); - return s; -} - void kill(std::thread& t, int signal) { auto r = ceph_pthread_kill(t.native_handle(), signal); diff --git a/src/common/Thread.h b/src/common/Thread.h index d3892c1b36b..8dc0e6c3cbe 100644 --- a/src/common/Thread.h +++ b/src/common/Thread.h @@ -17,8 +17,8 @@ #define CEPH_THREAD_H #include <functional> +#include <string> #include <string_view> -#include <system_error> #include <thread> #include <cstring> @@ -27,7 +27,6 @@ #include "include/ceph_assert.h" #include "include/compat.h" -#include "include/spinlock.h" extern pid_t ceph_gettid(); @@ -36,7 +35,7 @@ class Thread { pthread_t thread_id; pid_t pid; int cpuid; - static inline thread_local std::string thread_name; + std::string thread_name; void *entry_wrapper(); @@ -64,15 +63,10 @@ class Thread { int join(void **prval = 0); int detach(); int set_affinity(int cpuid); - static const std::string get_thread_name() { - return Thread::thread_name; - } }; // Functions for with std::thread -void set_thread_name(std::thread& t, const std::string& s); -std::string get_thread_name(const std::thread& t); void kill(std::thread& t, int signal); template<typename Fun, typename... 
Args> @@ -81,7 +75,7 @@ std::thread make_named_thread(std::string_view n, Args&& ...args) { return std::thread([n = std::string(n)](auto&& fun, auto&& ...args) { - ceph_pthread_setname(pthread_self(), n.data()); + ceph_pthread_setname(n.data()); std::invoke(std::forward<Fun>(fun), std::forward<Args>(args)...); }, std::forward<Fun>(fun), std::forward<Args>(args)...); diff --git a/src/common/Throttle.h b/src/common/Throttle.h index e190b946c45..fb5d949b438 100644 --- a/src/common/Throttle.h +++ b/src/common/Throttle.h @@ -6,7 +6,7 @@ #include <atomic> #include <chrono> -#include <iostream> +#include <iosfwd> #include <list> #include <map> diff --git a/src/common/admin_socket.cc b/src/common/admin_socket.cc index 1e73ce0836a..55b87de3207 100644 --- a/src/common/admin_socket.cc +++ b/src/common/admin_socket.cc @@ -12,7 +12,13 @@ * */ #include <poll.h> +#include <signal.h> #include <sys/un.h> + +#ifndef WIN32 +#include <sys/wait.h> +#endif + #include <optional> #include <stdlib.h> diff --git a/src/common/assert.cc b/src/common/assert.cc index 7fb4c2d726b..68ad99c878e 100644 --- a/src/common/assert.cc +++ b/src/common/assert.cc @@ -44,8 +44,7 @@ namespace ceph { g_assert_line = line; g_assert_func = func; g_assert_thread = (unsigned long long)pthread_self(); - ceph_pthread_getname(pthread_self(), g_assert_thread_name, - sizeof(g_assert_thread_name)); + ceph_pthread_getname(g_assert_thread_name, sizeof(g_assert_thread_name)); ostringstream tss; tss << ceph_clock_now(); @@ -122,8 +121,7 @@ namespace ceph { g_assert_line = line; g_assert_func = func; g_assert_thread = (unsigned long long)pthread_self(); - ceph_pthread_getname(pthread_self(), g_assert_thread_name, - sizeof(g_assert_thread_name)); + ceph_pthread_getname(g_assert_thread_name, sizeof(g_assert_thread_name)); BufAppender ba(g_assert_msg, sizeof(g_assert_msg)); BackTrace *bt = new ClibBackTrace(1); @@ -168,8 +166,7 @@ namespace ceph { g_assert_line = line; g_assert_func = func; g_assert_thread = (unsigned long 
long)pthread_self(); - ceph_pthread_getname(pthread_self(), g_assert_thread_name, - sizeof(g_assert_thread_name)); + ceph_pthread_getname(g_assert_thread_name, sizeof(g_assert_thread_name)); BackTrace *bt = new ClibBackTrace(1); snprintf(g_assert_msg, sizeof(g_assert_msg), @@ -210,8 +207,7 @@ namespace ceph { g_assert_line = line; g_assert_func = func; g_assert_thread = (unsigned long long)pthread_self(); - ceph_pthread_getname(pthread_self(), g_assert_thread_name, - sizeof(g_assert_thread_name)); + ceph_pthread_getname(g_assert_thread_name, sizeof(g_assert_thread_name)); BufAppender ba(g_assert_msg, sizeof(g_assert_msg)); BackTrace *bt = new ClibBackTrace(1); diff --git a/src/common/buffer.cc b/src/common/buffer.cc index b4640979289..4443ef14124 100644 --- a/src/common/buffer.cc +++ b/src/common/buffer.cc @@ -19,6 +19,8 @@ #include <sys/uio.h> +#include <iostream> + #include "include/ceph_assert.h" #include "include/types.h" #include "include/buffer_raw.h" diff --git a/src/common/ceph_argparse.cc b/src/common/ceph_argparse.cc index 9b989fe7270..ad12e0b6764 100644 --- a/src/common/ceph_argparse.cc +++ b/src/common/ceph_argparse.cc @@ -16,6 +16,7 @@ #include "auth/Auth.h" #include "common/ceph_argparse.h" #include "common/config.h" +#include "common/strtol.h" // for strict_strtof() #include "common/version.h" #include "include/str_list.h" diff --git a/src/common/ceph_argparse.h b/src/common/ceph_argparse.h index d63a2bdd796..5a160dd0b79 100644 --- a/src/common/ceph_argparse.h +++ b/src/common/ceph_argparse.h @@ -29,6 +29,8 @@ #include "common/entity_name.h" #include "include/encoding.h" +class entity_addrvec_t; + /////////////////////// Types /////////////////////// class CephInitParameters { diff --git a/src/common/ceph_time.h b/src/common/ceph_time.h index bae038862cf..01feff4c063 100644 --- a/src/common/ceph_time.h +++ b/src/common/ceph_time.h @@ -16,7 +16,7 @@ #define COMMON_CEPH_TIME_H #include <chrono> -#include <iostream> +#include <iosfwd> #include <string> 
#include <optional> #include <fmt/chrono.h> diff --git a/src/common/ceph_timer.h b/src/common/ceph_timer.h index bc324bfa243..7fb2c7bac12 100644 --- a/src/common/ceph_timer.h +++ b/src/common/ceph_timer.h @@ -98,6 +98,7 @@ class timer { std::thread thread; void timer_thread() { + ceph_pthread_setname("ceph_timer"); std::unique_lock l(lock); while (!suspended) { auto now = TC::now(); @@ -155,7 +156,6 @@ class timer { public: timer() : suspended(false) { thread = std::thread(&timer::timer_thread, this); - set_thread_name(thread, "ceph_timer"); } // Create a suspended timer, jobs will be executed in order when diff --git a/src/common/code_environment.cc b/src/common/code_environment.cc index 14d55f60c30..21633fc5d41 100644 --- a/src/common/code_environment.cc +++ b/src/common/code_environment.cc @@ -11,6 +11,7 @@ * Foundation. See file COPYING. * */ +#include "include/compat.h" #include "common/code_environment.h" @@ -18,10 +19,6 @@ #include "acconfig.h" -#ifdef HAVE_PTHREAD_GETNAME_NP -#include <pthread.h> -#endif - #include <string.h> code_environment_t g_code_env = CODE_ENVIRONMENT_UTILITY; @@ -57,7 +54,7 @@ int get_process_name(char *buf, int len) } // FIPS zeroization audit 20191115: this memset is not security related. memset(buf, 0, len); - return pthread_getname_np(pthread_self(), buf, len); + return ceph_pthread_getname(buf, len); } #elif defined(HAVE_GETPROGNAME) diff --git a/src/common/compat.cc b/src/common/compat.cc index 82b57ad94b5..84a395c5a19 100644 --- a/src/common/compat.cc +++ b/src/common/compat.cc @@ -565,3 +565,66 @@ ssize_t get_self_exe_path(char* path, int buff_length) { } #endif /* _WIN32 */ + + +static thread_local char cached_thread_name[256]{}; + +int ceph_pthread_setname(char const* name) +{ + strncpy(cached_thread_name, name, sizeof cached_thread_name - 1); +#if defined(_WIN32) && defined(__clang__) && \ + !defined(_LIBCPP_HAS_THREAD_API_PTHREAD) + // In this case, llvm doesn't use the pthread api for std::thread. 
+ // We cannot use native_handle() with the pthread api, nor can we pass + // it to Windows API functions. + return 0; +#elif defined(HAVE_PTHREAD_SETNAME_NP) + #if defined(__APPLE__) + return pthread_setname_np(name); + #else + return pthread_setname_np(pthread_self(), name); + #endif +#elif defined(HAVE_PTHREAD_SET_NAME_NP) + pthread_set_name_np(pthread_self(), name); \ + return 0; +#else + return 0; +#endif +} + +int ceph_pthread_getname(char* name, size_t len) +{ + if (cached_thread_name[0]) { + if (len > 0) { + strncpy(name, cached_thread_name, len); + name[len-1] = 0; + } + return 0; + } else { +#if defined(_WIN32) && defined(__clang__) && \ + !defined(_LIBCPP_HAS_THREAD_API_PTHREAD) + if (len > 0) { + strcpy(name, ""); + } + return 0; +#elif defined(HAVE_PTHREAD_GETNAME_NP) || defined(HAVE_PTHREAD_GET_NAME_NP) +# if defined(HAVE_PTHREAD_GETNAME_NP) + int rc = pthread_getname_np(pthread_self(), cached_thread_name, sizeof cached_thread_name); +# else + int rc = pthread_get_name_np(pthread_self(), cached_thread_name, sizeof cached_thread_name); +# endif + if (rc == 0) { + strncpy(name, cached_thread_name, len); + name[len-1] = 0; + return 0; + } else { + return rc; + } +#else + if (len > 0) { + strcpy(name, ""); + } + return 0; +#endif + } +} diff --git a/src/common/config_cacher.h b/src/common/config_cacher.h index a84bad08eee..91b8152dde1 100644 --- a/src/common/config_cacher.h +++ b/src/common/config_cacher.h @@ -50,7 +50,7 @@ public: conf.remove_observer(this); } - operator ValueT() const { + ValueT operator*() const { return value_cache.load(); } }; diff --git a/src/common/error_code.cc b/src/common/error_code.cc index ed0e681b22b..9c981a21077 100644 --- a/src/common/error_code.cc +++ b/src/common/error_code.cc @@ -13,10 +13,9 @@ * COPYING. 
*/ -#include <exception> +#include "common/error_code.h" #include <boost/asio/error.hpp> -#include "common/error_code.h" #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wnon-virtual-dtor" diff --git a/src/common/error_code.h b/src/common/error_code.h index e39122f8ce3..93a1bf31c00 100644 --- a/src/common/error_code.h +++ b/src/common/error_code.h @@ -16,9 +16,8 @@ #ifndef COMMON_CEPH_ERROR_CODE #define COMMON_CEPH_ERROR_CODE -#include <netdb.h> - -#include <boost/system.hpp> +#include <boost/system/error_code.hpp> +#include <boost/system/system_error.hpp> #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wnon-virtual-dtor" diff --git a/src/common/obj_bencher.cc b/src/common/obj_bencher.cc index 32ecc958618..f5e744e2339 100644 --- a/src/common/obj_bencher.cc +++ b/src/common/obj_bencher.cc @@ -99,6 +99,7 @@ ostream& ObjBencher::out(ostream& os) } void *ObjBencher::status_printer(void *_bencher) { + ceph_pthread_setname("OB::stat_print"); ObjBencher *bencher = static_cast<ObjBencher *>(_bencher); bench_data& data = bencher->data; Formatter *formatter = bencher->formatter; @@ -453,7 +454,6 @@ int ObjBencher::write_bench(int secondsToRun, pthread_t print_thread; pthread_create(&print_thread, NULL, ObjBencher::status_printer, (void *)this); - ceph_pthread_setname(print_thread, "write_stat"); std::unique_lock locker{lock}; data.finished = 0; data.start_time = mono_clock::now(); @@ -691,7 +691,6 @@ int ObjBencher::seq_read_bench( pthread_t print_thread; pthread_create(&print_thread, NULL, status_printer, (void *)this); - ceph_pthread_setname(print_thread, "seq_read_stat"); mono_time finish_time = data.start_time + time_to_run; //start initial reads @@ -903,7 +902,6 @@ int ObjBencher::rand_read_bench( pthread_t print_thread; pthread_create(&print_thread, NULL, status_printer, (void *)this); - ceph_pthread_setname(print_thread, "rand_read_stat"); mono_time finish_time = data.start_time + time_to_run; //start initial reads diff --git 
a/src/common/options.cc b/src/common/options.cc index a68e2474a3d..3f6894b01c1 100644 --- a/src/common/options.cc +++ b/src/common/options.cc @@ -5,6 +5,7 @@ #include "options.h" #include "common/Formatter.h" #include "common/options/build_options.h" +#include "common/strtol.h" // for strict_si_cast() // Helpers for validators #include "include/stringify.h" diff --git a/src/common/options.h b/src/common/options.h index abded4cc0dd..ec6db7770c3 100644 --- a/src/common/options.h +++ b/src/common/options.h @@ -4,6 +4,7 @@ #pragma once #include <chrono> +#include <iostream> // for std::cerr #include <string> #include <variant> #include <vector> diff --git a/src/common/options/mds.yaml.in b/src/common/options/mds.yaml.in index 18efba561ed..94824faef6b 100644 --- a/src/common/options/mds.yaml.in +++ b/src/common/options/mds.yaml.in @@ -586,16 +586,6 @@ options: min: 1 services: - mds -- name: mds_log_major_segment_event_ratio - type: uint - level: advanced - desc: multiple of mds_log_events_per_segment between major segments - default: 12 - services: - - mds - min: 1 - see_also: - - mds_log_events_per_segment # segment size for mds log, default to default file_layout_t - name: mds_log_segment_size type: size @@ -1741,3 +1731,12 @@ options: - mds flags: - runtime +- name: mds_log_minor_segments_per_major_segment + type: uint + level: advanced + desc: number of minor segments per major segment. + long_desc: The number of minor mds log segments since last major segment after which a major segment is started/logged. 
+ default: 16 + services: + - mds + min: 8 diff --git a/src/common/options/osd.yaml.in b/src/common/options/osd.yaml.in index 3bfff92c907..49099f42b71 100644 --- a/src/common/options/osd.yaml.in +++ b/src/common/options/osd.yaml.in @@ -346,7 +346,7 @@ options: default: 5 see_also: - osd_scrub_chunk_max - with_legacy: true + with_legacy: false - name: osd_scrub_chunk_max type: int level: advanced @@ -357,7 +357,7 @@ options: default: 15 see_also: - osd_scrub_chunk_min - with_legacy: true + with_legacy: false - name: osd_shallow_scrub_chunk_min type: int level: advanced @@ -369,7 +369,7 @@ options: see_also: - osd_shallow_scrub_chunk_max - osd_scrub_chunk_min - with_legacy: true + with_legacy: false - name: osd_shallow_scrub_chunk_max type: int level: advanced @@ -380,7 +380,7 @@ options: see_also: - osd_shallow_scrub_chunk_min - osd_scrub_chunk_max - with_legacy: true + with_legacy: false # sleep between [deep]scrub ops - name: osd_scrub_sleep type: float diff --git a/src/common/perf_counters.cc b/src/common/perf_counters.cc index b5e361b505c..2eeaa80aae8 100644 --- a/src/common/perf_counters.cc +++ b/src/common/perf_counters.cc @@ -18,6 +18,7 @@ #include "common/dout.h" #include "common/valgrind.h" #include "include/common_fwd.h" +#include "include/utime.h" using std::ostringstream; using std::make_pair; diff --git a/src/common/perf_counters.h b/src/common/perf_counters.h index 942edf6d7e5..0d0fe86a092 100644 --- a/src/common/perf_counters.h +++ b/src/common/perf_counters.h @@ -17,6 +17,8 @@ #ifndef CEPH_COMMON_PERF_COUNTERS_H #define CEPH_COMMON_PERF_COUNTERS_H +#include <functional> +#include <set> #include <string> #include <vector> #include <memory> @@ -24,11 +26,12 @@ #include <cstdint> #include "common/perf_histogram.h" -#include "include/utime.h" #include "include/common_fwd.h" #include "common/ceph_mutex.h" #include "common/ceph_time.h" +class utime_t; + namespace TOPNSPC::common { class CephContext; class PerfCountersBuilder; diff --git 
a/src/common/perf_counters_cache.h b/src/common/perf_counters_cache.h index 866f56ee350..aa786fc5bf0 100644 --- a/src/common/perf_counters_cache.h +++ b/src/common/perf_counters_cache.h @@ -3,6 +3,7 @@ #include "common/perf_counters.h" #include "common/ceph_context.h" #include "common/intrusive_lru.h" +#include "include/utime.h" namespace ceph::perf_counters { diff --git a/src/common/pick_address.cc b/src/common/pick_address.cc index aa6b765bc56..d125d7171e0 100644 --- a/src/common/pick_address.cc +++ b/src/common/pick_address.cc @@ -15,6 +15,7 @@ #include "common/pick_address.h" #include <bitset> +#include <ifaddrs.h> // for struct ifaddrs #include <netdb.h> #include <netinet/in.h> #ifdef _WIN32 @@ -40,6 +41,7 @@ #include "common/debug.h" #include "common/errno.h" #include "common/numa.h" +#include "common/safe_io.h" #ifndef HAVE_IN_ADDR_T typedef uint32_t in_addr_t; diff --git a/src/crimson/os/alienstore/alien_store.cc b/src/crimson/os/alienstore/alien_store.cc index 3fd2bb1fd15..f390823a8a0 100644 --- a/src/crimson/os/alienstore/alien_store.cc +++ b/src/crimson/os/alienstore/alien_store.cc @@ -590,6 +590,19 @@ seastar::future<struct stat> AlienStore::stat( }); } +seastar::future<std::string> AlienStore::get_default_device_class() +{ + logger().debug("{}", __func__); + assert(tp); + return op_gates.simple_dispatch("get_default_device_class", [=, this] { + return tp->submit([=, this] { + return store->get_default_device_class(); + }).then([] (std::string device_class) { + return seastar::make_ready_future<std::string>(device_class); + }); + }); +} + auto AlienStore::omap_get_header(CollectionRef ch, const ghobject_t& oid) -> get_attr_errorator::future<ceph::bufferlist> diff --git a/src/crimson/os/alienstore/alien_store.h b/src/crimson/os/alienstore/alien_store.h index d36f449afd8..853585dac9c 100644 --- a/src/crimson/os/alienstore/alien_store.h +++ b/src/crimson/os/alienstore/alien_store.h @@ -98,6 +98,7 @@ public: seastar::future<struct stat> stat( CollectionRef, 
const ghobject_t&) final; + seastar::future<std::string> get_default_device_class() final; get_attr_errorator::future<ceph::bufferlist> omap_get_header( CollectionRef, const ghobject_t&) final; diff --git a/src/crimson/os/alienstore/thread_pool.cc b/src/crimson/os/alienstore/thread_pool.cc index 5cf9590e61e..277055ec51e 100644 --- a/src/crimson/os/alienstore/thread_pool.cc +++ b/src/crimson/os/alienstore/thread_pool.cc @@ -27,7 +27,7 @@ ThreadPool::ThreadPool(size_t n_threads, pin(*cpus); } block_sighup(); - (void) pthread_setname_np(pthread_self(), "alien-store-tp"); + (void) ceph_pthread_setname("alien-store-tp"); loop(queue_max_wait, i); }); } diff --git a/src/crimson/os/cyanstore/cyan_store.cc b/src/crimson/os/cyanstore/cyan_store.cc index 7b945e5aa15..3f861a9271f 100644 --- a/src/crimson/os/cyanstore/cyan_store.cc +++ b/src/crimson/os/cyanstore/cyan_store.cc @@ -143,6 +143,12 @@ CyanStore::list_collections() }); } +seastar::future<std::string> +CyanStore::get_default_device_class() +{ + return seastar::make_ready_future<std::string>(""); +} + CyanStore::mount_ertr::future<> CyanStore::Shard::mount() { static const char read_file_errmsg[]{"read_file"}; diff --git a/src/crimson/os/cyanstore/cyan_store.h b/src/crimson/os/cyanstore/cyan_store.h index 99583d07d36..e9394991bc2 100644 --- a/src/crimson/os/cyanstore/cyan_store.h +++ b/src/crimson/os/cyanstore/cyan_store.h @@ -221,6 +221,8 @@ public: seastar::future<std::vector<coll_core_t>> list_collections() final; + seastar::future<std::string> get_default_device_class() final; + private: seastar::sharded<CyanStore::Shard> shard_stores; const std::string path; diff --git a/src/crimson/os/futurized_store.h b/src/crimson/os/futurized_store.h index 0dca695ba3a..51ef2331014 100644 --- a/src/crimson/os/futurized_store.h +++ b/src/crimson/os/futurized_store.h @@ -203,6 +203,7 @@ public: using coll_core_t = std::pair<coll_t, core_id_t>; virtual seastar::future<std::vector<coll_core_t>> list_collections() = 0; + virtual 
seastar::future<std::string> get_default_device_class() = 0; protected: const core_id_t primary_core; }; diff --git a/src/crimson/os/seastore/cache.cc b/src/crimson/os/seastore/cache.cc index 5dcb7514ee1..70fec7caca4 100644 --- a/src/crimson/os/seastore/cache.cc +++ b/src/crimson/os/seastore/cache.cc @@ -172,6 +172,7 @@ void Cache::register_metrics() {extent_types_t::LADDR_INTERNAL, sm::label_instance("ext", "LADDR_INTERNAL")}, {extent_types_t::LADDR_LEAF, sm::label_instance("ext", "LADDR_LEAF")}, {extent_types_t::DINK_LADDR_LEAF, sm::label_instance("ext", "DINK_LADDR_LEAF")}, + {extent_types_t::ROOT_META, sm::label_instance("ext", "ROOT_META")}, {extent_types_t::OMAP_INNER, sm::label_instance("ext", "OMAP_INNER")}, {extent_types_t::OMAP_LEAF, sm::label_instance("ext", "OMAP_LEAF")}, {extent_types_t::ONODE_BLOCK_STAGED, sm::label_instance("ext", "ONODE_BLOCK_STAGED")}, @@ -1093,6 +1094,9 @@ CachedExtentRef Cache::alloc_new_extent_by_type( case extent_types_t::LADDR_LEAF: return alloc_new_non_data_extent<lba_manager::btree::LBALeafNode>( t, length, hint, gen); + case extent_types_t::ROOT_META: + return alloc_new_non_data_extent<RootMetaBlock>( + t, length, hint, gen); case extent_types_t::ONODE_BLOCK_STAGED: return alloc_new_non_data_extent<onode::SeastoreNodeExtent>( t, length, hint, gen); @@ -2193,6 +2197,12 @@ Cache::do_get_caching_extent_by_type( ).safe_then([](auto extent) { return CachedExtentRef(extent.detach(), false /* add_ref */); }); + case extent_types_t::ROOT_META: + return do_get_caching_extent<RootMetaBlock>( + offset, length, std::move(extent_init_func), std::move(on_cache) + ).safe_then([](auto extent) { + return CachedExtentRef(extent.detach(), false /* add_ref */); + }); case extent_types_t::OMAP_INNER: return do_get_caching_extent<omap_manager::OMapInnerNode>( offset, length, std::move(extent_init_func), std::move(on_cache) diff --git a/src/crimson/os/seastore/cache.h b/src/crimson/os/seastore/cache.h index dba3610e95f..c37d9c5c7cd 100644 --- 
a/src/crimson/os/seastore/cache.h +++ b/src/crimson/os/seastore/cache.h @@ -978,7 +978,8 @@ public: auto result = epm.alloc_new_non_data_extent(t, T::TYPE, length, hint, gen); #endif if (!result) { - return nullptr; + SUBERRORT(seastore_cache, "insufficient space", t); + std::rethrow_exception(crimson::ct_error::enospc::exception_ptr()); } auto ret = CachedExtent::make_cached_extent_ref<T>(std::move(result->bp)); ret->init(CachedExtent::extent_state_t::INITIAL_WRITE_PENDING, @@ -1019,6 +1020,10 @@ public: #else auto results = epm.alloc_new_data_extents(t, T::TYPE, length, hint, gen); #endif + if (results.empty()) { + SUBERRORT(seastore_cache, "insufficient space", t); + std::rethrow_exception(crimson::ct_error::enospc::exception_ptr()); + } std::vector<TCachedExtentRef<T>> extents; for (auto &result : results) { auto ret = CachedExtent::make_cached_extent_ref<T>(std::move(result.bp)); diff --git a/src/crimson/os/seastore/onode.h b/src/crimson/os/seastore/onode.h index 072c57864be..fa2ed65c0f3 100644 --- a/src/crimson/os/seastore/onode.h +++ b/src/crimson/os/seastore/onode.h @@ -36,8 +36,8 @@ struct onode_layout_t { object_data_le_t object_data; - char oi[MAX_OI_LENGTH]; - char ss[MAX_SS_LENGTH]; + char oi[MAX_OI_LENGTH] = {0}; + char ss[MAX_SS_LENGTH] = {0}; } __attribute__((packed)); class Transaction; diff --git a/src/crimson/os/seastore/random_block_manager/block_rb_manager.cc b/src/crimson/os/seastore/random_block_manager/block_rb_manager.cc index 9f6a566d15c..97b7902edf5 100644 --- a/src/crimson/os/seastore/random_block_manager/block_rb_manager.cc +++ b/src/crimson/os/seastore/random_block_manager/block_rb_manager.cc @@ -188,10 +188,10 @@ BlockRBManager::write_ertr::future<> BlockRBManager::write( void BlockRBManager::prefill_fragmented_device() { LOG_PREFIX(BlockRBManager::prefill_fragmented_device); - // the first 2 blocks must be allocated to lba root + // the first 3 blocks must be allocated to lba root // and backref root during mkfs - for (size_t block = 
get_block_size() * 2; - block <= get_size() - get_block_size() * 2; + for (size_t block = get_block_size() * 3; + block <= get_size() - get_block_size() * 3; block += get_block_size() * 2) { DEBUG("marking {}~{} used", get_start_rbm_addr() + block, diff --git a/src/crimson/os/seastore/root_meta.h b/src/crimson/os/seastore/root_meta.h new file mode 100644 index 00000000000..edf082f1e38 --- /dev/null +++ b/src/crimson/os/seastore/root_meta.h @@ -0,0 +1,76 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#pragma once + +#include "crimson/os/seastore/cached_extent.h" + +namespace crimson::os::seastore { + +struct RootMetaBlock : LogicalCachedExtent { + using meta_t = std::map<std::string, std::string>; + using Ref = TCachedExtentRef<RootMetaBlock>; + static constexpr size_t SIZE = 4096; + static constexpr int MAX_META_LENGTH = 1024; + + explicit RootMetaBlock(ceph::bufferptr &&ptr) + : LogicalCachedExtent(std::move(ptr)) {} + explicit RootMetaBlock(extent_len_t length) + : LogicalCachedExtent(length) {} + RootMetaBlock(const RootMetaBlock &rhs) + : LogicalCachedExtent(rhs) {} + + CachedExtentRef duplicate_for_write(Transaction&) final { + return CachedExtentRef(new RootMetaBlock(*this)); + } + + static constexpr extent_types_t TYPE = extent_types_t::ROOT_META; + extent_types_t get_type() const final { + return extent_types_t::ROOT_META; + } + + /// dumps root meta as delta + ceph::bufferlist get_delta() final { + ceph::bufferlist bl; + ceph::buffer::ptr bptr(get_bptr(), 0, MAX_META_LENGTH); + bl.append(bptr); + return bl; + } + + /// overwrites root + void apply_delta(const ceph::bufferlist &_bl) final + { + assert(_bl.length() == MAX_META_LENGTH); + ceph::bufferlist bl = _bl; + bl.rebuild(); + get_bptr().copy_in(0, MAX_META_LENGTH, bl.front().c_str()); + } + + meta_t get_meta() const { + bufferlist bl; + bl.append(get_bptr()); + meta_t ret; + auto iter = bl.cbegin(); + decode(ret, iter); + return ret; + } + + 
void set_meta(const meta_t &m) { + ceph::bufferlist bl; + encode(m, bl); + ceph_assert(bl.length() <= MAX_META_LENGTH); + bl.rebuild(); + get_bptr().zero(0, MAX_META_LENGTH); + get_bptr().copy_in(0, bl.length(), bl.front().c_str()); + } + +}; +using RootMetaBlockRef = RootMetaBlock::Ref; + +} // crimson::os::seastore + + +#if FMT_VERSION >= 90000 +template <> struct fmt::formatter<crimson::os::seastore::RootMetaBlock> + : fmt::ostream_formatter {}; +#endif diff --git a/src/crimson/os/seastore/seastore.cc b/src/crimson/os/seastore/seastore.cc index d90edbb20db..2bf55d5cde7 100644 --- a/src/crimson/os/seastore/seastore.cc +++ b/src/crimson/os/seastore/seastore.cc @@ -2721,6 +2721,13 @@ SeaStore::read_meta(const std::string& key) ); } +seastar::future<std::string> SeaStore::get_default_device_class() +{ + using crimson::common::get_conf; + std::string type = get_conf<std::string>("seastore_main_device_type"); + return seastar::make_ready_future<std::string>(type); +} + uuid_d SeaStore::Shard::get_fsid() const { return device->get_meta().seastore_id; diff --git a/src/crimson/os/seastore/seastore.h b/src/crimson/os/seastore/seastore.h index 185072744f2..fd7e177da63 100644 --- a/src/crimson/os/seastore/seastore.h +++ b/src/crimson/os/seastore/seastore.h @@ -191,6 +191,8 @@ public: seastar::future<> write_meta(const std::string& key, const std::string& value); + seastar::future<std::string> get_default_device_class(); + store_statfs_t stat() const; uuid_d get_fsid() const; @@ -567,6 +569,8 @@ public: seastar::future<std::vector<coll_core_t>> list_collections() final; + seastar::future<std::string> get_default_device_class() final; + FuturizedStore::Shard& get_sharded_store() final { return shard_stores.local(); } diff --git a/src/crimson/os/seastore/seastore_types.cc b/src/crimson/os/seastore/seastore_types.cc index f379dd0117c..450118e5e75 100644 --- a/src/crimson/os/seastore/seastore_types.cc +++ b/src/crimson/os/seastore/seastore_types.cc @@ -246,6 +246,8 @@ 
std::ostream &operator<<(std::ostream &out, extent_types_t t) return out << "LADDR_LEAF"; case extent_types_t::ONODE_BLOCK_STAGED: return out << "ONODE_BLOCK_STAGED"; + case extent_types_t::ROOT_META: + return out << "ROOT_META"; case extent_types_t::OMAP_INNER: return out << "OMAP_INNER"; case extent_types_t::OMAP_LEAF: diff --git a/src/crimson/os/seastore/seastore_types.h b/src/crimson/os/seastore/seastore_types.h index df5c184e7ab..65cad878fba 100644 --- a/src/crimson/os/seastore/seastore_types.h +++ b/src/crimson/os/seastore/seastore_types.h @@ -1378,23 +1378,24 @@ enum class extent_types_t : uint8_t { LADDR_INTERNAL = 1, LADDR_LEAF = 2, DINK_LADDR_LEAF = 3, // should only be used for unitttests - OMAP_INNER = 4, - OMAP_LEAF = 5, - ONODE_BLOCK_STAGED = 6, - COLL_BLOCK = 7, - OBJECT_DATA_BLOCK = 8, - RETIRED_PLACEHOLDER = 9, + ROOT_META = 4, + OMAP_INNER = 5, + OMAP_LEAF = 6, + ONODE_BLOCK_STAGED = 7, + COLL_BLOCK = 8, + OBJECT_DATA_BLOCK = 9, + RETIRED_PLACEHOLDER = 10, // the following two types are not extent types, // they are just used to indicates paddr allocation deltas - ALLOC_INFO = 10, - JOURNAL_TAIL = 11, + ALLOC_INFO = 11, + JOURNAL_TAIL = 12, // Test Block Types - TEST_BLOCK = 12, - TEST_BLOCK_PHYSICAL = 13, - BACKREF_INTERNAL = 14, - BACKREF_LEAF = 15, + TEST_BLOCK = 13, + TEST_BLOCK_PHYSICAL = 14, + BACKREF_INTERNAL = 15, + BACKREF_LEAF = 16, // None and the number of valid extent_types_t - NONE = 16, + NONE = 17, }; using extent_types_le_t = uint8_t; constexpr auto EXTENT_TYPES_MAX = static_cast<uint8_t>(extent_types_t::NONE); @@ -1409,12 +1410,12 @@ constexpr bool is_data_type(extent_types_t type) { } constexpr bool is_logical_metadata_type(extent_types_t type) { - return type >= extent_types_t::OMAP_INNER && + return type >= extent_types_t::ROOT_META && type <= extent_types_t::COLL_BLOCK; } constexpr bool is_logical_type(extent_types_t type) { - if ((type >= extent_types_t::OMAP_INNER && + if ((type >= extent_types_t::ROOT_META && type <= 
extent_types_t::OBJECT_DATA_BLOCK) || type == extent_types_t::TEST_BLOCK) { assert(is_logical_metadata_type(type) || @@ -1926,44 +1927,18 @@ using backref_root_t = phy_tree_root_t; * TODO: generalize this to permit more than one lba_manager implementation */ struct __attribute__((packed)) root_t { - using meta_t = std::map<std::string, std::string>; - - static constexpr int MAX_META_LENGTH = 1024; - backref_root_t backref_root; lba_root_t lba_root; laddr_le_t onode_root; coll_root_le_t collection_root; + laddr_le_t meta; - char meta[MAX_META_LENGTH]; - - root_t() { - set_meta(meta_t{}); - } + root_t() = default; void adjust_addrs_from_base(paddr_t base) { lba_root.adjust_addrs_from_base(base); backref_root.adjust_addrs_from_base(base); } - - meta_t get_meta() { - bufferlist bl; - bl.append(ceph::buffer::create_static(MAX_META_LENGTH, meta)); - meta_t ret; - auto iter = bl.cbegin(); - decode(ret, iter); - return ret; - } - - void set_meta(const meta_t &m) { - ceph::bufferlist bl; - encode(m, bl); - ceph_assert(bl.length() < MAX_META_LENGTH); - bl.rebuild(); - auto &bptr = bl.front(); - ::memset(meta, 0, MAX_META_LENGTH); - ::memcpy(meta, bptr.c_str(), bl.length()); - } }; struct alloc_blk_t { diff --git a/src/crimson/os/seastore/transaction_manager.cc b/src/crimson/os/seastore/transaction_manager.cc index f4e3b0858f2..717c3822db9 100644 --- a/src/crimson/os/seastore/transaction_manager.cc +++ b/src/crimson/os/seastore/transaction_manager.cc @@ -74,6 +74,8 @@ TransactionManager::mkfs_ertr::future<> TransactionManager::mkfs() return lba_manager->mkfs(t); }).si_then([this, &t] { return backref_manager->mkfs(t); + }).si_then([this, &t] { + return init_root_meta(t); }).si_then([this, FNAME, &t] { INFOT("submitting mkfs transaction", t); return submit_transaction_direct(t); diff --git a/src/crimson/os/seastore/transaction_manager.h b/src/crimson/os/seastore/transaction_manager.h index c7a94a9ef11..841c5638abc 100644 --- a/src/crimson/os/seastore/transaction_manager.h +++ 
b/src/crimson/os/seastore/transaction_manager.h @@ -23,6 +23,7 @@ #include "crimson/os/seastore/logging.h" #include "crimson/os/seastore/seastore_types.h" #include "crimson/os/seastore/cache.h" +#include "crimson/os/seastore/root_meta.h" #include "crimson/os/seastore/lba_manager.h" #include "crimson/os/seastore/backref_manager.h" #include "crimson/os/seastore/journal.h" @@ -303,10 +304,6 @@ public: len, placement_hint, INIT_GENERATION); - if (!ext) { - SUBERRORT(seastore_tm, "insufficient space!", t); - return crimson::ct_error::enospc::make(); - } return lba_manager->alloc_extent( t, laddr_hint, @@ -342,10 +339,6 @@ public: len, placement_hint, INIT_GENERATION); - if (exts.empty()) { - SUBERRORT(seastore_tm, "insufficient space!", t); - return crimson::ct_error::enospc::make(); - } return lba_manager->alloc_extents( t, laddr_hint, @@ -690,9 +683,11 @@ public: const std::string &key) { return cache->get_root( t - ).si_then([&key, &t](auto root) { + ).si_then([&t, this](auto root) { + return read_extent<RootMetaBlock>(t, root->root.meta); + }).si_then([key, &t](auto mblock) { LOG_PREFIX(TransactionManager::read_root_meta); - auto meta = root->root.get_meta(); + auto meta = mblock->get_meta(); auto iter = meta.find(key); if (iter == meta.end()) { SUBDEBUGT(seastore_tm, "{} -> nullopt", t, key); @@ -701,7 +696,35 @@ public: SUBDEBUGT(seastore_tm, "{} -> {}", t, key, iter->second); return seastar::make_ready_future<read_root_meta_bare>(iter->second); } - }); + }).handle_error_interruptible( + crimson::ct_error::input_output_error::pass_further{}, + crimson::ct_error::assert_all{"unexpected error!"} + ); + } + + /** + * init_root_meta + * + * create the root meta block + */ + using init_root_meta_iertr = base_iertr; + using init_root_meta_ret = init_root_meta_iertr::future<>; + init_root_meta_ret init_root_meta(Transaction &t) { + return alloc_non_data_extent<RootMetaBlock>( + t, L_ADDR_MIN, RootMetaBlock::SIZE + ).si_then([this, &t](auto meta) { + 
meta->set_meta(RootMetaBlock::meta_t{}); + return cache->get_root(t + ).si_then([this, &t, meta](auto root) { + auto mroot = cache->duplicate_for_write( + t, root)->template cast<RootBlock>(); + mroot->root.meta = meta->get_laddr(); + return seastar::now(); + }); + }).handle_error_interruptible( + crimson::ct_error::input_output_error::pass_further{}, + crimson::ct_error::assert_all{"unexpected error!"} + ); } /** @@ -719,15 +742,21 @@ public: SUBDEBUGT(seastore_tm, "seastore_tm, {} -> {} ...", t, key, value); return cache->get_root( t - ).si_then([this, &t, &key, &value](RootBlockRef root) { - root = cache->duplicate_for_write(t, root)->cast<RootBlock>(); + ).si_then([this, &t](RootBlockRef root) { + return read_extent<RootMetaBlock>(t, root->root.meta); + }).si_then([this, key, value, &t](auto mblock) { + mblock = get_mutable_extent(t, mblock + )->template cast<RootMetaBlock>(); - auto meta = root->root.get_meta(); + auto meta = mblock->get_meta(); meta[key] = value; - root->root.set_meta(meta); + mblock->set_meta(meta); return seastar::now(); - }); + }).handle_error_interruptible( + crimson::ct_error::input_output_error::pass_further{}, + crimson::ct_error::assert_all{"unexpected error!"} + ); } /** diff --git a/src/crimson/osd/backfill_state.cc b/src/crimson/osd/backfill_state.cc index a77cbe87652..837fd2eb2af 100644 --- a/src/crimson/osd/backfill_state.cc +++ b/src/crimson/osd/backfill_state.cc @@ -379,16 +379,17 @@ BackfillState::Enqueuing::Enqueuing(my_context ctx) trim_backfilled_object_from_intervals(std::move(result), backfill_state().last_backfill_started, backfill_state().peer_backfill_info); - } else { + backfill_listener().maybe_flush(); + } else if (!primary_bi.empty()) { auto result = update_on_peers(check); trim_backfilled_object_from_intervals(std::move(result), backfill_state().last_backfill_started, backfill_state().peer_backfill_info); - if (!primary_bi.empty()) { - primary_bi.pop_front(); - } + primary_bi.pop_front(); + 
backfill_listener().maybe_flush(); + } else { + break; } - backfill_listener().maybe_flush(); } while (!all_emptied(primary_bi, backfill_state().peer_backfill_info)); if (backfill_state().progress_tracker->tracked_objects_completed() @@ -610,4 +611,12 @@ void BackfillState::ProgressTracker::complete_to( } } +void BackfillState::enqueue_standalone_push( + const hobject_t &obj, + const eversion_t &v, + const std::vector<pg_shard_t> &peers) { + progress_tracker->enqueue_push(obj); + backfill_machine.backfill_listener.enqueue_push(obj, v, peers); +} + } // namespace crimson::osd diff --git a/src/crimson/osd/backfill_state.h b/src/crimson/osd/backfill_state.h index a49cbeaac06..072c91e079d 100644 --- a/src/crimson/osd/backfill_state.h +++ b/src/crimson/osd/backfill_state.h @@ -304,6 +304,15 @@ public: backfill_machine.process_event(*std::move(evt)); } + void enqueue_standalone_push( + const hobject_t &obj, + const eversion_t &v, + const std::vector<pg_shard_t> &peers); + + bool is_triggered() const { + return backfill_machine.triggering_event() != nullptr; + } + hobject_t get_last_backfill_started() const { return last_backfill_started; } diff --git a/src/crimson/osd/ec_backend.cc b/src/crimson/osd/ec_backend.cc index 32eaaf02b3f..007d0bf35f3 100644 --- a/src/crimson/osd/ec_backend.cc +++ b/src/crimson/osd/ec_backend.cc @@ -26,6 +26,7 @@ ECBackend::_read(const hobject_t& hoid, ECBackend::rep_op_fut_t ECBackend::submit_transaction(const std::set<pg_shard_t> &pg_shards, const hobject_t& hoid, + crimson::osd::ObjectContextRef&& new_clone, ceph::os::Transaction&& txn, osd_op_params_t&& osd_op_p, epoch_t min_epoch, epoch_t max_epoch, diff --git a/src/crimson/osd/ec_backend.h b/src/crimson/osd/ec_backend.h index 90a7e2b1f4d..b14c78c9fc4 100644 --- a/src/crimson/osd/ec_backend.h +++ b/src/crimson/osd/ec_backend.h @@ -28,6 +28,7 @@ private: rep_op_fut_t submit_transaction(const std::set<pg_shard_t> &pg_shards, const hobject_t& hoid, + crimson::osd::ObjectContextRef&& new_clone, 
ceph::os::Transaction&& txn, osd_op_params_t&& req, epoch_t min_epoch, epoch_t max_epoch, diff --git a/src/crimson/osd/ops_executer.cc b/src/crimson/osd/ops_executer.cc index 4e735c3b4cb..97b241fdce4 100644 --- a/src/crimson/osd/ops_executer.cc +++ b/src/crimson/osd/ops_executer.cc @@ -940,6 +940,7 @@ std::unique_ptr<OpsExecuter::CloningContext> OpsExecuter::execute_clone( }; encode(cloned_snaps, cloning_ctx->log_entry.snaps); cloning_ctx->log_entry.clean_regions.mark_data_region_dirty(0, initial_obs.oi.size); + cloning_ctx->clone_obc = clone_obc; return cloning_ctx; } @@ -966,7 +967,7 @@ void OpsExecuter::update_clone_overlap() { void OpsExecuter::CloningContext::apply_to( std::vector<pg_log_entry_t>& log_entries, - ObjectContext& processed_obc) && + ObjectContext& processed_obc) { log_entry.mtime = processed_obc.obs.oi.mtime; log_entries.insert(log_entries.begin(), std::move(log_entry)); @@ -983,7 +984,7 @@ OpsExecuter::flush_clone_metadata( assert(!txn.empty()); update_clone_overlap(); if (cloning_ctx) { - std::move(*cloning_ctx).apply_to(log_entries, *obc); + cloning_ctx->apply_to(log_entries, *obc); } if (snapc.seq > obc->ssc->snapset.seq) { // update snapset with latest snap context diff --git a/src/crimson/osd/ops_executer.h b/src/crimson/osd/ops_executer.h index e770e825b32..94b64ccebb1 100644 --- a/src/crimson/osd/ops_executer.h +++ b/src/crimson/osd/ops_executer.h @@ -197,10 +197,11 @@ private: struct CloningContext { SnapSet new_snapset; pg_log_entry_t log_entry; + ObjectContextRef clone_obc; void apply_to( std::vector<pg_log_entry_t>& log_entries, - ObjectContext& processed_obc) &&; + ObjectContext& processed_obc); }; std::unique_ptr<CloningContext> cloning_ctx; @@ -504,6 +505,7 @@ OpsExecuter::flush_changes_n_do_ops_effects( ceph_assert(want_mutate); } + apply_stats(); if (want_mutate) { auto log_entries = flush_clone_metadata( prepare_transaction(ops), @@ -519,14 +521,15 @@ OpsExecuter::flush_changes_n_do_ops_effects( std::move(txn), std::move(obc), 
std::move(*osd_op_params), - std::move(log_entries)); + std::move(log_entries), + cloning_ctx + ? std::move(cloning_ctx->clone_obc) + : nullptr); submitted = std::move(_submitted); all_completed = std::move(_all_completed); } - apply_stats(); - if (op_effects.size()) [[unlikely]] { // need extra ref pg due to apply_stats() which can be executed after // informing snap mapper diff --git a/src/crimson/osd/osd.cc b/src/crimson/osd/osd.cc index 34ad97ceb06..0f19bfd7145 100644 --- a/src/crimson/osd/osd.cc +++ b/src/crimson/osd/osd.cc @@ -504,6 +504,8 @@ seastar::future<> OSD::start() }).then_unpack([this] { return _add_me_to_crush(); }).then([this] { + return _add_device_class(); + }).then([this] { monc->sub_want("osd_pg_creates", last_pg_create_epoch, 0); monc->sub_want("mgrmap", 0, 0); monc->sub_want("osdmap", 0, 0); @@ -608,6 +610,38 @@ seastar::future<> OSD::_send_boot() return monc->send_message(std::move(m)); } +seastar::future<> OSD::_add_device_class() +{ + LOG_PREFIX(OSD::_add_device_class); + if (!local_conf().get_val<bool>("osd_class_update_on_start")) { + co_return; + } + + std::string device_class = co_await store.get_default_device_class(); + if (device_class.empty()) { + WARN("Device class is empty; skipping crush update."); + co_return; + } + + INFO("device_class is {} ", device_class); + + std::string cmd = fmt::format( + R"({{"prefix": "osd crush set-device-class", "class": "{}", "ids": ["{}"]}})", + device_class, stringify(whoami) + ); + + auto [code, message, out] = co_await monc->run_command(std::move(cmd), {}); + if (code) { + // to be caught by crimson/osd/main.cc + WARN("fail to set device_class : {} ({})", message, code); + throw std::runtime_error("fail to set device_class"); + } else { + INFO("device_class was set: {}", message); + } + + co_return; +} + seastar::future<> OSD::_add_me_to_crush() { LOG_PREFIX(OSD::_add_me_to_crush); diff --git a/src/crimson/osd/osd.h b/src/crimson/osd/osd.h index d7d54d5d2c3..1a84ccd6a3f 100644 --- 
a/src/crimson/osd/osd.h +++ b/src/crimson/osd/osd.h @@ -188,6 +188,7 @@ private: seastar::future<> _preboot(version_t oldest_osdmap, version_t newest_osdmap); seastar::future<> _send_boot(); seastar::future<> _add_me_to_crush(); + seastar::future<> _add_device_class(); seastar::future<> osdmap_subscribe(version_t epoch, bool force_request); diff --git a/src/crimson/osd/osd_operations/snaptrim_event.cc b/src/crimson/osd/osd_operations/snaptrim_event.cc index c5bdcae47f2..8cab6125682 100644 --- a/src/crimson/osd/osd_operations/snaptrim_event.cc +++ b/src/crimson/osd/osd_operations/snaptrim_event.cc @@ -435,6 +435,7 @@ SnapTrimObjSubEvent::process_and_submit(ObjectContextRef head_obc, auto [submitted, all_completed] = co_await pg->submit_transaction( std::move(clone_obc), + nullptr, std::move(txn), std::move(osd_op_p), std::move(log_entries) diff --git a/src/crimson/osd/pg.cc b/src/crimson/osd/pg.cc index de39fb45716..1e2988efbbe 100644 --- a/src/crimson/osd/pg.cc +++ b/src/crimson/osd/pg.cc @@ -907,11 +907,23 @@ void PG::mutate_object( } } +void PG::enqueue_push_for_backfill( + const hobject_t &obj, + const eversion_t &v, + const std::vector<pg_shard_t> &peers) +{ + assert(recovery_handler); + assert(recovery_handler->backfill_state); + auto backfill_state = recovery_handler->backfill_state.get(); + backfill_state->enqueue_standalone_push(obj, v, peers); +} + PG::interruptible_future< std::tuple<PG::interruptible_future<>, PG::interruptible_future<>>> PG::submit_transaction( ObjectContextRef&& obc, + ObjectContextRef&& new_clone, ceph::os::Transaction&& txn, osd_op_params_t&& osd_op_p, std::vector<pg_log_entry_t>&& log_entries) @@ -924,8 +936,9 @@ PG::submit_transaction( } epoch_t map_epoch = get_osdmap_epoch(); + auto at_version = osd_op_p.at_version; - peering_state.pre_submit_op(obc->obs.oi.soid, log_entries, osd_op_p.at_version); + peering_state.pre_submit_op(obc->obs.oi.soid, log_entries, at_version); peering_state.update_trim_to(); 
ceph_assert(!log_entries.empty()); @@ -939,6 +952,7 @@ PG::submit_transaction( auto [submitted, all_completed] = co_await backend->submit_transaction( peering_state.get_acting_recovery_backfill(), obc->obs.oi.soid, + std::move(new_clone), std::move(txn), std::move(osd_op_p), peering_state.get_last_peering_reset(), @@ -947,8 +961,8 @@ PG::submit_transaction( co_return std::make_tuple( std::move(submitted), all_completed.then_interruptible( - [this, last_complete=peering_state.get_info().last_complete, - at_version=osd_op_p.at_version](auto acked) { + [this, last_complete=peering_state.get_info().last_complete, at_version] + (auto acked) { for (const auto& peer : acked) { peering_state.update_peer_last_complete_ondisk( peer.shard, peer.last_complete_ondisk); @@ -1153,11 +1167,13 @@ PG::submit_executer_fut PG::submit_executer( [FNAME, this](auto&& txn, auto&& obc, auto&& osd_op_p, - auto&& log_entries) { + auto&& log_entries, + auto&& new_clone) { DEBUGDPP("object {} submitting txn", *this, obc->get_oid()); mutate_object(obc, txn, osd_op_p); return submit_transaction( std::move(obc), + std::move(new_clone), std::move(txn), std::move(osd_op_p), std::move(log_entries)); @@ -1604,7 +1620,7 @@ bool PG::should_send_op( // missing set hoid <= peering_state.get_peer_info(peer).last_backfill || (has_backfill_state() && hoid <= get_last_backfill_started() && - !peering_state.get_peer_missing(peer).is_missing(hoid))); + !is_missing_on_peer(peer, hoid))); if (!should_send) { ceph_assert(is_backfill_target(peer)); logger().debug("{} issue_repop shipping empty opt to osd." 
diff --git a/src/crimson/osd/pg.h b/src/crimson/osd/pg.h index b0429c8fb4f..15aeec0e4f3 100644 --- a/src/crimson/osd/pg.h +++ b/src/crimson/osd/pg.h @@ -45,6 +45,7 @@ class MQuery; class OSDMap; class PGBackend; +class ReplicatedBackend; class PGPeeringEvent; class osd_op_params_t; @@ -678,6 +679,7 @@ private: std::tuple<interruptible_future<>, interruptible_future<>>> submit_transaction( ObjectContextRef&& obc, + ObjectContextRef&& new_clone, ceph::os::Transaction&& txn, osd_op_params_t&& oop, std::vector<pg_log_entry_t>&& log_entries); @@ -885,6 +887,10 @@ private: friend class SnapTrimObjSubEvent; private: + void enqueue_push_for_backfill( + const hobject_t &obj, + const eversion_t &v, + const std::vector<pg_shard_t> &peers); void mutate_object( ObjectContextRef& obc, ceph::os::Transaction& txn, @@ -893,7 +899,7 @@ private: bool can_discard_op(const MOSDOp& m) const; void context_registry_on_change(); bool is_missing_object(const hobject_t& soid) const { - return peering_state.get_pg_log().get_missing().get_items().count(soid); + return get_local_missing().is_missing(soid); } bool is_unreadable_object(const hobject_t &oid, eversion_t* v = 0) const final { @@ -901,6 +907,11 @@ private: !peering_state.get_missing_loc().readable_with_acting( oid, get_actingset(), v); } + bool is_missing_on_peer( + const pg_shard_t &peer, + const hobject_t &soid) const { + return peering_state.get_peer_missing(peer).is_missing(soid); + } bool is_degraded_or_backfilling_object(const hobject_t& soid) const; const std::set<pg_shard_t> &get_actingset() const { return peering_state.get_actingset(); @@ -908,6 +919,7 @@ private: private: friend class IOInterruptCondition; + friend class ::ReplicatedBackend; struct log_update_t { std::set<pg_shard_t> waiting_on; seastar::shared_promise<> all_committed; diff --git a/src/crimson/osd/pg_backend.h b/src/crimson/osd/pg_backend.h index fa1f1405ffe..813218983fd 100644 --- a/src/crimson/osd/pg_backend.h +++ b/src/crimson/osd/pg_backend.h @@ -414,6 
+414,7 @@ public: virtual rep_op_fut_t submit_transaction(const std::set<pg_shard_t> &pg_shards, const hobject_t& hoid, + crimson::osd::ObjectContextRef&& new_clone, ceph::os::Transaction&& txn, osd_op_params_t&& osd_op_p, epoch_t min_epoch, epoch_t max_epoch, diff --git a/src/crimson/osd/pg_recovery.h b/src/crimson/osd/pg_recovery.h index 705b3176b97..657e6d3e888 100644 --- a/src/crimson/osd/pg_recovery.h +++ b/src/crimson/osd/pg_recovery.h @@ -45,6 +45,10 @@ public: seastar::future<> stop() { return seastar::now(); } void on_pg_clean(); + void enqueue_push( + const hobject_t& obj, + const eversion_t& v, + const std::vector<pg_shard_t> &peers) final; private: PGRecoveryListener* pg; size_t start_primary_recovery_ops( @@ -108,10 +112,6 @@ private: const hobject_t& end) final; void request_primary_scan( const hobject_t& begin) final; - void enqueue_push( - const hobject_t& obj, - const eversion_t& v, - const std::vector<pg_shard_t> &peers) final; void enqueue_drop( const pg_shard_t& target, const hobject_t& obj, diff --git a/src/crimson/osd/replicated_backend.cc b/src/crimson/osd/replicated_backend.cc index 12ee38b4370..f09cd147ea9 100644 --- a/src/crimson/osd/replicated_backend.cc +++ b/src/crimson/osd/replicated_backend.cc @@ -36,19 +36,59 @@ ReplicatedBackend::_read(const hobject_t& hoid, return store->read(coll, ghobject_t{hoid}, off, len, flags); } +MURef<MOSDRepOp> ReplicatedBackend::new_repop_msg( + const pg_shard_t &pg_shard, + const hobject_t &hoid, + const bufferlist &encoded_txn, + const osd_op_params_t &osd_op_p, + epoch_t min_epoch, + epoch_t map_epoch, + const std::vector<pg_log_entry_t> &log_entries, + bool send_op, + ceph_tid_t tid) +{ + ceph_assert(pg_shard != whoami); + auto m = crimson::make_message<MOSDRepOp>( + osd_op_p.req_id, + whoami, + spg_t{pgid, pg_shard.shard}, + hoid, + CEPH_OSD_FLAG_ACK | CEPH_OSD_FLAG_ONDISK, + map_epoch, + min_epoch, + tid, + osd_op_p.at_version); + if (send_op) { + m->set_data(encoded_txn); + } else { + 
ceph::os::Transaction t; + bufferlist bl; + encode(t, bl); + m->set_data(bl); + } + encode(log_entries, m->logbl); + m->pg_trim_to = osd_op_p.pg_trim_to; + m->pg_committed_to = osd_op_p.pg_committed_to; + m->pg_stats = pg.get_info().stats; + return m; +} + ReplicatedBackend::rep_op_fut_t -ReplicatedBackend::submit_transaction(const std::set<pg_shard_t>& pg_shards, - const hobject_t& hoid, - ceph::os::Transaction&& t, - osd_op_params_t&& opp, - epoch_t min_epoch, epoch_t map_epoch, - std::vector<pg_log_entry_t>&& logv) +ReplicatedBackend::submit_transaction( + const std::set<pg_shard_t> &pg_shards, + const hobject_t& hoid, + crimson::osd::ObjectContextRef &&new_clone, + ceph::os::Transaction&& t, + osd_op_params_t&& opp, + epoch_t min_epoch, epoch_t map_epoch, + std::vector<pg_log_entry_t>&& logv) { LOG_PREFIX(ReplicatedBackend::submit_transaction); DEBUGDPP("object {}", dpp, hoid); auto log_entries = std::move(logv); auto txn = std::move(t); auto osd_op_p = std::move(opp); + auto _new_clone = std::move(new_clone); const ceph_tid_t tid = shard_services.get_tid(); auto pending_txn = @@ -60,37 +100,34 @@ ReplicatedBackend::submit_transaction(const std::set<pg_shard_t>& pg_shards, le.mark_unrollbackable(); } + std::vector<pg_shard_t> to_push_clone; auto sends = std::make_unique<std::vector<seastar::future<>>>(); - for (auto pg_shard : pg_shards) { - if (pg_shard != whoami) { - auto m = crimson::make_message<MOSDRepOp>( - osd_op_p.req_id, - whoami, - spg_t{pgid, pg_shard.shard}, - hoid, - CEPH_OSD_FLAG_ACK | CEPH_OSD_FLAG_ONDISK, - map_epoch, - min_epoch, - tid, - osd_op_p.at_version); - if (pg.should_send_op(pg_shard, hoid)) { - m->set_data(encoded_txn); - } else { - ceph::os::Transaction t; - bufferlist bl; - encode(t, bl); - m->set_data(bl); + for (auto &pg_shard : pg_shards) { + if (pg_shard == whoami) { + continue; + } + MURef<MOSDRepOp> m; + if (pg.should_send_op(pg_shard, hoid)) { + m = new_repop_msg( + pg_shard, hoid, encoded_txn, osd_op_p, + min_epoch, 
map_epoch, log_entries, true, tid); + } else { + m = new_repop_msg( + pg_shard, hoid, encoded_txn, osd_op_p, + min_epoch, map_epoch, log_entries, false, tid); + if (_new_clone && pg.is_missing_on_peer(pg_shard, hoid)) { + // The head is in the push queue but hasn't been pushed yet. + // We need to ensure that the newly created clone will be + // pushed as well, otherwise we might skip it. + // See: https://tracker.ceph.com/issues/68808 + to_push_clone.push_back(pg_shard); } - pending_txn->second.acked_peers.push_back({pg_shard, eversion_t{}}); - encode(log_entries, m->logbl); - m->pg_trim_to = osd_op_p.pg_trim_to; - m->pg_committed_to = osd_op_p.pg_committed_to; - m->pg_stats = pg.get_info().stats; - // TODO: set more stuff. e.g., pg_states - sends->emplace_back( - shard_services.send_to_osd( - pg_shard.osd, std::move(m), map_epoch)); } + pending_txn->second.acked_peers.push_back({pg_shard, eversion_t{}}); + // TODO: set more stuff. e.g., pg_states + sends->emplace_back( + shard_services.send_to_osd( + pg_shard.osd, std::move(m), map_epoch)); } co_await pg.update_snap_map(log_entries, txn); @@ -120,9 +157,16 @@ ReplicatedBackend::submit_transaction(const std::set<pg_shard_t>& pg_shards, return seastar::now(); } return peers->all_committed.get_shared_future(); - }).then_interruptible([pending_txn, this] { + }).then_interruptible([pending_txn, this, _new_clone, + to_push_clone=std::move(to_push_clone)] { auto acked_peers = std::move(pending_txn->second.acked_peers); pending_trans.erase(pending_txn); + if (_new_clone && !to_push_clone.empty()) { + pg.enqueue_push_for_backfill( + _new_clone->obs.oi.soid, + _new_clone->obs.oi.version, + to_push_clone); + } return seastar::make_ready_future< crimson::osd::acked_peers_t>(std::move(acked_peers)); }); diff --git a/src/crimson/osd/replicated_backend.h b/src/crimson/osd/replicated_backend.h index fb8704d8742..d5844b23a0c 100644 --- a/src/crimson/osd/replicated_backend.h +++ b/src/crimson/osd/replicated_backend.h @@ -35,6 
+35,7 @@ private: rep_op_fut_t submit_transaction( const std::set<pg_shard_t> &pg_shards, const hobject_t& hoid, + crimson::osd::ObjectContextRef&& new_clone, ceph::os::Transaction&& txn, osd_op_params_t&& osd_op_p, epoch_t min_epoch, epoch_t max_epoch, @@ -60,6 +61,17 @@ private: pending_transactions_t pending_trans; crimson::osd::PG& pg; + MURef<MOSDRepOp> new_repop_msg( + const pg_shard_t &pg_shard, + const hobject_t &hoid, + const bufferlist &encoded_txn, + const osd_op_params_t &osd_op_p, + epoch_t min_epoch, + epoch_t map_epoch, + const std::vector<pg_log_entry_t> &log_entries, + bool send_op, + ceph_tid_t tid); + seastar::future<> request_committed( const osd_reqid_t& reqid, const eversion_t& at_version) final; }; diff --git a/src/exporter/DaemonMetricCollector.cc b/src/exporter/DaemonMetricCollector.cc index 4b8a8131bcf..d27b3ac43c5 100644 --- a/src/exporter/DaemonMetricCollector.cc +++ b/src/exporter/DaemonMetricCollector.cc @@ -29,9 +29,16 @@ using json_object = boost::json::object; using json_value = boost::json::value; using json_array = boost::json::array; -void DaemonMetricCollector::request_loop(boost::asio::steady_timer &timer) { - timer.async_wait([&](const boost::system::error_code &e) { - std::cerr << e << std::endl; +void DaemonMetricCollector::request_loop() { + timer.async_wait([this](const boost::system::error_code &e) { + if (shutdown_flag) { + dout(1) << "Metric collector request loop cancelled" << dendl; + return; + } + + if (e) return; // Exit on error or cancellation + + dout(10) << "Getting metrics loop..." 
<< dendl; update_sockets(); bool sort_metrics = g_conf().get_val<bool>("exporter_sort_metrics"); @@ -42,19 +49,24 @@ void DaemonMetricCollector::request_loop(boost::asio::steady_timer &timer) { auto stats_period = g_conf().get_val<int64_t>("exporter_stats_period"); // time to wait before sending requests again timer.expires_from_now(std::chrono::seconds(stats_period)); - request_loop(timer); + request_loop(); }); } void DaemonMetricCollector::main() { - // time to wait before sending requests again - - boost::asio::io_context io; - boost::asio::steady_timer timer{io, std::chrono::seconds(0)}; - request_loop(timer); + shutdown_flag = false; + timer.expires_from_now(std::chrono::seconds(0)); + request_loop(); io.run(); } +void DaemonMetricCollector::shutdown(){ + shutdown_flag = true; + timer.cancel(); // Explicitly cancel the timer + dout(1) << "Collector shutdown initiated, timer canceled" << dendl; + io.stop(); +} + std::string DaemonMetricCollector::get_metrics() { const std::lock_guard<std::mutex> lock(metrics_mutex); return metrics; @@ -499,3 +511,4 @@ DaemonMetricCollector &collector_instance() { static DaemonMetricCollector instance; return instance; } + diff --git a/src/exporter/DaemonMetricCollector.h b/src/exporter/DaemonMetricCollector.h index 3302e95df91..5831a0fa3b0 100644 --- a/src/exporter/DaemonMetricCollector.h +++ b/src/exporter/DaemonMetricCollector.h @@ -1,17 +1,20 @@ #pragma once #include "common/admin_socket_client.h" +#include <atomic> #include <map> #include <string> #include <vector> #include <boost/asio/steady_timer.hpp> +#include <boost/thread.hpp> #include <boost/json/object.hpp> #include <filesystem> #include <map> #include <string> #include <vector> + struct pstat { unsigned long utime; unsigned long stime; @@ -43,11 +46,16 @@ public: std::string metrics; std::pair<labels_t, std::string> add_fixed_name_metrics(std::string metric_name); void update_sockets(); + void shutdown(); private: std::mutex metrics_mutex; 
std::unique_ptr<MetricsBuilder> builder; - void request_loop(boost::asio::steady_timer &timer); + boost::asio::io_context io; + boost::asio::steady_timer timer{io}; + std::atomic<bool> shutdown_flag{false}; + + void request_loop(); void dump_asok_metric(boost::json::object perf_info, boost::json::value perf_values, std::string name, @@ -108,3 +116,4 @@ public: }; DaemonMetricCollector &collector_instance(); + diff --git a/src/exporter/ceph_exporter.cc b/src/exporter/ceph_exporter.cc index 2e2c16bb085..44b67c7e615 100644 --- a/src/exporter/ceph_exporter.cc +++ b/src/exporter/ceph_exporter.cc @@ -1,16 +1,31 @@ #include "common/ceph_argparse.h" #include "common/config.h" -#include "exporter/DaemonMetricCollector.h" -#include "exporter/web_server.h" +#include "common/debug.h" #include "global/global_init.h" #include "global/global_context.h" - +#include "global/signal_handler.h" +#include "exporter/DaemonMetricCollector.h" +#include "exporter/web_server.h" #include <boost/thread/thread.hpp> #include <iostream> #include <map> #include <string> +#include <atomic> +#include <chrono> +#include <thread> #define dout_context g_ceph_context +#define dout_subsys ceph_subsys_ceph_exporter + +DaemonMetricCollector &collector = collector_instance(); + +static void handle_signal(int signum) +{ + ceph_assert(signum == SIGINT || signum == SIGTERM); + derr << "*** Got signal " << sig_str(signum) << " ***" << dendl; + // Finish the DaemonMetricCollector + collector.shutdown(); +} static void usage() { std::cout << "usage: ceph-exporter [options]\n" @@ -27,7 +42,6 @@ static void usage() { } int main(int argc, char **argv) { - auto args = argv_to_vec(argc, argv); if (args.empty()) { std::cerr << argv[0] << ": -h or --help for usage" << std::endl; @@ -64,8 +78,30 @@ int main(int argc, char **argv) { } common_init_finish(g_ceph_context); + // Register signal handlers + init_async_signal_handler(); + register_async_signal_handler(SIGHUP, sighup_handler); + 
register_async_signal_handler_oneshot(SIGINT, handle_signal); + register_async_signal_handler_oneshot(SIGTERM, handle_signal); + + // Start the web server thread boost::thread server_thread(web_server_thread_entrypoint); - DaemonMetricCollector &collector = collector_instance(); + + // Start the DaemonMetricCollector collector.main(); + + // Interrupted. Time to terminate + unregister_async_signal_handler(SIGHUP, sighup_handler); + unregister_async_signal_handler(SIGINT, handle_signal); + unregister_async_signal_handler(SIGTERM, handle_signal); + shutdown_async_signal_handler(); + + // Stop the web server thread by interrupting it + stop_web_server(); + server_thread.interrupt(); // Interrupt the web server thread server_thread.join(); + + dout(1) << "Ceph exporter stopped" << dendl; + + return 0; } diff --git a/src/exporter/web_server.cc b/src/exporter/web_server.cc index 96cc02b389f..c01205f26bb 100644 --- a/src/exporter/web_server.cc +++ b/src/exporter/web_server.cc @@ -28,6 +28,9 @@ namespace net = boost::asio; // from <boost/asio.hpp> namespace ssl = boost::asio::ssl; // from <boost/asio/ssl.hpp> using tcp = boost::asio::ip::tcp; // from <boost/asio/ip/tcp.hpp> +//common io context for the web servers +std::shared_ptr<net::io_context> global_ioc; + // Base class for common functionality class web_connection { public: @@ -43,7 +46,7 @@ protected: web_connection(net::any_io_executor executor, std::chrono::seconds timeout) : deadline_(executor, timeout) {} - // Common request processing logic + // Common request processing logic void process_request() { response_.version(request_.version()); response_.keep_alive(request_.keep_alive()); @@ -64,7 +67,7 @@ protected: write_response(); } - // Construct a response message based on the request target + // Construct a response message based on the request target void create_response() { if (request_.target() == "/") { response_.result(http::status::moved_permanently); @@ -81,7 +84,7 @@ protected: } } - // Asynchronously 
transmit the response message + // Asynchronously transmit the response message virtual void write_response() = 0; // Check whether we have spent enough time on this connection @@ -228,28 +231,33 @@ void https_server(tcp::acceptor &acceptor, ssl::context &ssl_ctx) { } void run_http_server(const std::string& exporter_addr, short unsigned int port) { - net::io_context ioc{1}; - tcp::acceptor acceptor{ioc, {net::ip::make_address(exporter_addr), port}}; - tcp::socket socket{ioc}; + tcp::acceptor acceptor{*global_ioc, {net::ip::make_address(exporter_addr), port}}; + tcp::socket socket{*global_ioc}; http_server(acceptor, socket); dout(1) << "HTTP server running on " << exporter_addr << ":" << port << dendl; - ioc.run(); + global_ioc->run(); } void run_https_server(const std::string& exporter_addr, short unsigned int port, const std::string& cert_file, const std::string& key_file) { - net::io_context ioc{1}; ssl::context ssl_ctx(ssl::context::tlsv13); ssl_ctx.use_certificate_chain_file(cert_file); ssl_ctx.use_private_key_file(key_file, ssl::context::pem); - tcp::acceptor acceptor{ioc, {net::ip::make_address(exporter_addr), port}}; + tcp::acceptor acceptor{*global_ioc, {net::ip::make_address(exporter_addr), port}}; https_server(acceptor, ssl_ctx); dout(1) << "HTTPS server running on " << exporter_addr << ":" << port << dendl; - ioc.run(); + global_ioc->run(); +} + +void stop_web_server() { + if (global_ioc) { + global_ioc->stop(); + dout(1) << "Ceph exporter web server stopped" << dendl; + } } void web_server_thread_entrypoint() { @@ -259,18 +267,21 @@ void web_server_thread_entrypoint() { std::string cert_file = g_conf().get_val<std::string>("exporter_cert_file"); std::string key_file = g_conf().get_val<std::string>("exporter_key_file"); + // Initialize global_ioc + global_ioc = std::make_shared<net::io_context>(1); + if (cert_file.empty() && key_file.empty()) { run_http_server(exporter_addr, port); } else { try { run_https_server(exporter_addr, port, cert_file, 
key_file); } catch (const std::exception &e) { - dout(1) << "Failed to start HTTPS server: " << e.what() << dendl; + derr << "Failed to start HTTPS server: " << e.what() << dendl; exit(EXIT_FAILURE); } } } catch (std::exception const &e) { - dout(1) << "Error: " << e.what() << dendl; + derr << "Error: " << e.what() << dendl; exit(EXIT_FAILURE); } } diff --git a/src/exporter/web_server.h b/src/exporter/web_server.h index c3339a8d43a..c6d4c54eca4 100644 --- a/src/exporter/web_server.h +++ b/src/exporter/web_server.h @@ -3,3 +3,4 @@ #include <string> void web_server_thread_entrypoint(); +void stop_web_server(); diff --git a/src/global/signal_handler.cc b/src/global/signal_handler.cc index d3387267871..b8149718724 100644 --- a/src/global/signal_handler.cc +++ b/src/global/signal_handler.cc @@ -307,7 +307,7 @@ static void handle_oneshot_fatal_signal(int signum) char buf[1024]; char pthread_name[16] = {0}; //limited by 16B include terminating null byte. - int r = ceph_pthread_getname(pthread_self(), pthread_name, sizeof(pthread_name)); + int r = ceph_pthread_getname(pthread_name, sizeof(pthread_name)); (void)r; #if defined(__sun) char message[SIG2STR_MAX]; diff --git a/src/include/ceph_fs.h b/src/include/ceph_fs.h index 627f4a3e85b..137669c1963 100644 --- a/src/include/ceph_fs.h +++ b/src/include/ceph_fs.h @@ -1005,7 +1005,7 @@ extern const char *ceph_cap_op_name(int op); /* extra info for cap import/export */ struct ceph_mds_cap_peer { __le64 cap_id; - __le32 seq; + __le32 issue_seq; __le32 mseq; __le32 mds; __u8 flags; @@ -1058,7 +1058,7 @@ struct ceph_mds_cap_release { struct ceph_mds_cap_item { __le64 ino; __le64 cap_id; - __le32 migrate_seq, seq; + __le32 migrate_seq, issue_seq; } __attribute__ ((packed)); #define CEPH_MDS_LEASE_REVOKE 1 /* mds -> client */ diff --git a/src/include/compat.h b/src/include/compat.h index 53285243d91..a7d10fc5425 100644 --- a/src/include/compat.h +++ b/src/include/compat.h @@ -179,60 +179,12 @@ struct cpu_set_t; #define MSG_DONTWAIT 
MSG_NONBLOCK #endif -/* compiler warning free success noop */ -#define pthread_setname_noop_helper(thread, name) ({ \ - int __i = 0; \ - __i; }) - -#define pthread_getname_noop_helper(thread, name, len) ({ \ - if (name != NULL) \ - *name = '\0'; \ - 0; }) - #define pthread_kill_unsupported_helper(thread, signal) ({ \ int __i = -ENOTSUP; \ __i; }) #if defined(_WIN32) && defined(__clang__) && \ !defined(_LIBCPP_HAS_THREAD_API_PTHREAD) - // In this case, llvm doesn't use the pthread api for std::thread. - // We cannot use native_handle() with the pthread api, nor can we pass - // it to Windows API functions. - #define ceph_pthread_setname pthread_setname_noop_helper -#elif defined(HAVE_PTHREAD_SETNAME_NP) - #if defined(__APPLE__) - #define ceph_pthread_setname(thread, name) ({ \ - int __result = 0; \ - if (thread == pthread_self()) \ - __result = pthread_setname_np(name); \ - __result; }) - #else - #define ceph_pthread_setname pthread_setname_np - #endif -#elif defined(HAVE_PTHREAD_SET_NAME_NP) - /* Fix a small name diff and return 0 */ - #define ceph_pthread_setname(thread, name) ({ \ - pthread_set_name_np(thread, name); \ - 0; }) -#else - #define ceph_pthread_setname pthread_setname_noop_helper -#endif - -#if defined(_WIN32) && defined(__clang__) && \ - !defined(_LIBCPP_HAS_THREAD_API_PTHREAD) - #define ceph_pthread_getname pthread_getname_noop_helper -#elif defined(HAVE_PTHREAD_GETNAME_NP) - #define ceph_pthread_getname pthread_getname_np -#elif defined(HAVE_PTHREAD_GET_NAME_NP) - #define ceph_pthread_getname(thread, name, len) ({ \ - pthread_get_name_np(thread, name, len); \ - 0; }) -#else - #define ceph_pthread_getname pthread_getname_noop_helper -#endif - -#if defined(_WIN32) && defined(__clang__) && \ - !defined(_LIBCPP_HAS_THREAD_API_PTHREAD) #define ceph_pthread_kill pthread_kill_unsupported_helper #else #define ceph_pthread_kill pthread_kill @@ -244,6 +196,9 @@ int ceph_posix_fallocate(int fd, off_t offset, off_t len); extern "C" { #endif +int 
ceph_pthread_getname(char* name, size_t size); +int ceph_pthread_setname(const char* name); + int pipe_cloexec(int pipefd[2], int flags); char *ceph_strerror_r(int errnum, char *buf, size_t buflen); unsigned get_page_size(); diff --git a/src/include/elist.h b/src/include/elist.h index edfb7955494..e777873b045 100644 --- a/src/include/elist.h +++ b/src/include/elist.h @@ -15,6 +15,10 @@ #ifndef CEPH_ELIST_H #define CEPH_ELIST_H +#include <cstddef> // for size_t + +#include "include/ceph_assert.h" + /* * elist: embedded list. * diff --git a/src/include/str_list.h b/src/include/str_list.h index cad76c1d6f5..a4c7432c6ef 100644 --- a/src/include/str_list.h +++ b/src/include/str_list.h @@ -2,7 +2,6 @@ #define CEPH_STRLIST_H #include <list> -#include <set> #include <string> #include <string_view> #include <vector> diff --git a/src/log/Entry.h b/src/log/Entry.h index db39eca0ef3..eeb25c5f593 100644 --- a/src/log/Entry.h +++ b/src/log/Entry.h @@ -24,6 +24,7 @@ namespace logging { class Entry { public: using time = log_time; + using thread_name_t = std::array<char, 16>; Entry() = delete; Entry(short pr, short sub) : @@ -32,8 +33,7 @@ public: m_prio(pr), m_subsys(sub) { - strncpy(m_thread_name, Thread::get_thread_name().data(), 16); - m_thread_name[15] = '\0'; + ceph_pthread_getname(m_thread_name.data(), m_thread_name.size()); } Entry(const Entry &) = default; Entry& operator=(const Entry &) = default; @@ -47,7 +47,7 @@ public: time m_stamp; pthread_t m_thread; short m_prio, m_subsys; - char m_thread_name[16]; + thread_name_t m_thread_name{}; static log_clock& clock() { static log_clock clock; diff --git a/src/log/Log.cc b/src/log/Log.cc index 49dd03c06c0..63d5205d9e2 100644 --- a/src/log/Log.cc +++ b/src/log/Log.cc @@ -31,6 +31,7 @@ #include <fmt/format.h> #include <fmt/ostream.h> +#include <fmt/ranges.h> #define MAX_LOG_BUF 65536 @@ -372,6 +373,7 @@ void Log::_flush_logbuf() void Log::_flush(EntryVector& t, bool crash) { + auto now = mono_clock::now(); long len = 0; if 
(t.empty()) { assert(m_log_buf.empty()); @@ -443,10 +445,29 @@ void Log::_flush(EntryVector& t, bool crash) m_journald->log_entry(e); } + { + auto [it, _] = m_recent_thread_names.try_emplace(e.m_thread, now, DEFAULT_MAX_THREAD_NAMES); + auto& [t, names] = it->second; + if (names.size() == 0 || names.front() != e.m_thread_name.data()) { + names.push_front(e.m_thread_name.data()); + } + t = now; + } + m_recent.push_back(std::move(e)); } t.clear(); + for (auto it = m_recent_thread_names.begin(); it != m_recent_thread_names.end(); ) { + auto t = it->second.first; + auto since = now - t; + if (since > std::chrono::seconds(60*60*24)) { + it = m_recent_thread_names.erase(it); + } else { + ++it; + } + } + _flush_logbuf(); } @@ -493,14 +514,10 @@ void Log::dump_recent() _flush(m_flush, false); _log_message("--- begin dump of recent events ---", true); - std::set<std::pair<pthread_t, const char *>> recent_pthread_ids; { EntryVector t; t.insert(t.end(), std::make_move_iterator(m_recent.begin()), std::make_move_iterator(m_recent.end())); m_recent.clear(); - for (const auto& e : t) { - recent_pthread_ids.emplace(std::make_pair(e.m_thread, e.m_thread_name)); - } _flush(t, true); } @@ -515,11 +532,15 @@ void Log::dump_recent() m_stderr_log, m_stderr_crash), true); _log_message("--- pthread ID / name mapping for recent threads ---", true); - for (auto& [pthread_id, pthread_name] : recent_pthread_ids) + for (const auto& [tid, t_names] : m_recent_thread_names) { + [[maybe_unused]] auto [t, names] = t_names; // we want the ID to be printed in the same format as we use for a log entry. // The reason is easier grepping. 
- _log_message(fmt::format(" {:x} / {}", tid_to_int(pthread_id), pthread_name), true); + auto msg = fmt::format(" {:x} / {}", + tid_to_int(tid), + fmt::join(names, ", ")); + _log_message(msg, true); } _log_message(fmt::format(" max_recent {:9}", m_recent.capacity()), true); diff --git a/src/log/Log.h b/src/log/Log.h index 3a60937af55..46d97734305 100644 --- a/src/log/Log.h +++ b/src/log/Log.h @@ -7,6 +7,7 @@ #include <boost/circular_buffer.hpp> #include <condition_variable> +#include <map> #include <memory> #include <mutex> #include <queue> @@ -14,6 +15,7 @@ #include <string_view> #include "common/Thread.h" +#include "common/ceph_time.h" #include "common/likely.h" #include "log/Entry.h" @@ -86,9 +88,14 @@ protected: private: using EntryRing = boost::circular_buffer<ConcreteEntry>; + using mono_clock = ceph::coarse_mono_clock; + using mono_time = ceph::coarse_mono_time; + + using RecentThreadNames = std::map<pthread_t, std::pair<mono_time, boost::circular_buffer<std::string> > >; static const std::size_t DEFAULT_MAX_NEW = 100; static const std::size_t DEFAULT_MAX_RECENT = 10000; + static constexpr std::size_t DEFAULT_MAX_THREAD_NAMES = 4; Log **m_indirect_this; @@ -102,6 +109,7 @@ private: pthread_t m_queue_mutex_holder; pthread_t m_flush_mutex_holder; + RecentThreadNames m_recent_thread_names; // protected by m_flush_mutex EntryVector m_new; ///< new entries EntryRing m_recent; ///< recent (less new) entries we've already written at low detail EntryVector m_flush; ///< entries to be flushed (here to optimize heap allocations) diff --git a/src/mds/Beacon.cc b/src/mds/Beacon.cc index 642d3428a27..6fbfc79d416 100644 --- a/src/mds/Beacon.cc +++ b/src/mds/Beacon.cc @@ -61,6 +61,7 @@ void Beacon::shutdown() std::unique_lock<std::mutex> lock(mutex); if (!finished) { finished = true; + cvar.notify_all(); lock.unlock(); if (sender.joinable()) sender.join(); @@ -74,7 +75,7 @@ void Beacon::init(const MDSMap &mdsmap) _notify_mdsmap(mdsmap); sender = std::thread([this]() { - 
ceph_pthread_setname(pthread_self(), "beacon"); + ceph_pthread_setname("mds-beacon"); std::unique_lock<std::mutex> lock(mutex); bool sent; while (!finished) { @@ -320,16 +321,15 @@ void Beacon::notify_health(MDSRank const *mds) // Detect MDS_HEALTH_TRIM condition // Indicates MDS is not trimming promptly { - const auto log_max_segments = mds->mdlog->get_max_segments(); - const auto log_warn_factor = g_conf().get_val<double>("mds_log_warn_factor"); - if (mds->mdlog->get_num_segments() > (size_t)(log_max_segments * log_warn_factor)) { + if (mds->mdlog->is_trim_slow()) { + auto num_segments = mds->mdlog->get_num_segments(); + auto max_segments = mds->mdlog->get_max_segments(); CachedStackStringStream css; - *css << "Behind on trimming (" << mds->mdlog->get_num_segments() - << "/" << log_max_segments << ")"; + *css << "Behind on trimming (" << num_segments << "/" << max_segments << ")"; MDSHealthMetric m(MDS_HEALTH_TRIM, HEALTH_WARN, css->strv()); - m.metadata["num_segments"] = stringify(mds->mdlog->get_num_segments()); - m.metadata["max_segments"] = stringify(log_max_segments); + m.metadata["num_segments"] = stringify(num_segments); + m.metadata["max_segments"] = stringify(max_segments); health.metrics.push_back(m); } } diff --git a/src/mds/CDir.cc b/src/mds/CDir.cc index af9f8edfffa..7d28e039d9c 100644 --- a/src/mds/CDir.cc +++ b/src/mds/CDir.cc @@ -2494,6 +2494,10 @@ void CDir::_omap_commit_ops(int r, int op_prio, int64_t metapool, version_t vers mdcache->mds->heartbeat_reset(); } + // the last omap commit includes the omap header, so account for + // that size early on so that when we reach `commit_one(true)`, + // there is enough space for the header. 
+ write_size += sizeof(fnode_t); using ceph::encode; for (auto &item : to_set) { bufferlist bl; diff --git a/src/mds/Capability.h b/src/mds/Capability.h index 9adcf3b25b9..0782464ad94 100644 --- a/src/mds/Capability.h +++ b/src/mds/Capability.h @@ -218,8 +218,6 @@ public: void set_cap_id(uint64_t i) { cap_id = i; } uint64_t get_cap_id() const { return cap_id; } - //ceph_seq_t get_last_issue() { return last_issue; } - bool is_suppress() const { return suppress > 0; } void inc_suppress() { suppress++; } void dec_suppress() { suppress--; } diff --git a/src/mds/Locker.cc b/src/mds/Locker.cc index 63608d48864..d64f176acb6 100644 --- a/src/mds/Locker.cc +++ b/src/mds/Locker.cc @@ -2599,6 +2599,7 @@ int Locker::issue_caps(CInode *in, Capability *only_cap) in->find_snaprealm()->inode->ino(), cap->get_cap_id(), cap->get_last_seq(), pending, wanted, 0, cap->get_mseq(), + cap->get_last_issue(), mds->get_osd_epoch_barrier()); in->encode_cap_message(m, cap); @@ -2649,6 +2650,7 @@ int Locker::issue_caps(CInode *in, Capability *only_cap) in->find_snaprealm()->inode->ino(), cap->get_cap_id(), cap->get_last_seq(), after, wanted, 0, cap->get_mseq(), + cap->get_last_issue(), mds->get_osd_epoch_barrier()); in->encode_cap_message(m, cap); @@ -2675,6 +2677,7 @@ void Locker::issue_truncate(CInode *in) cap->get_cap_id(), cap->get_last_seq(), cap->pending(), cap->wanted(), 0, cap->get_mseq(), + cap->get_last_issue(), mds->get_osd_epoch_barrier()); in->encode_cap_message(m, cap); mds->send_message_client_counted(m, cap->get_session()); @@ -3165,6 +3168,7 @@ void Locker::share_inode_max_size(CInode *in, Capability *only_cap) cap->pending(), cap->wanted(), 0, cap->get_mseq(), + cap->get_last_issue(), mds->get_osd_epoch_barrier()); in->encode_cap_message(m, cap); mds->send_message_client_counted(m, cap->get_session()); @@ -3375,10 +3379,10 @@ void Locker::handle_client_caps(const cref_t<MClientCaps> &m) ref_t<MClientCaps> ack; if (op == CEPH_CAP_OP_FLUSHSNAP) { if (mds->logger) 
mds->logger->inc(l_mdss_ceph_cap_op_flushsnap_ack); - ack = make_message<MClientCaps>(CEPH_CAP_OP_FLUSHSNAP_ACK, m->get_ino(), 0, 0, 0, 0, 0, dirty, 0, mds->get_osd_epoch_barrier()); + ack = make_message<MClientCaps>(CEPH_CAP_OP_FLUSHSNAP_ACK, m->get_ino(), 0, 0, 0, 0, 0, dirty, 0, 0, mds->get_osd_epoch_barrier()); } else { if (mds->logger) mds->logger->inc(l_mdss_ceph_cap_op_flush_ack); - ack = make_message<MClientCaps>(CEPH_CAP_OP_FLUSH_ACK, m->get_ino(), 0, m->get_cap_id(), m->get_seq(), m->get_caps(), 0, dirty, 0, mds->get_osd_epoch_barrier()); + ack = make_message<MClientCaps>(CEPH_CAP_OP_FLUSH_ACK, m->get_ino(), 0, m->get_cap_id(), m->get_seq(), m->get_caps(), 0, dirty, 0, 0, mds->get_osd_epoch_barrier()); } ack->set_snap_follows(follows); ack->set_client_tid(m->get_client_tid()); @@ -3500,7 +3504,7 @@ void Locker::handle_client_caps(const cref_t<MClientCaps> &m) // case we get a dup response, so whatever.) ref_t<MClientCaps> ack; if (dirty) { - ack = make_message<MClientCaps>(CEPH_CAP_OP_FLUSHSNAP_ACK, in->ino(), 0, 0, 0, 0, 0, dirty, 0, mds->get_osd_epoch_barrier()); + ack = make_message<MClientCaps>(CEPH_CAP_OP_FLUSHSNAP_ACK, in->ino(), 0, 0, 0, 0, 0, dirty, 0, 0, mds->get_osd_epoch_barrier()); ack->set_snap_follows(follows); ack->set_client_tid(m->get_client_tid()); ack->set_oldest_flush_tid(m->get_oldest_flush_tid()); @@ -3589,7 +3593,7 @@ void Locker::handle_client_caps(const cref_t<MClientCaps> &m) dout(7) << " flush client." 
<< client << " dirty " << ccap_string(dirty) << " seq " << m->get_seq() << " on " << *in << dendl; ack = make_message<MClientCaps>(CEPH_CAP_OP_FLUSH_ACK, in->ino(), 0, cap->get_cap_id(), m->get_seq(), - m->get_caps(), 0, dirty, 0, mds->get_osd_epoch_barrier()); + m->get_caps(), 0, dirty, 0, cap->get_last_issue(), mds->get_osd_epoch_barrier()); ack->set_client_tid(m->get_client_tid()); ack->set_oldest_flush_tid(m->get_oldest_flush_tid()); } @@ -4222,7 +4226,7 @@ void Locker::handle_client_cap_release(const cref_t<MClientCapRelease> &m) Session *session = mds->get_session(m); for (const auto &cap : m->caps) { - _do_cap_release(client, inodeno_t((uint64_t)cap.ino) , cap.cap_id, cap.migrate_seq, cap.seq); + _do_cap_release(client, inodeno_t((uint64_t)cap.ino) , cap.cap_id, cap.migrate_seq, cap.issue_seq); } if (session) { diff --git a/src/mds/MDCache.cc b/src/mds/MDCache.cc index eb2b529dcfa..3af0d8c6b1e 100644 --- a/src/mds/MDCache.cc +++ b/src/mds/MDCache.cc @@ -5891,7 +5891,7 @@ void MDCache::do_cap_import(Session *session, CInode *in, Capability *cap, auto reap = make_message<MClientCaps>(CEPH_CAP_OP_IMPORT, in->ino(), realm->inode->ino(), cap->get_cap_id(), cap->get_last_seq(), cap->pending(), cap->wanted(), - 0, cap->get_mseq(), mds->get_osd_epoch_barrier()); + 0, cap->get_mseq(), cap->get_last_issue(), mds->get_osd_epoch_barrier()); in->encode_cap_message(reap, cap); reap->snapbl = mds->server->get_snap_trace(session, realm); reap->set_cap_peer(p_cap_id, p_seq, p_mseq, peer, p_flags); @@ -14378,6 +14378,7 @@ bool MDCache::is_ready_to_trim_cache(void) void MDCache::upkeep_main(void) { + ceph_pthread_setname("mds-cache-trim"); std::unique_lock lock(upkeep_mutex); // create a "memory model" for the upkeep thread. 
The object maintains diff --git a/src/mds/MDLog.cc b/src/mds/MDLog.cc index 0be568433ef..4bbf2a1a141 100644 --- a/src/mds/MDLog.cc +++ b/src/mds/MDLog.cc @@ -53,11 +53,12 @@ MDLog::MDLog(MDSRank* m) event_large_threshold = g_conf().get_val<uint64_t>("mds_log_event_large_threshold"); events_per_segment = g_conf().get_val<uint64_t>("mds_log_events_per_segment"); pause = g_conf().get_val<bool>("mds_log_pause"); - major_segment_event_ratio = g_conf().get_val<uint64_t>("mds_log_major_segment_event_ratio"); max_segments = g_conf().get_val<uint64_t>("mds_log_max_segments"); max_events = g_conf().get_val<int64_t>("mds_log_max_events"); skip_corrupt_events = g_conf().get_val<bool>("mds_log_skip_corrupt_events"); skip_unbounded_events = g_conf().get_val<bool>("mds_log_skip_unbounded_events"); + log_warn_factor = g_conf().get_val<double>("mds_log_warn_factor"); + minor_segments_per_major_segment = g_conf().get_val<uint64_t>("mds_log_minor_segments_per_major_segment"); upkeep_thread = std::thread(&MDLog::log_trim_upkeep, this); } @@ -258,7 +259,7 @@ void MDLog::create(MDSContext *c) logger->set(l_mdl_expos, journaler->get_expire_pos()); logger->set(l_mdl_wrpos, journaler->get_write_pos()); - submit_thread.create("md_submit"); + submit_thread.create("mds-log-submit"); } void MDLog::open(MDSContext *c) @@ -267,9 +268,9 @@ void MDLog::open(MDSContext *c) ceph_assert(!recovery_thread.is_started()); recovery_thread.set_completion(c); - recovery_thread.create("md_recov_open"); + recovery_thread.create("mds-log-recvr"); - submit_thread.create("md_submit"); + submit_thread.create("mds-log-submit"); // either append() or replay() will follow. 
} @@ -311,7 +312,7 @@ void MDLog::reopen(MDSContext *c) recovery_thread.join(); recovery_thread.set_completion(new C_ReopenComplete(this, c)); - recovery_thread.create("md_recov_reopen"); + recovery_thread.create("mds-log-reopen"); } void MDLog::append() @@ -357,14 +358,15 @@ void MDLog::_submit_entry(LogEvent *le, MDSLogContextBase* c) ceph_assert(!mds_is_shutting_down); event_seq++; - events_since_last_major_segment++; if (auto sb = dynamic_cast<SegmentBoundary*>(le); sb) { auto ls = _start_new_segment(sb); if (sb->is_major_segment_boundary()) { major_segments.insert(ls->seq); logger->set(l_mdl_segmjr, major_segments.size()); - events_since_last_major_segment = 0; + minor_segments_since_last_major_segment = 0; + } else { + ++minor_segments_since_last_major_segment; } } @@ -403,7 +405,7 @@ void MDLog::_segment_upkeep() uint64_t period = journaler->get_layout_period(); auto ls = get_current_segment(); // start a new segment? - if (events_since_last_major_segment > events_per_segment*major_segment_event_ratio) { + if (minor_segments_since_last_major_segment > minor_segments_per_major_segment) { dout(10) << __func__ << ": starting new major segment, current " << *ls << dendl; auto sle = mds->mdcache->create_subtree_map(); _submit_entry(sle, NULL); @@ -656,7 +658,13 @@ void MDLog::try_to_commit_open_file_table(uint64_t last_seq) } } +bool MDLog::is_trim_slow() const { + return (segments.size() > (size_t)(max_segments * log_warn_factor)); +} + void MDLog::log_trim_upkeep(void) { + ceph_pthread_setname("mds-log-trim"); + dout(10) << dendl; std::unique_lock mds_lock(mds->mds_lock); @@ -1008,7 +1016,7 @@ void MDLog::replay(MDSContext *c) } already_replayed = true; - replay_thread.create("md_log_replay"); + replay_thread.create("mds-log-replay"); } @@ -1474,7 +1482,6 @@ void MDLog::_replay_thread() } le->set_start_off(pos); - events_since_last_major_segment++; if (auto sb = dynamic_cast<SegmentBoundary*>(le.get()); sb) { auto seq = sb->get_seq(); if (seq > 0) { @@ -1487,7 
+1494,9 @@ void MDLog::_replay_thread() if (sb->is_major_segment_boundary()) { major_segments.insert(event_seq); logger->set(l_mdl_segmjr, major_segments.size()); - events_since_last_major_segment = 0; + minor_segments_since_last_major_segment = 0; + } else { + ++minor_segments_since_last_major_segment; } } else { event_seq++; @@ -1618,9 +1627,6 @@ void MDLog::handle_conf_change(const std::set<std::string>& changed, const MDSMa if (changed.count("mds_log_events_per_segment")) { events_per_segment = g_conf().get_val<uint64_t>("mds_log_events_per_segment"); } - if (changed.count("mds_log_major_segment_event_ratio")) { - major_segment_event_ratio = g_conf().get_val<uint64_t>("mds_log_major_segment_event_ratio"); - } if (changed.count("mds_log_max_events")) { max_events = g_conf().get_val<int64_t>("mds_log_max_events"); } @@ -1642,4 +1648,10 @@ void MDLog::handle_conf_change(const std::set<std::string>& changed, const MDSMa if (changed.count("mds_log_trim_decay_rate")){ log_trim_counter = DecayCounter(g_conf().get_val<double>("mds_log_trim_decay_rate")); } + if (changed.count("mds_log_warn_factor")) { + log_warn_factor = g_conf().get_val<double>("mds_log_warn_factor"); + } + if (changed.count("mds_log_minor_segments_per_major_segment")) { + minor_segments_per_major_segment = g_conf().get_val<uint64_t>("mds_log_minor_segments_per_major_segment"); + } } diff --git a/src/mds/MDLog.h b/src/mds/MDLog.h index e2ab4e686cd..a858b40fa03 100644 --- a/src/mds/MDLog.h +++ b/src/mds/MDLog.h @@ -173,6 +173,9 @@ public: // replay state std::map<inodeno_t, std::set<inodeno_t>> pending_exports; + // beacon needs me too + bool is_trim_slow() const; + protected: struct PendingEvent { PendingEvent(LogEvent *e, Context* c, bool f=false) : le(e), fin(c), flush(f) {} @@ -302,9 +305,9 @@ private: bool debug_subtrees; std::atomic_uint64_t event_large_threshold; // accessed by submit thread uint64_t events_per_segment; - uint64_t major_segment_event_ratio; int64_t max_events; uint64_t 
max_segments; + uint64_t minor_segments_per_major_segment; bool pause; bool skip_corrupt_events; bool skip_unbounded_events; @@ -312,7 +315,8 @@ private: std::set<uint64_t> major_segments; std::set<LogSegment*> expired_segments; std::set<LogSegment*> expiring_segments; - uint64_t events_since_last_major_segment = 0; + uint64_t minor_segments_since_last_major_segment = 0; + double log_warn_factor; // log trimming decay counter DecayCounter log_trim_counter; diff --git a/src/mds/MDSRank.cc b/src/mds/MDSRank.cc index 30820493cda..52ed930d71b 100644 --- a/src/mds/MDSRank.cc +++ b/src/mds/MDSRank.cc @@ -496,7 +496,7 @@ MDSRank::MDSRank( objecter->unset_honor_pool_full(); - finisher = new Finisher(cct, "MDSRank", "MR_Finisher"); + finisher = new Finisher(cct, "MDSRank", "mds-rank-fin"); mdcache = new MDCache(this, purge_queue); mdlog = new MDLog(this); @@ -581,7 +581,7 @@ void MDSRankDispatcher::init() // who is interested in it. handle_osd_map(); - progress_thread.create("mds_rank_progr"); + progress_thread.create("mds-rank-progr"); purge_queue.init(); diff --git a/src/mds/MetricAggregator.cc b/src/mds/MetricAggregator.cc index 1d17bbf3e92..6cbd9a094c0 100644 --- a/src/mds/MetricAggregator.cc +++ b/src/mds/MetricAggregator.cc @@ -73,6 +73,7 @@ int MetricAggregator::init() { m_cct->get_perfcounters_collection()->add(m_perf_counters); pinger = std::thread([this]() { + ceph_pthread_setname("mds-ping"); std::unique_lock locker(lock); while (!stopping) { ping_all_active_ranks(); diff --git a/src/mds/MetricsHandler.cc b/src/mds/MetricsHandler.cc index 9ad10b9d6e6..9fc4c6122a4 100644 --- a/src/mds/MetricsHandler.cc +++ b/src/mds/MetricsHandler.cc @@ -51,6 +51,7 @@ void MetricsHandler::init() { dout(10) << dendl; updater = std::thread([this]() { + ceph_pthread_setname("mds-metrics"); std::unique_lock locker(lock); while (!stopping) { double after = g_conf().get_val<std::chrono::seconds>("mds_metrics_update_interval").count(); diff --git a/src/mds/Migrator.cc 
b/src/mds/Migrator.cc index 722b6bd7422..6b12f710db4 100644 --- a/src/mds/Migrator.cc +++ b/src/mds/Migrator.cc @@ -1957,10 +1957,10 @@ void Migrator::handle_export_ack(const cref_t<MExportDirAck> &m) // this keeps authority().first in sync with subtree auth state in the journal. mdcache->adjust_subtree_auth(dir, it->second.peer, mds->get_nodeid()); + ceph_assert(g_conf()->mds_kill_export_at != 10); // log export completion, then finish (unfreeze, trigger finish context, etc.) mds->mdlog->submit_entry(le, new C_MDS_ExportFinishLogged(this, dir)); mds->mdlog->flush(); - ceph_assert(g_conf()->mds_kill_export_at != 10); } void Migrator::export_notify_abort(CDir *dir, export_state_t& stat, set<CDir*>& bounds) @@ -2844,7 +2844,6 @@ void Migrator::import_reverse(CDir *dir) dout(7) << *dir << dendl; import_state_t& stat = import_state[dir->dirfrag()]; - stat.state = IMPORT_ABORTING; set<CDir*> bounds; mdcache->get_subtree_bounds(dir, bounds); @@ -2950,10 +2949,14 @@ void Migrator::import_reverse(CDir *dir) } in->put(CInode::PIN_IMPORTINGCAPS); } + } + + if (stat.state == IMPORT_LOGGINGSTART || stat.state == IMPORT_ACKING) { for (auto& p : stat.session_map) { Session *session = p.second.first; session->dec_importing(); } + mds->server->close_forced_opened_sessions(stat.session_map); } // log our failure @@ -2962,6 +2965,7 @@ void Migrator::import_reverse(CDir *dir) mdcache->trim(num_dentries); // try trimming dentries // notify bystanders; wait in aborting state + stat.state = IMPORT_ABORTING; import_notify_abort(dir, bounds); } @@ -3054,10 +3058,9 @@ void Migrator::import_logged_start(dirfrag_t df, CDir *dir, mds_rank_t from, dout(7) << *dir << dendl; map<dirfrag_t, import_state_t>::iterator it = import_state.find(dir->dirfrag()); - if (it == import_state.end() || - it->second.state != IMPORT_LOGGINGSTART) { + ceph_assert(it != import_state.end()); + if (it->second.state != IMPORT_LOGGINGSTART) { dout(7) << "import " << df << " must have aborted" << dendl; - 
mds->server->finish_force_open_sessions(imported_session_map); return; } diff --git a/src/mds/PurgeQueue.cc b/src/mds/PurgeQueue.cc index 925bff16542..4426d3ca6fe 100644 --- a/src/mds/PurgeQueue.cc +++ b/src/mds/PurgeQueue.cc @@ -122,7 +122,7 @@ PurgeQueue::PurgeQueue( cct(cct_), rank(rank_), metadata_pool(metadata_pool_), - finisher(cct, "PurgeQueue", "PQ_Finisher"), + finisher(cct, "PurgeQueue", "mds-pq-fin"), timer(cct, lock), filer(objecter_, &finisher), objecter(objecter_), diff --git a/src/mds/QuiesceAgent.h b/src/mds/QuiesceAgent.h index 5c07d6d8074..85900e8e71b 100644 --- a/src/mds/QuiesceAgent.h +++ b/src/mds/QuiesceAgent.h @@ -30,7 +30,7 @@ class QuiesceAgent { : quiesce_control(quiesce_control) , stop_agent_thread(false) , agent_thread(this) { - agent_thread.create("quiesce.agt"); + agent_thread.create("mds-q-agt"); }; virtual ~QuiesceAgent() { diff --git a/src/mds/QuiesceDbEncoding.h b/src/mds/QuiesceDbEncoding.h index c76ed2d0c52..27c7e3ca2d0 100644 --- a/src/mds/QuiesceDbEncoding.h +++ b/src/mds/QuiesceDbEncoding.h @@ -15,7 +15,7 @@ #include "include/encoding.h" #include <stdint.h> -void encode(QuiesceDbVersion const& v, bufferlist& bl, uint64_t features = 0) +inline void encode(QuiesceDbVersion const& v, bufferlist& bl, uint64_t features = 0) { ENCODE_START(1, 1, bl); encode(v.epoch, bl, features); @@ -23,7 +23,7 @@ void encode(QuiesceDbVersion const& v, bufferlist& bl, uint64_t features = 0) ENCODE_FINISH(bl); } -void decode(QuiesceDbVersion& v, bufferlist::const_iterator& p) +inline void decode(QuiesceDbVersion& v, bufferlist::const_iterator& p) { DECODE_START(1, p); decode(v.epoch, p); @@ -31,33 +31,33 @@ void decode(QuiesceDbVersion& v, bufferlist::const_iterator& p) DECODE_FINISH(p); } -void encode(QuiesceState const & state, bufferlist& bl, uint64_t features=0) +inline void encode(QuiesceState const & state, bufferlist& bl, uint64_t features=0) { static_assert(QuiesceState::QS__MAX <= UINT8_MAX); uint8_t v = (uint8_t)state; encode(v, bl, 
features); } -void decode(QuiesceState & state, bufferlist::const_iterator& p) +inline void decode(QuiesceState & state, bufferlist::const_iterator& p) { uint8_t v = 0; decode(v, p); state = (QuiesceState)v; } -void encode(QuiesceTimeInterval const & interval, bufferlist& bl, uint64_t features=0) +inline void encode(QuiesceTimeInterval const & interval, bufferlist& bl, uint64_t features=0) { encode(interval.count(), bl, features); } -void decode(QuiesceTimeInterval & interval, bufferlist::const_iterator& p) +inline void decode(QuiesceTimeInterval & interval, bufferlist::const_iterator& p) { QuiesceClock::rep count; decode(count, p); interval = QuiesceTimeInterval { count }; } -void encode(RecordedQuiesceState const& rstate, bufferlist& bl, uint64_t features = 0) +inline void encode(RecordedQuiesceState const& rstate, bufferlist& bl, uint64_t features = 0) { ENCODE_START(1, 1, bl); encode(rstate.state, bl, features); @@ -65,7 +65,7 @@ void encode(RecordedQuiesceState const& rstate, bufferlist& bl, uint64_t feature ENCODE_FINISH(bl); } -void decode(RecordedQuiesceState& rstate, bufferlist::const_iterator& p) +inline void decode(RecordedQuiesceState& rstate, bufferlist::const_iterator& p) { DECODE_START(1, p); decode(rstate.state, p); @@ -73,7 +73,7 @@ void decode(RecordedQuiesceState& rstate, bufferlist::const_iterator& p) DECODE_FINISH(p); } -void encode(QuiesceSet::MemberInfo const& member, bufferlist& bl, uint64_t features = 0) +inline void encode(QuiesceSet::MemberInfo const& member, bufferlist& bl, uint64_t features = 0) { ENCODE_START(1, 1, bl); encode(member.rstate, bl, features); @@ -81,7 +81,7 @@ void encode(QuiesceSet::MemberInfo const& member, bufferlist& bl, uint64_t featu ENCODE_FINISH(bl); } -void decode(QuiesceSet::MemberInfo& member, bufferlist::const_iterator& p) +inline void decode(QuiesceSet::MemberInfo& member, bufferlist::const_iterator& p) { DECODE_START(1, p); decode(member.rstate, p); @@ -89,7 +89,7 @@ void decode(QuiesceSet::MemberInfo& 
member, bufferlist::const_iterator& p) DECODE_FINISH(p); } -void encode(QuiesceSet const& set, bufferlist& bl, uint64_t features = 0) +inline void encode(QuiesceSet const& set, bufferlist& bl, uint64_t features = 0) { ENCODE_START(1, 1, bl); encode(set.version, bl, features); @@ -100,7 +100,7 @@ void encode(QuiesceSet const& set, bufferlist& bl, uint64_t features = 0) ENCODE_FINISH(bl); } -void decode(QuiesceSet& set, bufferlist::const_iterator& p) +inline void decode(QuiesceSet& set, bufferlist::const_iterator& p) { DECODE_START(1, p); decode(set.version, p); @@ -111,7 +111,7 @@ void decode(QuiesceSet& set, bufferlist::const_iterator& p) DECODE_FINISH(p); } -void encode(QuiesceDbRequest const& req, bufferlist& bl, uint64_t features = 0) +inline void encode(QuiesceDbRequest const& req, bufferlist& bl, uint64_t features = 0) { ENCODE_START(1, 1, bl); encode(req.control.raw, bl, features); @@ -124,7 +124,7 @@ void encode(QuiesceDbRequest const& req, bufferlist& bl, uint64_t features = 0) ENCODE_FINISH(bl); } -void decode(QuiesceDbRequest& req, bufferlist::const_iterator& p) +inline void decode(QuiesceDbRequest& req, bufferlist::const_iterator& p) { DECODE_START(1, p); decode(req.control.raw, p); @@ -137,7 +137,7 @@ void decode(QuiesceDbRequest& req, bufferlist::const_iterator& p) DECODE_FINISH(p); } -void encode(QuiesceDbListing const& listing, bufferlist& bl, uint64_t features = 0) +inline void encode(QuiesceDbListing const& listing, bufferlist& bl, uint64_t features = 0) { ENCODE_START(1, 1, bl); encode(listing.db_version, bl, features); @@ -146,7 +146,7 @@ void encode(QuiesceDbListing const& listing, bufferlist& bl, uint64_t features = ENCODE_FINISH(bl); } -void decode(QuiesceDbListing& listing, bufferlist::const_iterator& p) +inline void decode(QuiesceDbListing& listing, bufferlist::const_iterator& p) { DECODE_START(1, p); decode(listing.db_version, p); @@ -155,7 +155,7 @@ void decode(QuiesceDbListing& listing, bufferlist::const_iterator& p) DECODE_FINISH(p); } 
-void encode(QuiesceDbPeerListing const& listing, bufferlist& bl, uint64_t features = 0) +inline void encode(QuiesceDbPeerListing const& listing, bufferlist& bl, uint64_t features = 0) { ENCODE_START(1, 1, bl); encode(listing.origin, bl, features); @@ -163,7 +163,7 @@ void encode(QuiesceDbPeerListing const& listing, bufferlist& bl, uint64_t featur ENCODE_FINISH(bl); } -void decode(QuiesceDbPeerListing& listing, bufferlist::const_iterator& p) +inline void decode(QuiesceDbPeerListing& listing, bufferlist::const_iterator& p) { DECODE_START(1, p); decode(listing.origin, p); @@ -171,7 +171,7 @@ void decode(QuiesceDbPeerListing& listing, bufferlist::const_iterator& p) DECODE_FINISH(p); } -void encode(QuiesceMap::RootInfo const& root, bufferlist& bl, uint64_t features = 0) +inline void encode(QuiesceMap::RootInfo const& root, bufferlist& bl, uint64_t features = 0) { ENCODE_START(1, 1, bl); encode(root.state, bl, features); @@ -179,7 +179,7 @@ void encode(QuiesceMap::RootInfo const& root, bufferlist& bl, uint64_t features ENCODE_FINISH(bl); } -void decode(QuiesceMap::RootInfo& root, bufferlist::const_iterator& p) +inline void decode(QuiesceMap::RootInfo& root, bufferlist::const_iterator& p) { DECODE_START(1, p); decode(root.state, p); @@ -187,7 +187,7 @@ void decode(QuiesceMap::RootInfo& root, bufferlist::const_iterator& p) DECODE_FINISH(p); } -void encode(QuiesceMap const& map, bufferlist& bl, uint64_t features = 0) +inline void encode(QuiesceMap const& map, bufferlist& bl, uint64_t features = 0) { ENCODE_START(1, 1, bl); encode(map.db_version, bl, features); @@ -195,7 +195,7 @@ void encode(QuiesceMap const& map, bufferlist& bl, uint64_t features = 0) ENCODE_FINISH(bl); } -void decode(QuiesceMap& map, bufferlist::const_iterator& p) +inline void decode(QuiesceMap& map, bufferlist::const_iterator& p) { DECODE_START(1, p); decode(map.db_version, p); @@ -203,7 +203,7 @@ void decode(QuiesceMap& map, bufferlist::const_iterator& p) DECODE_FINISH(p); } -void 
encode(QuiesceDbPeerAck const& ack, bufferlist& bl, uint64_t features = 0) +inline void encode(QuiesceDbPeerAck const& ack, bufferlist& bl, uint64_t features = 0) { ENCODE_START(1, 1, bl); encode(ack.origin, bl, features); @@ -211,7 +211,7 @@ void encode(QuiesceDbPeerAck const& ack, bufferlist& bl, uint64_t features = 0) ENCODE_FINISH(bl); } -void decode(QuiesceDbPeerAck& ack, bufferlist::const_iterator& p) +inline void decode(QuiesceDbPeerAck& ack, bufferlist::const_iterator& p) { DECODE_START(1, p); decode(ack.origin, p); diff --git a/src/mds/QuiesceDbManager.cc b/src/mds/QuiesceDbManager.cc index 12c83634e54..bb3ae93e378 100644 --- a/src/mds/QuiesceDbManager.cc +++ b/src/mds/QuiesceDbManager.cc @@ -200,7 +200,7 @@ void QuiesceDbManager::update_membership(const QuiesceClusterMembership& new_mem // start the thread dout(5) << "starting the db mgr thread at epoch: " << new_membership.epoch << dendl; db_thread_should_exit = false; - quiesce_db_thread.create("quiesce_db_mgr"); + quiesce_db_thread.create("mds-q-db"); } else if (quiesce_db_thread.is_started()) { submit_condition.notify_all(); } diff --git a/src/mds/Server.cc b/src/mds/Server.cc index cf286b46d46..5874a3dce56 100644 --- a/src/mds/Server.cc +++ b/src/mds/Server.cc @@ -615,6 +615,9 @@ void Server::handle_client_session(const cref_t<MClientSession> &m) mds->send_message(reply, m->get_connection()); return; } + if (!session->client_opened) { + session->client_opened = true; + } if (session->is_opening() || session->is_open() || session->is_stale() || @@ -1054,7 +1057,7 @@ version_t Server::prepare_force_open_sessions(map<client_t,entity_inst_t>& cm, return pv; } -void Server::finish_force_open_sessions(const map<client_t,pair<Session*,uint64_t> >& smap, +void Server::finish_force_open_sessions(map<client_t,pair<Session*,uint64_t> >& smap, bool dec_import) { /* @@ -1073,7 +1076,7 @@ void Server::finish_force_open_sessions(const map<client_t,pair<Session*,uint64_ dout(10) << "force_open_sessions skipping 
changed " << session->info.inst << dendl; } else { dout(10) << "force_open_sessions opened " << session->info.inst << dendl; - mds->sessionmap.set_state(session, Session::STATE_OPEN); + it.second.second = mds->sessionmap.set_state(session, Session::STATE_OPEN); mds->sessionmap.touch_session(session); metrics_handler->add_session(session); @@ -1103,6 +1106,29 @@ void Server::finish_force_open_sessions(const map<client_t,pair<Session*,uint64_ dout(10) << __func__ << ": final v " << mds->sessionmap.get_version() << dendl; } +void Server::close_forced_opened_sessions(const map<client_t,pair<Session*,uint64_t> >& smap) +{ + dout(10) << __func__ << " on " << smap.size() << " clients" << dendl; + + for (auto &it : smap) { + Session *session = it.second.first; + uint64_t sseq = it.second.second; + if (sseq == 0) + continue; + if (session->get_state_seq() != sseq) { + dout(10) << "skipping changed session (" << session->get_state_name() << ") " + << session->info.inst << dendl; + continue; + } + if (session->client_opened) + continue; + dout(10) << "closing forced opened session (" << session->get_state_name() << ") " + << session->info.inst << dendl; + ceph_assert(!session->is_importing()); + journal_close_session(session, Session::STATE_CLOSING, NULL); + } +} + class C_MDS_TerminatedSessions : public ServerContext { void finish(int r) override { server->terminating_sessions = false; diff --git a/src/mds/Server.h b/src/mds/Server.h index 68842ea01cb..5f9a763e550 100644 --- a/src/mds/Server.h +++ b/src/mds/Server.h @@ -129,8 +129,9 @@ public: version_t prepare_force_open_sessions(std::map<client_t,entity_inst_t> &cm, std::map<client_t,client_metadata_t>& cmm, std::map<client_t,std::pair<Session*,uint64_t> >& smap); - void finish_force_open_sessions(const std::map<client_t,std::pair<Session*,uint64_t> >& smap, + void finish_force_open_sessions(std::map<client_t,std::pair<Session*,uint64_t> >& smap, bool dec_import=true); + void close_forced_opened_sessions(const 
std::map<client_t,std::pair<Session*,uint64_t> >& smap); void flush_client_sessions(std::set<client_t>& client_set, MDSGatherBuilder& gather); void finish_flush_session(Session *session, version_t seq); void terminate_sessions(); diff --git a/src/mds/SessionMap.cc b/src/mds/SessionMap.cc index ba0b0817738..0f6038eb82b 100644 --- a/src/mds/SessionMap.cc +++ b/src/mds/SessionMap.cc @@ -615,6 +615,7 @@ void Session::dump(Formatter *f, bool cap_dump) const f->dump_unsigned("num_completed_requests", get_num_completed_requests()); f->dump_unsigned("num_completed_flushes", get_num_completed_flushes()); f->dump_bool("reconnecting", reconnecting); + f->dump_int("importing_count", importing_count); f->dump_object("recall_caps", recall_caps); f->dump_object("release_caps", release_caps); f->dump_object("recall_caps_throttle", recall_caps_throttle); diff --git a/src/mds/SessionMap.h b/src/mds/SessionMap.h index 9e82f00a9bf..bfe7dcd4895 100644 --- a/src/mds/SessionMap.h +++ b/src/mds/SessionMap.h @@ -417,6 +417,10 @@ public: session_info_t info; ///< durable bits MDSAuthCaps auth_caps; + // True if the session is opened by the client. + // False if the session is forced to open, until it is opened again by the client. 
+ bool client_opened = false; + xlist<Session*>::item item_session_list; std::list<ceph::ref_t<Message>> preopen_out_queue; ///< messages for client, queued before they connect diff --git a/src/messages/MClientCaps.h b/src/messages/MClientCaps.h index 96b2cb7d8b8..b001032225e 100644 --- a/src/messages/MClientCaps.h +++ b/src/messages/MClientCaps.h @@ -117,9 +117,9 @@ private: void set_ctime(const utime_t &t) { ctime = t; } void set_atime(const utime_t &t) { atime = t; } - void set_cap_peer(uint64_t id, ceph_seq_t seq, ceph_seq_t mseq, int mds, int flags) { + void set_cap_peer(uint64_t id, ceph_seq_t issue_seq, ceph_seq_t mseq, int mds, int flags) { peer.cap_id = id; - peer.seq = seq; + peer.issue_seq = issue_seq; peer.mseq = mseq; peer.mds = mds; peer.flags = flags; @@ -137,11 +137,12 @@ protected: inodeno_t ino, inodeno_t realm, uint64_t id, - long seq, + ceph_seq_t seq, int caps, int wanted, int dirty, - int mseq, + ceph_seq_t mseq, + ceph_seq_t issue_seq, epoch_t oeb) : SafeMessage{CEPH_MSG_CLIENT_CAPS, HEAD_VERSION, COMPAT_VERSION}, osd_epoch_barrier(oeb) { @@ -155,11 +156,12 @@ protected: head.wanted = wanted; head.dirty = dirty; head.migrate_seq = mseq; + head.issue_seq = issue_seq; memset(&peer, 0, sizeof(peer)); } MClientCaps(int op, inodeno_t ino, inodeno_t realm, - uint64_t id, int mseq, epoch_t oeb) + uint64_t id, ceph_seq_t mseq, epoch_t oeb) : SafeMessage{CEPH_MSG_CLIENT_CAPS, HEAD_VERSION, COMPAT_VERSION}, osd_epoch_barrier(oeb) { memset(&head, 0, sizeof(head)); @@ -181,7 +183,8 @@ public: out << "client_caps(" << ceph_cap_op_name(head.op) << " ino " << inodeno_t(head.ino) << " " << head.cap_id - << " seq " << head.seq; + << " seq " << head.seq + << " issue_seq " << head.issue_seq; if (get_tid()) out << " tid " << get_tid(); out << " caps=" << ccap_string(head.caps) diff --git a/src/mon/MDSMonitor.cc b/src/mon/MDSMonitor.cc index d8cca4ceb61..f742303c6e9 100644 --- a/src/mon/MDSMonitor.cc +++ b/src/mon/MDSMonitor.cc @@ -758,6 +758,14 @@ bool 
MDSMonitor::prepare_beacon(MonOpRequestRef op) if (state == MDSMap::STATE_DNE) { dout(1) << __func__ << ": DNE from " << info << dendl; + + /* send a beacon reply so MDSDaemon::suicide() finishes the + Beacon::send_and_wait() call */ + auto beacon = make_message<MMDSBeacon>(mon.monmap->fsid, + m->get_global_id(), m->get_name(), get_fsmap().get_epoch(), + m->get_state(), m->get_seq(), CEPH_FEATURES_SUPPORTED_DEFAULT); + mon.send_reply(op, beacon.detach()); + goto evict; } diff --git a/src/mon/MonMap.cc b/src/mon/MonMap.cc index 5598bbe8709..8d0540d71f2 100644 --- a/src/mon/MonMap.cc +++ b/src/mon/MonMap.cc @@ -196,7 +196,12 @@ void MonMap::encode(ceph::buffer::list& blist, uint64_t con_features) const if (!HAVE_FEATURE(con_features, MONENC) || !HAVE_FEATURE(con_features, SERVER_NAUTILUS)) { for (auto& [name, info] : mon_info) { - legacy_mon_addr[name] = info.public_addrs.legacy_addr(); + // see note in mon_info_t::encode() + auto addr = info.public_addrs.legacy_addr(); + if (addr == entity_addr_t()) { + addr = info.public_addrs.as_legacy_addr(); + } + legacy_mon_addr[name] = addr; } } diff --git a/src/mon/NVMeofGwMap.cc b/src/mon/NVMeofGwMap.cc index 9fda03b4905..719403925ad 100755 --- a/src/mon/NVMeofGwMap.cc +++ b/src/mon/NVMeofGwMap.cc @@ -16,7 +16,9 @@ #include "NVMeofGwMon.h" #include "NVMeofGwMap.h" #include "OSDMonitor.h" +#include "mon/health_check.h" +using std::list; using std::map; using std::make_pair; using std::ostream; @@ -893,6 +895,47 @@ struct CMonRequestProposal : public Context { } }; +void NVMeofGwMap::get_health_checks(health_check_map_t *checks) const +{ + list<string> singleGatewayDetail; + list<string> gatewayDownDetail; + for (const auto& created_map_pair: created_gws) { + const auto& group_key = created_map_pair.first; + auto& group = group_key.second; + const NvmeGwMonStates& gw_created_map = created_map_pair.second; + if ( gw_created_map.size() == 1) { + ostringstream ss; + ss << "NVMeoF Gateway Group '" << group << "' has 1 gateway." 
; + singleGatewayDetail.push_back(ss.str()); + } + for (const auto& gw_created_pair: gw_created_map) { + const auto& gw_id = gw_created_pair.first; + const auto& gw_created = gw_created_pair.second; + if (gw_created.availability == gw_availability_t::GW_UNAVAILABLE) { + ostringstream ss; + ss << "NVMeoF Gateway '" << gw_id << "' is unavailable." ; + gatewayDownDetail.push_back(ss.str()); + } + } + } + if (!singleGatewayDetail.empty()) { + ostringstream ss; + ss << singleGatewayDetail.size() << " group(s) have only 1 nvmeof gateway" + << "; HA is not possible with single gateway."; + auto& d = checks->add("NVMEOF_SINGLE_GATEWAY", HEALTH_WARN, + ss.str(), singleGatewayDetail.size()); + d.detail.swap(singleGatewayDetail); + } + if (!gatewayDownDetail.empty()) { + ostringstream ss; + ss << gatewayDownDetail.size() << " gateway(s) are in unavailable state" + << "; gateway might be down, try to redeploy."; + auto& d = checks->add("NVMEOF_GATEWAY_DOWN", HEALTH_WARN, + ss.str(), gatewayDownDetail.size()); + d.detail.swap(gatewayDownDetail); + } +} + int NVMeofGwMap::blocklist_gw( const NvmeGwId &gw_id, const NvmeGroupKey& group_key, NvmeAnaGrpId grpid, epoch_t &epoch, bool failover) diff --git a/src/mon/NVMeofGwMap.h b/src/mon/NVMeofGwMap.h index 267d85b10f9..5f657733012 100755 --- a/src/mon/NVMeofGwMap.h +++ b/src/mon/NVMeofGwMap.h @@ -27,6 +27,9 @@ #include "NVMeofGwTypes.h" using ceph::coarse_mono_clock; + +class health_check_map_t; + class Monitor; /*-------------------*/ class NVMeofGwMap @@ -140,6 +143,8 @@ public: decode(fsm_timers, bl); DECODE_FINISH(bl); } + + void get_health_checks(health_check_map_t *checks) const; }; #include "NVMeofGwSerialize.h" diff --git a/src/mon/NVMeofGwMon.cc b/src/mon/NVMeofGwMon.cc index 9d80c23a4c1..0fe5c3e655f 100644 --- a/src/mon/NVMeofGwMon.cc +++ b/src/mon/NVMeofGwMon.cc @@ -181,6 +181,11 @@ void NVMeofGwMon::encode_pending(MonitorDBStore::TransactionRef t) << HAVE_FEATURE(mon.get_quorum_con_features(), NVMEOFHA) << dendl; 
put_version(t, pending_map.epoch, bl); put_last_committed(t, pending_map.epoch); + + //health + health_check_map_t checks; + pending_map.get_health_checks(&checks); + encode_health(checks, t); } void NVMeofGwMon::update_from_paxos(bool *need_bootstrap) @@ -193,6 +198,7 @@ void NVMeofGwMon::update_from_paxos(bool *need_bootstrap) bufferlist bl; int err = get_version(version, bl); ceph_assert(err == 0); + load_health(); auto p = bl.cbegin(); map.decode(p); diff --git a/src/msg/async/Stack.h b/src/msg/async/Stack.h index 6739968f4e2..5f8bbc172df 100644 --- a/src/msg/async/Stack.h +++ b/src/msg/async/Stack.h @@ -352,7 +352,7 @@ class NetworkStack { static constexpr int TASK_COMM_LEN = 16; char tp_name[TASK_COMM_LEN]; sprintf(tp_name, "msgr-worker-%u", id); - ceph_pthread_setname(pthread_self(), tp_name); + ceph_pthread_setname(tp_name); } protected: diff --git a/src/msg/async/rdma/RDMAStack.cc b/src/msg/async/rdma/RDMAStack.cc index 12db599d684..789a624cf90 100644 --- a/src/msg/async/rdma/RDMAStack.cc +++ b/src/msg/async/rdma/RDMAStack.cc @@ -92,7 +92,6 @@ void RDMADispatcher::polling_start() ceph_assert(rx_cq); t = std::thread(&RDMADispatcher::polling, this); - ceph_pthread_setname(t.native_handle(), "rdma-polling"); } void RDMADispatcher::polling_stop() @@ -263,6 +262,7 @@ int RDMADispatcher::post_chunks_to_rq(int num, QueuePair *qp) void RDMADispatcher::polling() { + ceph_pthread_setname("rdma-polling"); static int MAX_COMPLETIONS = 32; ibv_wc wc[MAX_COMPLETIONS]; diff --git a/src/osd/PrimaryLogPG.cc b/src/osd/PrimaryLogPG.cc index 14d2f85f40f..44f8e85b5ef 100644 --- a/src/osd/PrimaryLogPG.cc +++ b/src/osd/PrimaryLogPG.cc @@ -6006,7 +6006,7 @@ int PrimaryLogPG::do_osd_ops(OpContext *ctx, vector<OSDOp>& ops) object_info_t& oi = obs.oi; const hobject_t& soid = oi.soid; const bool skip_data_digest = osd->store->has_builtin_csum() && - osd->osd_skip_data_digest; + *osd->osd_skip_data_digest; PGTransaction* t = ctx->op_t.get(); @@ -6069,9 +6069,9 @@ int 
PrimaryLogPG::do_osd_ops(OpContext *ctx, vector<OSDOp>& ops) // munge ZERO -> TRUNCATE? (don't munge to DELETE or we risk hosing attributes) if (op.op == CEPH_OSD_OP_ZERO && obs.exists && - op.extent.offset < static_cast<Option::size_t>(osd->osd_max_object_size) && + op.extent.offset < *osd->osd_max_object_size && op.extent.length >= 1 && - op.extent.length <= static_cast<Option::size_t>(osd->osd_max_object_size) && + op.extent.length <= *osd->osd_max_object_size && op.extent.offset + op.extent.length >= oi.size) { if (op.extent.offset >= oi.size) { // no-op @@ -6781,7 +6781,7 @@ int PrimaryLogPG::do_osd_ops(OpContext *ctx, vector<OSDOp>& ops) } result = check_offset_and_length( op.extent.offset, op.extent.length, - static_cast<Option::size_t>(osd->osd_max_object_size), get_dpp()); + *osd->osd_max_object_size, get_dpp()); if (result < 0) break; @@ -6838,7 +6838,7 @@ int PrimaryLogPG::do_osd_ops(OpContext *ctx, vector<OSDOp>& ops) } result = check_offset_and_length( 0, op.extent.length, - static_cast<Option::size_t>(osd->osd_max_object_size), get_dpp()); + *osd->osd_max_object_size, get_dpp()); if (result < 0) break; @@ -6888,7 +6888,7 @@ int PrimaryLogPG::do_osd_ops(OpContext *ctx, vector<OSDOp>& ops) { // zero result = check_offset_and_length( op.extent.offset, op.extent.length, - static_cast<Option::size_t>(osd->osd_max_object_size), get_dpp()); + *osd->osd_max_object_size, get_dpp()); if (result < 0) break; @@ -6953,7 +6953,7 @@ int PrimaryLogPG::do_osd_ops(OpContext *ctx, vector<OSDOp>& ops) result = check_offset_and_length( op.extent.offset, op.extent.length, - static_cast<Option::size_t>(osd->osd_max_object_size), get_dpp()); + *osd->osd_max_object_size, get_dpp()); if (result < 0) break; diff --git a/src/osd/scrubber/osd_scrub.cc b/src/osd/scrubber/osd_scrub.cc index c8cf27d2116..110c2c7d266 100644 --- a/src/osd/scrubber/osd_scrub.cc +++ b/src/osd/scrubber/osd_scrub.cc @@ -65,7 +65,7 @@ void OsdScrub::dump_scrubs(ceph::Formatter* f) const void 
OsdScrub::dump_scrub_reservations(ceph::Formatter* f) const { m_resource_bookkeeper.dump_scrub_reservations(f); - f->open_array_section("remote_scrub_reservations"); + f->open_object_section("remote_scrub_reservations"); m_osd_svc.get_scrub_reserver().dump(f); f->close_section(); } diff --git a/src/osd/scrubber/osd_scrub_sched.cc b/src/osd/scrubber/osd_scrub_sched.cc index cd80625aaec..c116bcbb4c2 100644 --- a/src/osd/scrubber/osd_scrub_sched.cc +++ b/src/osd/scrubber/osd_scrub_sched.cc @@ -140,9 +140,10 @@ bool ScrubQueue::remove_entry_unlocked(spg_t pgid, scrub_level_t s_or_d) void ScrubQueue::dump_scrubs(ceph::Formatter* f) const { ceph_assert(f != nullptr); + const auto query_time = ceph_clock_now(); f->open_array_section("scrubs"); for_each_job( - [&f](const Scrub::SchedEntry& e) { + [&f, query_time](const Scrub::SchedEntry& e) { f->open_object_section("scrub"); f->dump_stream("pgid") << e.pgid; f->dump_stream("sched_time") << e.schedule.not_before; @@ -151,6 +152,15 @@ void ScrubQueue::dump_scrubs(ceph::Formatter* f) const f->dump_bool( "forced", e.schedule.scheduled_at == PgScrubber::scrub_must_stamp()); + + f->dump_stream("level") << (e.level == scrub_level_t::shallow + ? "shallow" + : "deep"); + f->dump_stream("urgency") << fmt::format("{}", e.urgency); + f->dump_bool("eligible", e.schedule.not_before <= query_time); + f->dump_bool("overdue", e.schedule.deadline < query_time); + f->dump_stream("last_issue") << fmt::format("{}", e.last_issue); + f->close_section(); }, std::numeric_limits<int>::max()); diff --git a/src/osd/scrubber/pg_scrubber.cc b/src/osd/scrubber/pg_scrubber.cc index c37f31d28dc..aa53df5ae8a 100644 --- a/src/osd/scrubber/pg_scrubber.cc +++ b/src/osd/scrubber/pg_scrubber.cc @@ -3,13 +3,13 @@ #include "./pg_scrubber.h" // '.' 
notation used to affect clang-format order +#include <fmt/ranges.h> + #include <cmath> #include <iostream> #include <span> #include <vector> -#include <fmt/ranges.h> - #include "debug.h" #include "common/ceph_time.h" @@ -766,8 +766,13 @@ void PgScrubber::on_operator_periodic_cmd( asok_response_section(f, true, scrub_level, stamp); if (scrub_level == scrub_level_t::deep) { + const auto saved_shallow_stamp = m_pg->info.history.last_scrub_stamp; // this call sets both stamps m_pg->set_last_deep_scrub_stamp(stamp); + // restore the shallow stamp, as otherwise it will be scheduled before + // the deep, failing whatever test code called us (this is a test-only + // interface). + m_pg->set_last_scrub_stamp(saved_shallow_stamp); } else { m_pg->set_last_scrub_stamp(stamp); } @@ -819,21 +824,21 @@ namespace { * an aux function to be used in select_range() below, to * select the correct chunk size based on the type of scrub */ -int size_from_conf( +int64_t size_from_conf( bool is_deep, const ceph::common::ConfigProxy& conf, - std::string_view deep_opt, - std::string_view shallow_opt) + const md_config_cacher_t<int64_t>& deep_opt, + const md_config_cacher_t<int64_t>& shallow_opt) { if (!is_deep) { - auto sz = conf.get_val<int64_t>(shallow_opt); + auto sz = *shallow_opt; if (sz != 0) { // assuming '0' means that no distinction was yet configured between // deep and shallow scrubbing - return static_cast<int>(sz); + return sz; } } - return static_cast<int>(conf.get_val<int64_t>(deep_opt)); + return *deep_opt; } } // anonymous namespace @@ -912,16 +917,16 @@ std::optional<uint64_t> PgScrubber::select_range() dout(20) << fmt::format( "{} {} mins: {}d {}s, max: {}d {}s", __func__, (m_is_deep ? 
"D" : "S"), - conf.get_val<int64_t>("osd_scrub_chunk_min"), - conf.get_val<int64_t>("osd_shallow_scrub_chunk_min"), - conf.get_val<int64_t>("osd_scrub_chunk_max"), - conf.get_val<int64_t>("osd_shallow_scrub_chunk_max")) + *osd_scrub_chunk_min, + *osd_shallow_scrub_chunk_min, + *osd_scrub_chunk_max, + *osd_shallow_scrub_chunk_max) << dendl; - const int min_from_conf = size_from_conf( - m_is_deep, conf, "osd_scrub_chunk_min", "osd_shallow_scrub_chunk_min"); - const int max_from_conf = size_from_conf( - m_is_deep, conf, "osd_scrub_chunk_max", "osd_shallow_scrub_chunk_max"); + const int min_from_conf = static_cast<int>(size_from_conf( + m_is_deep, conf, osd_scrub_chunk_min, osd_shallow_scrub_chunk_min)); + const int max_from_conf = static_cast<int>(size_from_conf( + m_is_deep, conf, osd_scrub_chunk_max, osd_shallow_scrub_chunk_max)); const int divisor = static_cast<int>(preemption_data.chunk_divisor()); const int min_chunk_sz = std::max(3, min_from_conf / divisor); @@ -1635,7 +1640,7 @@ void PgScrubber::replica_scrub_op(OpRequestRef op) advance_token(); const auto& conf = m_pg->get_cct()->_conf; const int max_from_conf = size_from_conf( - m_is_deep, conf, "osd_scrub_chunk_max", "osd_shallow_scrub_chunk_max"); + m_is_deep, conf, osd_scrub_chunk_max, osd_shallow_scrub_chunk_max); auto cost = get_scrub_cost(max_from_conf); m_osds->queue_for_rep_scrub(m_pg, m_replica_request_priority, @@ -2541,6 +2546,16 @@ PgScrubber::PgScrubber(PG* pg) , m_pg_id{pg->pg_id} , m_osds{m_pg->osd} , m_pg_whoami{pg->pg_whoami} + , osd_scrub_chunk_max{m_osds->cct->_conf, "osd_scrub_chunk_max"} + , osd_shallow_scrub_chunk_max{m_osds->cct->_conf, + "osd_shallow_scrub_chunk_max"} + , osd_scrub_chunk_min{m_osds->cct->_conf, "osd_scrub_chunk_min"} + , osd_shallow_scrub_chunk_min{m_osds->cct->_conf, + "osd_shallow_scrub_chunk_min"} + , osd_stats_update_period_scrubbing{ + m_osds->cct->_conf, "osd_stats_update_period_scrubbing"} + , osd_stats_update_period_not_scrubbing{ + m_osds->cct->_conf, 
"osd_stats_update_period_not_scrubbing"} , preemption_data{pg} { m_fsm = std::make_unique<ScrubMachine>(m_pg, this); @@ -2669,7 +2684,8 @@ const OSDMapRef& PgScrubber::get_osdmap() const LoggerSinkSet& PgScrubber::get_logger() const { return *m_osds->clog.get(); } -ostream &operator<<(ostream &out, const PgScrubber &scrubber) { +ostream& operator<<(ostream& out, const PgScrubber& scrubber) +{ return out << scrubber.m_flags; } @@ -2783,16 +2799,14 @@ void PgScrubber::update_scrub_stats(ceph::coarse_real_clock::time_point now_is) using clock = ceph::coarse_real_clock; using namespace std::chrono; - const seconds period_active = seconds(m_pg->get_cct()->_conf.get_val<int64_t>( - "osd_stats_update_period_scrubbing")); + const seconds period_active = seconds(*osd_stats_update_period_scrubbing); if (!period_active.count()) { // a way for the operator to disable these stats updates return; } - const seconds period_inactive = - seconds(m_pg->get_cct()->_conf.get_val<int64_t>( - "osd_stats_update_period_not_scrubbing") + - m_pg_id.pgid.m_seed % 30); + const seconds period_inactive = seconds( + *osd_stats_update_period_not_scrubbing + + m_pg_id.pgid.m_seed % 30); // determine the required update period, based on our current state auto period{period_inactive}; @@ -2826,10 +2840,10 @@ void PgScrubber::update_scrub_stats(ceph::coarse_real_clock::time_point now_is) // ///////////////////// preemption_data_t ////////////////////////////////// -PgScrubber::preemption_data_t::preemption_data_t(PG* pg) : m_pg{pg} +PgScrubber::preemption_data_t::preemption_data_t(PG* pg) : m_pg{pg}, + osd_scrub_max_preemptions{pg->cct->_conf, "osd_scrub_max_preemptions"} { - m_left = static_cast<int>( - m_pg->get_cct()->_conf.get_val<uint64_t>("osd_scrub_max_preemptions")); + m_left = *osd_scrub_max_preemptions; } void PgScrubber::preemption_data_t::reset() @@ -2838,8 +2852,7 @@ void PgScrubber::preemption_data_t::reset() m_preemptable = false; m_preempted = false; - m_left = static_cast<int>( - 
m_pg->cct->_conf.get_val<uint64_t>("osd_scrub_max_preemptions")); + m_left = *osd_scrub_max_preemptions; m_size_divisor = 1; } diff --git a/src/osd/scrubber/pg_scrubber.h b/src/osd/scrubber/pg_scrubber.h index 3d7e16cd359..0d9e8c1e9f6 100644 --- a/src/osd/scrubber/pg_scrubber.h +++ b/src/osd/scrubber/pg_scrubber.h @@ -75,6 +75,8 @@ Main Scrubber interfaces: #include <string_view> #include <vector> +#include "common/config_proxy.h" +#include "common/config_cacher.h" #include "osd/PG.h" #include "osd/scrubber_common.h" @@ -895,6 +897,24 @@ class PgScrubber : public ScrubPgIF, // scrub state. ceph::coarse_real_clock::time_point m_last_stat_upd{}; + // ------------------ cached (frequently used) configuration values + + /// initial (& max) number of objects to scrub in one pass - deep scrub + md_config_cacher_t<int64_t> osd_scrub_chunk_max; + /// initial (& max) number of objects to scrub in one pass - shallow + md_config_cacher_t<int64_t> osd_shallow_scrub_chunk_max; + + /// chunk size won't be reduced (when preempted) below this + /// value (deep scrub) + md_config_cacher_t<int64_t> osd_scrub_chunk_min; + /// chunk size won't be reduced below this value (shallow scrub) + md_config_cacher_t<int64_t> osd_shallow_scrub_chunk_min; + + /// stats update (publish_stats_to_osd()) interval while scrubbing + md_config_cacher_t<int64_t> osd_stats_update_period_scrubbing; + /// stats update interval while not scrubbing + md_config_cacher_t<int64_t> osd_stats_update_period_not_scrubbing; + // ------------ members used if we are a replica epoch_t m_replica_min_epoch; ///< the min epoch needed to handle this message @@ -991,6 +1011,9 @@ class PgScrubber : public ScrubPgIF, mutable ceph::mutex m_preemption_lock = ceph::make_mutex("preemption_lock"); bool m_preemptable{false}; bool m_preempted{false}; + + /// the number of preemptions allowed before we start blocking + md_config_cacher_t<uint64_t> osd_scrub_max_preemptions; int m_left; size_t m_size_divisor{1}; bool 
are_preemptions_left() const { return m_left > 0; } diff --git a/src/pybind/mgr/balancer/module.py b/src/pybind/mgr/balancer/module.py index c98ce9aec41..476304275c1 100644 --- a/src/pybind/mgr/balancer/module.py +++ b/src/pybind/mgr/balancer/module.py @@ -325,6 +325,11 @@ class Module(MgrModule): type='str', default='', desc='pools which the automatic balancing will be limited to', + runtime=True), + Option(name='update_pg_upmap_activity', + type='bool', + default=False, + desc='Updates pg_upmap activity stats to be used in `balancer status detail`', runtime=True) ] @@ -339,12 +344,10 @@ class Module(MgrModule): no_optimization_needed = False success_string = 'Optimization plan created successfully' in_progress_string = 'in progress' - last_pg_upmap: List[Dict[str, Any]] = [] pg_upmap_items_added: List[Dict[str, Any]] = [] pg_upmap_items_removed: List[Dict[str, Any]] = [] - last_pg_upmap_primaries: List[Dict[str, Any]] = [] pg_upmap_primaries_added: List[Dict[str, Any]] = [] - pg_upmap_activity_initalized = False + pg_upmap_primaries_removed: List[Dict[str, Any]] = [] def __init__(self, *args: Any, **kwargs: Any) -> None: super(Module, self).__init__(*args, **kwargs) @@ -371,6 +374,11 @@ class Module(MgrModule): """ Show balancer status (detailed) """ + pg_upmap_activity = cast(bool, self.get_module_option('update_pg_upmap_activity')) + if not pg_upmap_activity: + msg = 'This command is disabled.\n' \ + 'To enable, run `ceph config set mgr mgr/balancer/update_pg_upmap_activity True`.\n' + return 0, msg, '' s = { 'plans': list(self.plans.keys()), 'active': self.active, @@ -665,7 +673,9 @@ class Module(MgrModule): if not plan_: return (-errno.ENOENT, '', f'plan {plan} not found') r, detail = self.execute(plan_) - self.update_pg_upmap_activity() # update pg activity in `balancer status detail` + pg_upmap_activity = cast(bool, self.get_module_option('update_pg_upmap_activity')) + if pg_upmap_activity: + self.update_pg_upmap_activity(plan_) # update pg activity in 
`balancer status detail` self.plan_rm(plan) return (r, '', detail) @@ -757,7 +767,9 @@ class Module(MgrModule): self.execute(plan) else: self.optimize_result = detail - self.update_pg_upmap_activity() # update pg activity in `balancer status detail` + pg_upmap_activity = cast(bool, self.get_module_option('update_pg_upmap_activity')) + if pg_upmap_activity: + self.update_pg_upmap_activity(plan) # update pg activity in `balancer status detail` self.optimizing = False self.log.debug('Sleeping for %d', sleep_interval) self.event.wait(sleep_interval) @@ -1582,22 +1594,16 @@ class Module(MgrModule): 'mode': self.mode, } - def update_pg_upmap_activity(self) -> None: - osdmap = self.get_osdmap() - if not self.pg_upmap_activity_initalized: - self.last_pg_upmap = osdmap.dump().get('pg_upmap_items', '') - self.last_pg_upmap_primaries = osdmap.dump().get('pg_upmap_primaries', '') - self.pg_upmap_activity_initalized = True + def update_pg_upmap_activity(self, plan: Plan) -> None: + incdump = plan.inc.dump() # update pg_upmap_items - self.pg_upmap_items_added = [pg for pg in osdmap.dump().get('pg_upmap_items', '') if pg not in self.last_pg_upmap] - self.pg_upmap_items_removed = [pg for pg in self.last_pg_upmap if pg not in osdmap.dump().get('pg_upmap_items', '')] - self.last_pg_upmap = osdmap.dump().get('pg_upmap_items', '') + self.pg_upmap_items_added = incdump.get('new_pg_upmap_items', []) + self.pg_upmap_items_removed = incdump.get('old_pg_upmap_items', []) # update pg_upmap_primaries - self.pg_upmap_primaries_added = [pg for pg in osdmap.dump().get('pg_upmap_primaries', '') if pg not in self.last_pg_upmap_primaries] - self.pg_upmap_primaries_removed = [pg for pg in self.last_pg_upmap_primaries if pg not in osdmap.dump().get('pg_upmap_primaries', '')] - self.last_pg_upmap_primaries = osdmap.dump().get('pg_upmap_primaries', '') + self.pg_upmap_primaries_added = incdump.get('new_pg_upmap_primaries', []) + self.pg_upmap_primaries_removed = incdump.get('old_pg_upmap_primaries', 
[]) def self_test(self) -> None: # turn balancer on diff --git a/src/pybind/mgr/cephadm/cert_mgr.py b/src/pybind/mgr/cephadm/cert_mgr.py index 9b68e85ca44..0c56c704788 100644 --- a/src/pybind/mgr/cephadm/cert_mgr.py +++ b/src/pybind/mgr/cephadm/cert_mgr.py @@ -1,6 +1,6 @@ from cephadm.ssl_cert_utils import SSLCerts, SSLConfigException -from typing import TYPE_CHECKING, Tuple, Union, List +from typing import TYPE_CHECKING, Tuple, Union, List, Optional if TYPE_CHECKING: from cephadm.module import CephadmOrchestrator @@ -28,5 +28,10 @@ class CertMgr: def get_root_ca(self) -> str: return self.ssl_certs.get_root_cert() - def generate_cert(self, host_fqdn: Union[str, List[str]], node_ip: Union[str, List[str]]) -> Tuple[str, str]: - return self.ssl_certs.generate_cert(host_fqdn, node_ip) + def generate_cert( + self, + host_fqdn: Union[str, List[str]], + node_ip: Union[str, List[str]], + custom_san_list: Optional[List[str]] = None, + ) -> Tuple[str, str]: + return self.ssl_certs.generate_cert(host_fqdn, node_ip, custom_san_list=custom_san_list) diff --git a/src/pybind/mgr/cephadm/inventory.py b/src/pybind/mgr/cephadm/inventory.py index 00410c1bc38..d3681cbe60a 100644 --- a/src/pybind/mgr/cephadm/inventory.py +++ b/src/pybind/mgr/cephadm/inventory.py @@ -638,6 +638,9 @@ class TunedProfileStore(): logger.error( f'Attempted to set setting "{setting}" for nonexistent os tuning profile "{profile}"') + def add_settings(self, profile: str, settings: dict) -> None: + self.process_settings(profile, settings, action='add') + def rm_setting(self, profile: str, setting: str) -> None: if profile in self.profiles: if setting in self.profiles[profile].settings: @@ -651,6 +654,39 @@ class TunedProfileStore(): logger.error( f'Attempted to remove setting "{setting}" from nonexistent os tuning profile "{profile}"') + def rm_settings(self, profile: str, settings: List[str]) -> None: + self.process_settings(profile, settings, action='remove') + + def process_settings(self, profile: str, 
settings: Union[dict, list], action: str) -> None: + """ + Process settings by either adding or removing them based on the action specified. + """ + if profile not in self.profiles: + logger.error(f'Attempted to {action} settings for nonexistent os tuning profile "{profile}"') + return + profile_settings = self.profiles[profile].settings + if action == 'remove' and isinstance(settings, list): + invalid_settings = [s for s in settings if '=' in s or s not in profile_settings] + if invalid_settings: + raise OrchestratorError( + f"Invalid settings: {', '.join(invalid_settings)}. " + "Ensure settings are specified without '=' and exist in the profile. Correct format: key1,key2" + ) + if action == 'add' and isinstance(settings, dict): + for setting, value in settings.items(): + self.profiles[profile].settings[setting] = value + elif action == 'remove' and isinstance(settings, list): + for setting in settings: + self.profiles[profile].settings.pop(setting, '') + else: + logger.error( + f'Invalid action "{action}" for settings modification for tuned profile ' + f'"{profile}". Valid actions are "add" and "remove"' + ) + return + self.profiles[profile]._last_updated = datetime_to_str(datetime_now()) + self.save() + def add_profile(self, spec: TunedProfileSpec) -> None: spec._last_updated = datetime_to_str(datetime_now()) self.profiles[spec.profile_name] = spec diff --git a/src/pybind/mgr/cephadm/module.py b/src/pybind/mgr/cephadm/module.py index e93a286cec6..e851f1ee3fc 100644 --- a/src/pybind/mgr/cephadm/module.py +++ b/src/pybind/mgr/cephadm/module.py @@ -3499,6 +3499,33 @@ Then run the following: return f'Added setting {setting} with value {value} to tuned profile {profile_name}' @handle_orch_error + def tuned_profile_add_settings(self, profile_name: str, settings: dict) -> str: + if profile_name not in self.tuned_profiles: + raise OrchestratorError( + f"Tuned profile {profile_name} does not exist. Cannot add setting." 
+ ) + self.tuned_profiles.add_settings(profile_name, settings) + results = [ + f"Added setting {key} with value {value} to tuned profile {profile_name}" + for key, value in settings.items() + ] + self._kick_serve_loop() + return "\n".join(results) + + @handle_orch_error + def tuned_profile_rm_settings(self, profile_name: str, settings: List[str]) -> str: + if profile_name not in self.tuned_profiles: + raise OrchestratorError( + f"Tuned profile {profile_name} does not exist. Cannot remove setting." + ) + self.tuned_profiles.rm_settings(profile_name, settings) + results = [ + f'Removed setting {settings} from tuned profile {profile_name}' + ] + self._kick_serve_loop() + return "\n".join(results) + + @handle_orch_error def tuned_profile_rm_setting(self, profile_name: str, setting: str) -> str: if profile_name not in self.tuned_profiles: raise OrchestratorError( diff --git a/src/pybind/mgr/cephadm/serve.py b/src/pybind/mgr/cephadm/serve.py index 4a7959ae045..8e9cd00fa81 100644 --- a/src/pybind/mgr/cephadm/serve.py +++ b/src/pybind/mgr/cephadm/serve.py @@ -1436,8 +1436,24 @@ class CephadmServe: config_blobs=daemon_spec.final_config, ).dump_json_str(), use_current_daemon_image=reconfig, + error_ok=True ) + # return number corresponding to DAEMON_FAILED_ERROR + # in src/cephadm/cephadmlib/constants. + # TODO: link these together so one cannot be changed without the other + if code == 17: + # daemon failed on systemctl start command, meaning while + # deployment failed the daemon is present and we should handle + # this as if the deploy command "succeeded" and mark the daemon + # as failed later when we fetch its status + self.mgr.log.error(f'Deployment of {daemon_spec.name()} failed during "systemctl start" command') + elif code: + # some other failure earlier in the deploy process. 
Just raise an exception + # the same as we would in _run_cephadm on a nonzero rc + raise OrchestratorError( + f'cephadm exited with an error code: {code}, stderr: {err}') + if daemon_spec.daemon_type == 'agent': self.mgr.agent_cache.agent_timestamp[daemon_spec.host] = datetime_now() self.mgr.agent_cache.agent_counter[daemon_spec.host] = 1 diff --git a/src/pybind/mgr/cephadm/services/cephadmservice.py b/src/pybind/mgr/cephadm/services/cephadmservice.py index 9043577bc5a..04f5af28a9b 100644 --- a/src/pybind/mgr/cephadm/services/cephadmservice.py +++ b/src/pybind/mgr/cephadm/services/cephadmservice.py @@ -1015,12 +1015,6 @@ class RgwService(CephService): # set rgw_realm rgw_zonegroup and rgw_zone, if present self.set_realm_zg_zone(spec) - if spec.generate_cert and not spec.rgw_frontend_ssl_certificate: - # generate a self-signed cert for the rgw service - cert, key = self.mgr.cert_mgr.ssl_certs.generate_root_cert(custom_san_list=spec.zonegroup_hostnames) - spec.rgw_frontend_ssl_certificate = ''.join([key, cert]) - self.mgr.spec_store.save(spec) - if spec.rgw_frontend_ssl_certificate: if isinstance(spec.rgw_frontend_ssl_certificate, list): cert_data = '\n'.join(spec.rgw_frontend_ssl_certificate) @@ -1068,6 +1062,19 @@ class RgwService(CephService): # and it matches the spec. 
port = spec.get_port() + if spec.generate_cert: + cert, key = self.mgr.cert_mgr.generate_cert( + daemon_spec.host, + self.mgr.inventory.get_addr(daemon_spec.host), + custom_san_list=spec.zonegroup_hostnames + ) + pem = ''.join([key, cert]) + ret, out, err = self.mgr.check_mon_command({ + 'prefix': 'config-key set', + 'key': f'rgw/cert/{daemon_spec.name()}', + 'val': pem, + }) + # configure frontend args = [] ftype = spec.rgw_frontend_type or "beast" @@ -1078,7 +1085,10 @@ class RgwService(CephService): f"ssl_endpoint={build_url(host=daemon_spec.ip, port=port).lstrip('/')}") else: args.append(f"ssl_port={port}") - args.append(f"ssl_certificate=config://rgw/cert/{spec.service_name()}") + if spec.generate_cert: + args.append(f"ssl_certificate=config://rgw/cert/{daemon_spec.name()}") + else: + args.append(f"ssl_certificate=config://rgw/cert/{spec.service_name()}") else: if daemon_spec.ip: args.append(f"endpoint={build_url(host=daemon_spec.ip, port=port).lstrip('/')}") @@ -1091,7 +1101,10 @@ class RgwService(CephService): args.append(f"port={build_url(host=daemon_spec.ip, port=port).lstrip('/')}s") else: args.append(f"port={port}s") # note the 's' suffix on port - args.append(f"ssl_certificate=config://rgw/cert/{spec.service_name()}") + if spec.generate_cert: + args.append(f"ssl_certificate=config://rgw/cert/{daemon_spec.name()}") + else: + args.append(f"ssl_certificate=config://rgw/cert/{spec.service_name()}") else: if daemon_spec.ip: args.append(f"port={build_url(host=daemon_spec.ip, port=port).lstrip('/')}") @@ -1180,6 +1193,10 @@ class RgwService(CephService): 'who': utils.name_to_config_section(daemon.name()), 'name': 'rgw_frontends', }) + self.mgr.check_mon_command({ + 'prefix': 'config-key rm', + 'key': f'rgw/cert/{daemon.name()}', + }) def ok_to_stop( self, diff --git a/src/pybind/mgr/cephadm/services/nvmeof.py b/src/pybind/mgr/cephadm/services/nvmeof.py index 13f75881453..418be93b6af 100644 --- a/src/pybind/mgr/cephadm/services/nvmeof.py +++ 
b/src/pybind/mgr/cephadm/services/nvmeof.py @@ -66,6 +66,10 @@ class NvmeofService(CephService): daemon_spec.keyring = keyring daemon_spec.extra_files = {'ceph-nvmeof.conf': gw_conf} + # Indicate to the daemon whether to utilize huge pages + if spec.spdk_mem_size: + daemon_spec.extra_files['spdk_mem_size'] = str(spec.spdk_mem_size) + if spec.enable_auth: if ( not spec.client_cert diff --git a/src/pybind/mgr/cephadm/ssl_cert_utils.py b/src/pybind/mgr/cephadm/ssl_cert_utils.py index 930b276c8de..467b32a4df0 100644 --- a/src/pybind/mgr/cephadm/ssl_cert_utils.py +++ b/src/pybind/mgr/cephadm/ssl_cert_utils.py @@ -70,7 +70,12 @@ class SSLCerts: return (cert_str, key_str) - def generate_cert(self, _hosts: Union[str, List[str]], _addrs: Union[str, List[str]]) -> Tuple[str, str]: + def generate_cert( + self, + _hosts: Union[str, List[str]], + _addrs: Union[str, List[str]], + custom_san_list: Optional[List[str]] = None, + ) -> Tuple[str, str]: addrs = [_addrs] if isinstance(_addrs, str) else _addrs hosts = [_hosts] if isinstance(_hosts, str) else _hosts @@ -97,6 +102,8 @@ class SSLCerts: san_list: List[x509.GeneralName] = [x509.DNSName(host) for host in hosts] if valid_ips: san_list.extend(ips) + if custom_san_list: + san_list.extend([x509.DNSName(n) for n in custom_san_list]) builder = builder.add_extension( x509.SubjectAlternativeName( diff --git a/src/pybind/mgr/cephadm/templates/services/nvmeof/ceph-nvmeof.conf.j2 b/src/pybind/mgr/cephadm/templates/services/nvmeof/ceph-nvmeof.conf.j2 index 760bc97e515..73f36952ee7 100644 --- a/src/pybind/mgr/cephadm/templates/services/nvmeof/ceph-nvmeof.conf.j2 +++ b/src/pybind/mgr/cephadm/templates/services/nvmeof/ceph-nvmeof.conf.j2 @@ -8,6 +8,7 @@ enable_auth = {{ spec.enable_auth }} state_update_notify = {{ spec.state_update_notify }} state_update_interval_sec = {{ spec.state_update_interval_sec }} enable_spdk_discovery_controller = {{ spec.enable_spdk_discovery_controller }} +enable_key_encryption = {{ spec.enable_key_encryption }} 
enable_prometheus_exporter = {{ spec.enable_prometheus_exporter }} prometheus_exporter_ssl = False prometheus_port = 10008 @@ -22,6 +23,10 @@ ping_spdk_under_lock = {{ spec.ping_spdk_under_lock }} enable_monitor_client = {{ spec.enable_monitor_client }} max_hosts_per_namespace = {{ spec.max_hosts_per_namespace }} max_namespaces_with_netmask = {{ spec.max_namespaces_with_netmask }} +max_subsystems = {{ spec.max_subsystems }} +max_namespaces = {{ spec.max_namespaces }} +max_namespaces_per_subsystem = {{ spec.max_namespaces_per_subsystem }} +max_hosts_per_subsystem = {{ spec.max_hosts_per_subsystem }} [gateway-logs] log_level = {{ spec.log_level }} @@ -65,6 +70,9 @@ protocol_log_level = {{ spec.spdk_protocol_log_level }} log_file_dir = {{ spec.spdk_log_file_dir }} {% endif %} conn_retries = {{ spec.conn_retries }} +{% if spec.spdk_mem_size %} +mem_size = {{ spec.spdk_mem_size }} +{% endif %} transports = {{ spec.transports }} {% if transport_tcp_options %} transport_tcp_options = {{ transport_tcp_options }} diff --git a/src/pybind/mgr/cephadm/tests/test_cephadm.py b/src/pybind/mgr/cephadm/tests/test_cephadm.py index 6d0c00d408c..3c647476e44 100644 --- a/src/pybind/mgr/cephadm/tests/test_cephadm.py +++ b/src/pybind/mgr/cephadm/tests/test_cephadm.py @@ -136,7 +136,7 @@ def with_osd_daemon(cephadm_module: CephadmOrchestrator, _run_cephadm, host: str mock.call(host, 'osd', 'ceph-volume', ['--', 'lvm', 'list', '--format', 'json'], no_fsid=False, error_ok=False, image='', log_output=True, use_current_daemon_image=False), - mock.call(host, f'osd.{osd_id}', ['_orch', 'deploy'], [], stdin=mock.ANY, use_current_daemon_image=False), + mock.call(host, f'osd.{osd_id}', ['_orch', 'deploy'], [], stdin=mock.ANY, error_ok=True, use_current_daemon_image=False), mock.call(host, 'osd', 'ceph-volume', ['--', 'raw', 'list', '--format', 'json'], no_fsid=False, error_ok=False, image='', log_output=True, use_current_daemon_image=False), @@ -499,7 +499,7 @@ class TestCephadm(object): 
CephadmServe(cephadm_module)._check_daemons() - assert _save_host.called_with('test') + _save_host.assert_called_with('test') assert cephadm_module.cache.get_scheduled_daemon_action('test', daemon_name) is None @mock.patch("cephadm.serve.CephadmServe._run_cephadm") @@ -563,6 +563,7 @@ class TestCephadm(object): }, }, }), + error_ok=True, use_current_daemon_image=True, ) @@ -618,6 +619,7 @@ class TestCephadm(object): "crush_location": "datacenter=a", }, }), + error_ok=True, use_current_daemon_image=False, ) @@ -660,6 +662,7 @@ class TestCephadm(object): "keyring": "[client.crash.test]\nkey = None\n", }, }), + error_ok=True, use_current_daemon_image=False, ) @@ -702,6 +705,7 @@ class TestCephadm(object): }, "config_blobs": {}, }), + error_ok=True, use_current_daemon_image=False, ) @@ -752,6 +756,7 @@ class TestCephadm(object): }, "config_blobs": {}, }), + error_ok=True, use_current_daemon_image=False, ) @@ -806,6 +811,7 @@ class TestCephadm(object): }, "config_blobs": {}, }), + error_ok=True, use_current_daemon_image=False, ) @@ -2849,15 +2855,15 @@ Traceback (most recent call last): # pass force=true in these tests to bypass _admin label check with with_host(cephadm_module, 'test', refresh_hosts=False, rm_with_force=True): cephadm_module.drain_host('test', force=True, zap_osd_devices=False) - assert _rm_osds.called_with([], zap=False) + _rm_osds.assert_called_with([], zap=False) with with_host(cephadm_module, 'test', refresh_hosts=False, rm_with_force=True): cephadm_module.drain_host('test', force=True, zap_osd_devices=True) - assert _rm_osds.called_with([], zap=True) + _rm_osds.assert_called_with([], zap=True) with pytest.raises(OrchestratorError, match=r"Cannot find host 'host1' in the inventory."): cephadm_module.drain_host('host1', force=True, zap_osd_devices=True) - assert _rm_osds.called_with([], zap=True) + _rm_osds.assert_called_with([], zap=True) def test_process_ls_output(self, cephadm_module): sample_ls_output = """[ diff --git 
a/src/pybind/mgr/cephadm/tests/test_services.py b/src/pybind/mgr/cephadm/tests/test_services.py index 824e37cf4d4..75c7c3c5bf7 100644 --- a/src/pybind/mgr/cephadm/tests/test_services.py +++ b/src/pybind/mgr/cephadm/tests/test_services.py @@ -349,6 +349,7 @@ log_to_file = False""" }, } }), + error_ok=True, use_current_daemon_image=False, ) @@ -397,6 +398,7 @@ enable_auth = False state_update_notify = True state_update_interval_sec = 5 enable_spdk_discovery_controller = False +enable_key_encryption = True enable_prometheus_exporter = True prometheus_exporter_ssl = False prometheus_port = 10008 @@ -409,8 +411,12 @@ allowed_consecutive_spdk_ping_failures = 1 spdk_ping_interval_in_seconds = 2.0 ping_spdk_under_lock = False enable_monitor_client = True -max_hosts_per_namespace = 1 +max_hosts_per_namespace = 8 max_namespaces_with_netmask = 1000 +max_subsystems = 128 +max_namespaces = 1024 +max_namespaces_per_subsystem = 256 +max_hosts_per_subsystem = 32 [gateway-logs] log_level = INFO @@ -489,6 +495,7 @@ timeout = 1.0\n""" } } }), + error_ok=True, use_current_daemon_image=False, ) @@ -601,6 +608,7 @@ class TestMonitoring: "use_url_prefix": False, } }), + error_ok=True, use_current_daemon_image=False, ) @@ -696,6 +704,7 @@ class TestMonitoring: "use_url_prefix": True, } }), + error_ok=True, use_current_daemon_image=False, ) @@ -788,6 +797,7 @@ class TestMonitoring: "use_url_prefix": False, } }), + error_ok=True, use_current_daemon_image=False, ) @@ -833,6 +843,7 @@ class TestMonitoring: "files": { "ceph-exporter.crt": "mycert", "ceph-exporter.key": "mykey"}}}), + error_ok=True, use_current_daemon_image=False) @patch("cephadm.serve.CephadmServe._run_cephadm") @@ -876,6 +887,7 @@ class TestMonitoring: }, "config_blobs": {} }), + error_ok=True, use_current_daemon_image=False, ) @@ -936,6 +948,7 @@ class TestMonitoring: 'web_config': '/etc/node-exporter/web.yml', } }), + error_ok=True, use_current_daemon_image=False, ) @@ -1071,6 +1084,7 @@ class TestMonitoring: 
"use_url_prefix": False }, }), + error_ok=True, use_current_daemon_image=False, ) @@ -1303,6 +1317,7 @@ class TestMonitoring: "use_url_prefix": False }, }), + error_ok=True, use_current_daemon_image=False, ) @@ -1379,6 +1394,7 @@ class TestMonitoring: }, }, }), + error_ok=True, use_current_daemon_image=False, ) @@ -1438,6 +1454,7 @@ class TestMonitoring: }, }, }), + error_ok=True, use_current_daemon_image=False, ) @@ -1588,6 +1605,7 @@ class TestMonitoring: "files": files, }, }), + error_ok=True, use_current_daemon_image=False, ) @@ -1718,6 +1736,7 @@ class TestMonitoring: "files": files, }, }), + error_ok=True, use_current_daemon_image=False, ) @@ -1831,6 +1850,7 @@ class TestMonitoring: "files": files, }, }), + error_ok=True, use_current_daemon_image=False, ) @@ -2005,6 +2025,7 @@ spec: }, "config_blobs": {}, }), + error_ok=True, use_current_daemon_image=False, ) @@ -2112,6 +2133,7 @@ class TestSNMPGateway: }, "config_blobs": config, }), + error_ok=True, use_current_daemon_image=False, ) @@ -2160,6 +2182,7 @@ class TestSNMPGateway: }, "config_blobs": config, }), + error_ok=True, use_current_daemon_image=False, ) @@ -2212,6 +2235,7 @@ class TestSNMPGateway: }, "config_blobs": config, }), + error_ok=True, use_current_daemon_image=False, ) @@ -2269,6 +2293,7 @@ class TestSNMPGateway: }, "config_blobs": config, }), + error_ok=True, use_current_daemon_image=False, ) @@ -3361,6 +3386,7 @@ class TestJaeger: }, "config_blobs": config, }), + error_ok=True, use_current_daemon_image=False, ) @@ -3401,6 +3427,7 @@ class TestJaeger: }, "config_blobs": es_config, }), + error_ok=True, use_current_daemon_image=False, ) with with_service(cephadm_module, collector_spec): @@ -3429,6 +3456,7 @@ class TestJaeger: }, "config_blobs": collector_config, }), + error_ok=True, use_current_daemon_image=False, ) @@ -3469,6 +3497,7 @@ class TestJaeger: }, "config_blobs": collector_config, }), + error_ok=True, use_current_daemon_image=False, ) with with_service(cephadm_module, agent_spec): @@ 
-3497,6 +3526,7 @@ class TestJaeger: }, "config_blobs": agent_config, }), + error_ok=True, use_current_daemon_image=False, ) @@ -3554,6 +3584,7 @@ class TestCustomContainer: }, } ), + error_ok=True, use_current_daemon_image=False, ) @@ -3641,6 +3672,7 @@ class TestCustomContainer: ['_orch', 'deploy'], [], stdin=json.dumps(expected), + error_ok=True, use_current_daemon_image=False, ) @@ -3694,6 +3726,7 @@ class TestSMB: ['_orch', 'deploy'], [], stdin=json.dumps(expected), + error_ok=True, use_current_daemon_image=False, ) @@ -3766,6 +3799,7 @@ class TestSMB: ['_orch', 'deploy'], [], stdin=json.dumps(expected), + error_ok=True, use_current_daemon_image=False, ) @@ -4011,6 +4045,7 @@ class TestMgmtGateway: ['_orch', 'deploy'], [], stdin=json.dumps(expected), + error_ok=True, use_current_daemon_image=False, ) @@ -4352,6 +4387,7 @@ class TestMgmtGateway: ['_orch', 'deploy'], [], stdin=json.dumps(expected), + error_ok=True, use_current_daemon_image=False, ) @@ -4475,5 +4511,6 @@ class TestMgmtGateway: ['_orch', 'deploy'], [], stdin=json.dumps(expected), + error_ok=True, use_current_daemon_image=False, ) diff --git a/src/pybind/mgr/mgr_module.py b/src/pybind/mgr/mgr_module.py index 29ddff2ffc2..76ad8d9d0ce 100644 --- a/src/pybind/mgr/mgr_module.py +++ b/src/pybind/mgr/mgr_module.py @@ -1285,7 +1285,7 @@ class MgrModule(ceph_module.BaseMgrModule, MgrModuleLoggingMixin): if latest < version: raise RuntimeError(f"main.db version is newer ({version}) than module ({latest})") for i in range(version, latest): - self.log.info(f"upgrading main.db for {self.module_name} from {i-1}:{i}") + self.log.info(f"upgrading main.db for {self.module_name} from {i - 1}:{i}") for sql in self.SCHEMA_VERSIONED[i]: db.execute(sql) if version < latest: diff --git a/src/pybind/mgr/orchestrator/_interface.py b/src/pybind/mgr/orchestrator/_interface.py index efe0de55bd0..a505801eea5 100644 --- a/src/pybind/mgr/orchestrator/_interface.py +++ b/src/pybind/mgr/orchestrator/_interface.py @@ -901,10 
+901,18 @@ class Orchestrator(object): """Change/Add a specific setting for a tuned profile""" raise NotImplementedError() + def tuned_profile_add_settings(self, profile_name: str, setting: dict) -> OrchResult[str]: + """Change/Add multiple settings for a tuned profile""" + raise NotImplementedError() + def tuned_profile_rm_setting(self, profile_name: str, setting: str) -> OrchResult[str]: """Remove a specific setting for a tuned profile""" raise NotImplementedError() + def tuned_profile_rm_settings(self, profile_name: str, settings: List[str]) -> OrchResult[str]: + """Remove multiple settings from a tuned profile""" + raise NotImplementedError() + def upgrade_check(self, image: Optional[str], version: Optional[str]) -> OrchResult[str]: raise NotImplementedError() diff --git a/src/pybind/mgr/orchestrator/module.py b/src/pybind/mgr/orchestrator/module.py index fdb9280d7ed..332bc75d862 100644 --- a/src/pybind/mgr/orchestrator/module.py +++ b/src/pybind/mgr/orchestrator/module.py @@ -2250,6 +2250,39 @@ Usage: res = raise_if_exception(completion) return HandleCommandResult(stdout=res) + @_cli_write_command("orch tuned-profile add-settings") + def _tuned_profile_add_settings(self, profile_name: str, settings: str) -> HandleCommandResult: + try: + setting_pairs = settings.split(",") + # parse "key=value" pairs into a dict + parsed_setting = {key.strip(): value.strip() for key, value in (s.split('=', 1) for s in setting_pairs)} + completion = self.tuned_profile_add_settings(profile_name, parsed_setting) + res = raise_if_exception(completion) + return HandleCommandResult(stdout=res) + except ValueError: + error_message = ( + "Error: Invalid format detected. 
" + "The correct format is key=value pairs separated by commas," + "e.g., 'vm.swappiness=11,vm.user_reserve_kbytes=116851'" + ) + return HandleCommandResult(stderr=error_message) + + @_cli_write_command("orch tuned-profile rm-settings") + def _tuned_profile_rm_settings(self, profile_name: str, settings: str) -> HandleCommandResult: + try: + setting = [s.strip() for s in settings.split(",") if s.strip()] + if not setting: + raise ValueError( + "Error: Invalid format." + "The correct format is key1,key2" + "e.g., vm.swappiness,vm.user_reserve_kbytes" + ) + completion = self.tuned_profile_rm_settings(profile_name, setting) + res = raise_if_exception(completion) + return HandleCommandResult(stdout=res) + except ValueError as e: + return HandleCommandResult(stderr=str(e)) + def self_test(self) -> None: old_orch = self._select_orchestrator() self._set_backend('') diff --git a/src/python-common/CMakeLists.txt b/src/python-common/CMakeLists.txt index e89bbe2feef..08660342a6a 100644 --- a/src/python-common/CMakeLists.txt +++ b/src/python-common/CMakeLists.txt @@ -3,5 +3,5 @@ distutils_install_module(ceph) if(WITH_TESTS) include(AddCephTest) - add_tox_test(python-common TOX_ENVS py3 lint) + add_tox_test(python-common TOX_ENVS __tox_defaults__) endif() diff --git a/src/python-common/ceph/deployment/drive_group.py b/src/python-common/ceph/deployment/drive_group.py index c68ee01a728..43175aa79fb 100644 --- a/src/python-common/ceph/deployment/drive_group.py +++ b/src/python-common/ceph/deployment/drive_group.py @@ -2,7 +2,7 @@ import enum import yaml from ceph.deployment.inventory import Device -from ceph.deployment.service_spec import ( +from ceph.deployment.service_spec import ( # noqa: F401 (type comments) CustomConfig, GeneralArgList, PlacementSpec, @@ -11,7 +11,7 @@ from ceph.deployment.service_spec import ( from ceph.deployment.hostspec import SpecValidationError try: - from typing import Optional, List, Dict, Any, Union + from typing import Optional, List, Dict, Any, 
Union # noqa: F401 except ImportError: pass diff --git a/src/python-common/ceph/deployment/drive_selection/filter.py b/src/python-common/ceph/deployment/drive_selection/filter.py index 0da1b5c3901..28f63ddc2f2 100644 --- a/src/python-common/ceph/deployment/drive_selection/filter.py +++ b/src/python-common/ceph/deployment/drive_selection/filter.py @@ -15,12 +15,10 @@ logger = logging.getLogger(__name__) class FilterGenerator(object): - def __init__(self, device_filter): - # type: (DeviceSelection) -> None + def __init__(self, device_filter: DeviceSelection) -> None: self.device_filter = device_filter - def __iter__(self): - # type: () -> Generator[Matcher, None, None] + def __iter__(self) -> Generator[Matcher, None, None]: if self.device_filter.actuators: yield EqualityMatcher('actuators', self.device_filter.actuators) if self.device_filter.size: diff --git a/src/python-common/ceph/deployment/drive_selection/matchers.py b/src/python-common/ceph/deployment/drive_selection/matchers.py index df502410aeb..a6a2147ce9e 100644 --- a/src/python-common/ceph/deployment/drive_selection/matchers.py +++ b/src/python-common/ceph/deployment/drive_selection/matchers.py @@ -1,8 +1,9 @@ # -*- coding: utf-8 -*- -from typing import Tuple, Optional, Any, Union, Iterator +# TODO: remove noqa and update to python3/mypy style type annotations +from typing import Tuple, Optional, Any, Union, Iterator # noqa: F401 -from ceph.deployment.inventory import Device +from ceph.deployment.inventory import Device # noqa: F401 import re import logging diff --git a/src/python-common/ceph/deployment/drive_selection/selector.py b/src/python-common/ceph/deployment/drive_selection/selector.py index 59ebbb6347e..85fc95cf394 100644 --- a/src/python-common/ceph/deployment/drive_selection/selector.py +++ b/src/python-common/ceph/deployment/drive_selection/selector.py @@ -3,7 +3,7 @@ import logging from typing import List, Optional, Dict, Callable from ..inventory import Device -from ..drive_group import 
DriveGroupSpec, DeviceSelection, DriveGroupValidationError +from ..drive_group import DriveGroupSpec, DeviceSelection, DriveGroupValidationError # noqa: F401 from .filter import FilterGenerator from .matchers import _MatchInvalid diff --git a/src/python-common/ceph/deployment/inventory.py b/src/python-common/ceph/deployment/inventory.py index e2c1a5605f9..29475e94d82 100644 --- a/src/python-common/ceph/deployment/inventory.py +++ b/src/python-common/ceph/deployment/inventory.py @@ -1,5 +1,5 @@ try: - from typing import List, Optional, Dict, Any, Union + from typing import List, Optional, Dict, Any, Union # noqa: F401 except ImportError: pass # for type checking diff --git a/src/python-common/ceph/deployment/service_spec.py b/src/python-common/ceph/deployment/service_spec.py index 979c14f7d00..10e9f267940 100644 --- a/src/python-common/ceph/deployment/service_spec.py +++ b/src/python-common/ceph/deployment/service_spec.py @@ -527,8 +527,8 @@ pattern_type=PatternType.fnmatch)) labels = [x for x in strings if 'label:' in x] if len(labels) > 1: raise SpecValidationError('more than one label provided: {}'.format(labels)) - for l in labels: - strings.remove(l) + for lbl in labels: + strings.remove(lbl) label = labels[0][6:] if labels else None host_patterns = strings @@ -701,7 +701,7 @@ class ArgumentSpec: if isinstance(data, str): return cls(data, split=True, origin=cls.OriginalType.STRING) if 'argument' not in data: - raise SpecValidationError(f'ArgumentSpec must have an "argument" field') + raise SpecValidationError('ArgumentSpec must have an "argument" field') for k in data.keys(): if k not in cls._fields: raise SpecValidationError(f'ArgumentSpec got an unknown field {k!r}') @@ -1313,6 +1313,10 @@ class RGWSpec(ServiceSpec): raise SpecValidationError('"ssl" field must be set to true when "generate_cert" ' 'is set to true') + if self.generate_cert and self.rgw_frontend_ssl_certificate: + raise SpecValidationError('"generate_cert" field and 
"rgw_frontend_ssl_certificate" ' + 'field are mutually exclusive') + yaml.add_representer(RGWSpec, ServiceSpec.yaml_representer) @@ -1330,6 +1334,7 @@ class NvmeofServiceSpec(ServiceSpec): state_update_notify: Optional[bool] = True, state_update_interval_sec: Optional[int] = 5, enable_spdk_discovery_controller: Optional[bool] = False, + enable_key_encryption: Optional[bool] = True, omap_file_lock_duration: Optional[int] = 20, omap_file_lock_retries: Optional[int] = 30, omap_file_lock_retry_sleep_interval: Optional[float] = 1.0, @@ -1340,14 +1345,20 @@ class NvmeofServiceSpec(ServiceSpec): allowed_consecutive_spdk_ping_failures: Optional[int] = 1, spdk_ping_interval_in_seconds: Optional[float] = 2.0, ping_spdk_under_lock: Optional[bool] = False, - max_hosts_per_namespace: Optional[int] = 1, + max_hosts_per_namespace: Optional[int] = 8, max_namespaces_with_netmask: Optional[int] = 1000, + max_subsystems: Optional[int] = 128, + max_namespaces: Optional[int] = 1024, + max_namespaces_per_subsystem: Optional[int] = 256, + max_hosts_per_subsystem: Optional[int] = 32, server_key: Optional[str] = None, server_cert: Optional[str] = None, client_key: Optional[str] = None, client_cert: Optional[str] = None, root_ca_cert: Optional[str] = None, + # unused and duplicate of tgt_path below, consider removing spdk_path: Optional[str] = None, + spdk_mem_size: Optional[int] = None, tgt_path: Optional[str] = None, spdk_timeout: Optional[float] = 60.0, spdk_log_level: Optional[str] = '', @@ -1409,6 +1420,8 @@ class NvmeofServiceSpec(ServiceSpec): self.state_update_interval_sec = state_update_interval_sec #: ``enable_spdk_discovery_controller`` SPDK or ceph-nvmeof discovery service self.enable_spdk_discovery_controller = enable_spdk_discovery_controller + #: ``enable_key_encryption`` encrypt DHCHAP and PSK keys before saving in OMAP + self.enable_key_encryption = enable_key_encryption #: ``enable_prometheus_exporter`` enables Prometheus exporter self.enable_prometheus_exporter = 
enable_prometheus_exporter #: ``verify_nqns`` enables verification of subsystem and host NQNs for validity @@ -1425,6 +1438,14 @@ class NvmeofServiceSpec(ServiceSpec): self.max_hosts_per_namespace = max_hosts_per_namespace #: ``max_namespaces_with_netmask`` max number of namespaces which are not auto visible self.max_namespaces_with_netmask = max_namespaces_with_netmask + #: ``max_subsystems`` max number of subsystems + self.max_subsystems = max_subsystems + #: ``max_namespaces`` max number of namespaces on all subsystems + self.max_namespaces = max_namespaces + #: ``max_namespaces_per_subsystem`` max number of namespaces per one subsystem + self.max_namespaces_per_subsystem = max_namespaces_per_subsystem + #: ``max_hosts_per_subsystem`` max number of hosts per subsystems + self.max_hosts_per_subsystem = max_hosts_per_subsystem #: ``allowed_consecutive_spdk_ping_failures`` # of ping failures before aborting gateway self.allowed_consecutive_spdk_ping_failures = allowed_consecutive_spdk_ping_failures #: ``spdk_ping_interval_in_seconds`` sleep interval in seconds between SPDK pings @@ -1443,8 +1464,10 @@ class NvmeofServiceSpec(ServiceSpec): self.client_cert = client_cert #: ``root_ca_cert`` CA cert for server/client certs self.root_ca_cert = root_ca_cert - #: ``spdk_path`` path to SPDK + #: ``spdk_path`` path is unused and duplicate of tgt_path below, consider removing self.spdk_path = spdk_path or '/usr/local/bin/nvmf_tgt' + #: ``spdk_mem_size`` memory size in MB for DPDK + self.spdk_mem_size = spdk_mem_size #: ``tgt_path`` nvmeof target path self.tgt_path = tgt_path or '/usr/local/bin/nvmf_tgt' #: ``spdk_timeout`` SPDK connectivity timeout @@ -1613,6 +1636,36 @@ class NvmeofServiceSpec(ServiceSpec): ): raise SpecValidationError("Log file directory backups can't be negative") + if (self.max_hosts_per_namespace and self.max_hosts_per_namespace < 0): + raise SpecValidationError("Max hosts per namespace can't be negative") + + if (self.max_namespaces_with_netmask and 
self.max_namespaces_with_netmask < 0): + raise SpecValidationError("Max namespaces with netmask can't be negative") + + if not isinstance(self.max_subsystems, int): + raise SpecValidationError("Max subsystems must be an integer") + + if self.max_subsystems <= 0: + raise SpecValidationError("Max subsystems must be greater than zero") + + if not isinstance(self.max_namespaces, int): + raise SpecValidationError("Max namespaces must be an integer") + + if self.max_namespaces <= 0: + raise SpecValidationError("Max namespaces must be greater than zero") + + if not isinstance(self.max_namespaces_per_subsystem, int): + raise SpecValidationError("Max namespaces per subsystem must be an integer") + + if self.max_namespaces_per_subsystem <= 0: + raise SpecValidationError("Max namespaces per subsystem must be greater than zero") + + if not isinstance(self.max_hosts_per_subsystem, int): + raise SpecValidationError("Max hosts per subsystem must be an integer") + + if self.max_hosts_per_subsystem <= 0: + raise SpecValidationError("Max hosts per subsystem must be greater than zero") + if ( self.monitor_timeout and self.monitor_timeout < 0.0 diff --git a/src/python-common/ceph/deployment/translate.py b/src/python-common/ceph/deployment/translate.py index 49fb17da725..9dfe7cfcf81 100644 --- a/src/python-common/ceph/deployment/translate.py +++ b/src/python-common/ceph/deployment/translate.py @@ -5,7 +5,7 @@ try: except ImportError: pass -from ceph.deployment.drive_selection.selector import DriveSelection +from ceph.deployment.drive_selection.selector import DriveSelection # noqa: F401 logger = logging.getLogger(__name__) diff --git a/src/python-common/ceph/fs/earmarking.py b/src/python-common/ceph/fs/earmarking.py index c5d4a59a4d5..f4fd4ddf96c 100644 --- a/src/python-common/ceph/fs/earmarking.py +++ b/src/python-common/ceph/fs/earmarking.py @@ -19,13 +19,25 @@ supported top-level scopes. 
import errno import enum import logging -from typing import List, NamedTuple, Optional, Tuple +from typing import List, NamedTuple, Optional, Tuple, Protocol log = logging.getLogger(__name__) XATTR_SUBVOLUME_EARMARK_NAME = 'user.ceph.subvolume.earmark' +class FSOperations(Protocol): + """Protocol class representing the file system operations earmarking + classes will perform. + """ + + def setxattr( + self, path: str, key: str, value: bytes, flags: int + ) -> None: ... + + def getxattr(self, path: str, key: str) -> bytes: ... + + class EarmarkTopScope(enum.Enum): NFS = "nfs" SMB = "smb" @@ -53,11 +65,11 @@ class EarmarkParseError(ValueError): class CephFSVolumeEarmarking: - def __init__(self, fs, path: str) -> None: + def __init__(self, fs: FSOperations, path: str) -> None: self.fs = fs self.path = path - def _handle_cephfs_error(self, e: Exception, action: str) -> None: + def _handle_cephfs_error(self, e: Exception, action: str) -> Optional[str]: if isinstance(e, ValueError): raise EarmarkException(errno.EINVAL, f"Invalid earmark specified: {e}") from e elif isinstance(e, OSError): @@ -135,7 +147,7 @@ class CephFSVolumeEarmarking: except Exception as e: return self._handle_cephfs_error(e, "getting") - def set_earmark(self, earmark: str): + def set_earmark(self, earmark: str) -> None: # Validate the earmark before attempting to set it if not self._validate_earmark(earmark): raise EarmarkException( diff --git a/src/python-common/ceph/tests/utils.py b/src/python-common/ceph/tests/utils.py index 04b8a4e3895..20a39e4666b 100644 --- a/src/python-common/ceph/tests/utils.py +++ b/src/python-common/ceph/tests/utils.py @@ -35,8 +35,7 @@ def _mk_device(rotational=True, )] -def _mk_inventory(devices): - # type: (Any) -> List[Device] +def _mk_inventory(devices: Any) -> List[Device]: devs = [] for dev_, name in zip(devices, map(chr, range(ord('a'), ord('z')))): dev = Device.from_json(dev_.to_json()) diff --git a/src/python-common/requirements-lint.txt 
b/src/python-common/requirements-lint.txt deleted file mode 100644 index 2a7142182c2..00000000000 --- a/src/python-common/requirements-lint.txt +++ /dev/null @@ -1,2 +0,0 @@ -flake8==3.7.8 -rstcheck==3.3.1 diff --git a/src/python-common/tox.ini b/src/python-common/tox.ini index 313a4334d51..e0b59c700ca 100644 --- a/src/python-common/tox.ini +++ b/src/python-common/tox.ini @@ -1,5 +1,5 @@ [tox] -envlist = py3, mypy, lint +envlist = lint, rstcheck, mypy, py3 skip_missing_interpreters = true [testenv:py3] @@ -26,9 +26,13 @@ exclude = __pycache__ [testenv:lint] -deps = - -rrequirements-lint.txt +deps = + flake8 commands = flake8 {posargs:ceph} - rstcheck --report info --debug README.rst +[testenv:rstcheck] +deps = + rstcheck +commands = + rstcheck --report-level info README.rst diff --git a/src/rgw/driver/dbstore/README.md b/src/rgw/driver/dbstore/README.md index f7e5df331cc..2bf3391c8c4 100644 --- a/src/rgw/driver/dbstore/README.md +++ b/src/rgw/driver/dbstore/README.md @@ -15,23 +15,21 @@ Add below cmake option (enabled by default) ## Running Test cluster -Edit ceph.conf to add below option +Edit ceph.conf to add below options [client] rgw backend store = dbstore rgw config store = dbstore -Start vstart cluster +To start the `vstart` cluster, run the following cmd: - MON=1 RGW=1 ../src/vstart.sh -o rgw_backend_store=dbstore -o rgw_config_store=dbstore -n -d + MON=0 OSD=0 MDS=0 MGR=0 RGW=1 ../src/vstart.sh -n -d --rgw_store dbstore -The above vstart command brings up RGW server on dbstore. It creates default zonegroup, zone and few default users (eg., testid) to be used for s3 operations. +The above `vstart` command brings up the RGW server on DBStore without the need for MONs or OSDs. It creates a default zonegroup, zone, and few default users (e.g., `testid`) to be used for S3 operations, and generates database files in the `dev` subdirectory, by default, to store them. -`radosgw-admin` can be used to create and remove other users, zonegroups and zones. 
- - -By default, dbstore creates .db file *'/var/lib/ceph/radosgw/dbstore-default_ns.db'* to store the data and *'/var/lib/ceph/radosgw/dbstore-config.db'* file to store the configuration. This can be configured using below options in ceph.conf +`radosgw-admin` command can be used to create and remove other users, zonegroups and zones. +The location and prefix for the database files can be configured using the following options: [client] dbstore db dir = <path for the directory for storing the db backend store data> dbstore db name prefix = <prefix to the file names created by db backend store> diff --git a/src/rgw/driver/posix/README.md b/src/rgw/driver/posix/README.md index 02dc8dfbe85..73971edc86f 100644 --- a/src/rgw/driver/posix/README.md +++ b/src/rgw/driver/posix/README.md @@ -23,15 +23,15 @@ Edit ceph.conf to add below option rgw config store = dbstore rgw filter = posix -Start vstart cluster +To start the `vstart` cluster, run the following cmd: - MON=0 OSD=0 MDS=0 MGR=0 RGW=1 ../src/vstart.sh -o rgw_backend_store=dbstore -o rgw_config_store=dbstore -o rgw_filter=posix -n -d + MON=0 OSD=0 MDS=0 MGR=0 RGW=1 ../src/vstart.sh -n -d --rgw_store posix -The above vstart command brings up RGW server on POSIXDriver. It creates default zonegroup, zone and few default users (eg., testid) to be used for s3 operations. +The above vstart command brings up RGW server on POSIXDriver. It creates default zonegroup, zone and few default users (e.g., testid) to be used for s3 operations. -`radosgw-admin` can be used to create and remove other users, zonegroups and zones. +`radosgw-admin` command can be used to create and remove other users, zonegroups and zones. -By default, the directory exported is *'/tmp/rgw_posix_driver'*. This can be changed with the `rgw_posix_base_path` option, either in ceph.conf or on the vstart command line above. +By default, the directory exported, *'rgw_posix_driver'*, is created in the `dev` subdirectory. 
This can be changed with the `rgw_posix_base_path` option. -The POSIXDriver keeps a LMDB based cache of directories, so that it can provide ordered listings. This directory lives in `rgw_posix_database_root`, which by default is in *'/var/lib/ceph/radosgw'* +The POSIXDriver keeps a LMDB based cache of directories, so that it can provide ordered listings. This directory lives in `rgw_posix_database_root`, which by default is created in the `dev` subdirectory diff --git a/src/rgw/driver/rados/rgw_notify.cc b/src/rgw/driver/rados/rgw_notify.cc index 7b31fd72bd4..5734284d1a3 100644 --- a/src/rgw/driver/rados/rgw_notify.cc +++ b/src/rgw/driver/rados/rgw_notify.cc @@ -21,6 +21,7 @@ #include "common/dout.h" #include "rgw_url.h" #include <chrono> +#include <fmt/format.h> #define dout_subsys ceph_subsys_rgw_notification @@ -769,9 +770,10 @@ public: }); // start the worker threads to do the actual queue processing - const std::string WORKER_THREAD_NAME = "notif-worker"; for (auto worker_id = 0U; worker_id < worker_count; ++worker_id) { - workers.emplace_back([this]() { + workers.emplace_back([this,worker_id]() { + const auto thread_name = fmt::format("notif-worker-{}", worker_id); + ceph_pthread_setname(thread_name.c_str()); try { io_context.run(); } catch (const std::exception& err) { @@ -779,11 +781,6 @@ public: throw err; } }); - const auto thread_name = WORKER_THREAD_NAME+std::to_string(worker_id); - if (const auto rc = ceph_pthread_setname(workers.back().native_handle(), thread_name.c_str()); rc != 0) { - ldpp_dout(this, 1) << "ERROR: failed to set notification manager thread name to: " << thread_name - << ". 
error: " << rc << dendl; - } } ldpp_dout(this, 10) << "INfO: started notification manager with: " << worker_count << " workers" << dendl; } diff --git a/src/rgw/rgw_amqp.cc b/src/rgw/rgw_amqp.cc index 7504d47c6c9..5bc5d173c73 100644 --- a/src/rgw/rgw_amqp.cc +++ b/src/rgw/rgw_amqp.cc @@ -650,6 +650,9 @@ private: // (4) TODO reconnect on connection errors // (5) TODO cleanup timedout callbacks void run() noexcept { + // give the runner thread a name for easier debugging + ceph_pthread_setname("amqp_manager"); + amqp_frame_t frame; while (!stopped) { @@ -838,12 +841,6 @@ public: // This is to prevent rehashing so that iterators are not invalidated // when a new connection is added. connections.max_load_factor(10.0); - // give the runner thread a name for easier debugging - const char* thread_name = "amqp_manager"; - if (const auto rc = ceph_pthread_setname(runner.native_handle(), thread_name); rc != 0) { - ldout(cct, 1) << "ERROR: failed to set amqp manager thread name to: " << thread_name - << ". error: " << rc << dendl; - } } // non copyable diff --git a/src/rgw/rgw_common.cc b/src/rgw/rgw_common.cc index 68fb9a29278..1a59ba02999 100644 --- a/src/rgw/rgw_common.cc +++ b/src/rgw/rgw_common.cc @@ -2994,7 +2994,9 @@ void RGWAccessKey::decode_json(JSONObj *obj) { subuser = user.substr(pos + 1); } } - JSONDecoder::decode_json("active", active, obj); + if (bool tmp = false; JSONDecoder::decode_json("active", tmp, obj)) { + active = tmp; // update only if "active" is present + } JSONDecoder::decode_json("create_date", create_date, obj); } diff --git a/src/rgw/rgw_kafka.cc b/src/rgw/rgw_kafka.cc index c0ec3dc2c55..0807993338d 100644 --- a/src/rgw/rgw_kafka.cc +++ b/src/rgw/rgw_kafka.cc @@ -503,6 +503,7 @@ private: } void run() noexcept { + ceph_pthread_setname("kafka_manager"); while (!stopped) { // publish all messages in the queue @@ -575,12 +576,6 @@ public: // This is to prevent rehashing so that iterators are not invalidated // when a new connection is added. 
connections.max_load_factor(10.0); - // give the runner thread a name for easier debugging - const char* thread_name = "kafka_manager"; - if (const auto rc = ceph_pthread_setname(runner.native_handle(), thread_name); rc != 0) { - ldout(cct, 1) << "ERROR: failed to set kafka manager thread name to: " << thread_name - << ". error: " << rc << dendl; - } } // non copyable diff --git a/src/rgw/rgw_lua_background.cc b/src/rgw/rgw_lua_background.cc index ef97a5d6f65..c5b815f93f5 100644 --- a/src/rgw/rgw_lua_background.cc +++ b/src/rgw/rgw_lua_background.cc @@ -83,11 +83,6 @@ void Background::start() { } started = true; runner = std::thread(&Background::run, this); - const char* thread_name = "lua_background"; - if (const auto rc = ceph_pthread_setname(runner.native_handle(), thread_name); rc != 0) { - ldout(cct, 1) << "ERROR: failed to set lua background thread name to: " << thread_name - << ". error: " << rc << dendl; - } } void Background::pause() { @@ -127,6 +122,7 @@ const BackgroundMapValue& Background::get_table_value(const std::string& key) co //(2) Executes the script //(3) Sleep (configurable) void Background::run() { + ceph_pthread_setname("lua_background"); const DoutPrefixProvider* const dpp = &dp; lua_state_guard lguard(cct->_conf->rgw_lua_max_memory_per_state, dpp); auto L = lguard.get(); diff --git a/src/rgw/rgw_op.cc b/src/rgw/rgw_op.cc index 67829e6320a..0dcf1e0f7d5 100644 --- a/src/rgw/rgw_op.cc +++ b/src/rgw/rgw_op.cc @@ -6811,6 +6811,8 @@ void RGWCompleteMultipart::execute(optional_yield y) if (upload->cksum_type != rgw::cksum::Type::none) { op_ret = try_sum_part_cksums(this, s->cct, upload.get(), parts, cksum, y); if (op_ret < 0) { + ldpp_dout(this, 16) << "ERROR: try_sum_part_cksums failed, obj=" + << meta_obj << " ret=" << op_ret << dendl; return; } } @@ -6835,13 +6837,23 @@ void RGWCompleteMultipart::execute(optional_yield y) rgw::putobj::find_hdr_cksum(*(s->info.env)); ldpp_dout_fmt(this, 10, - "INFO: client supplied checksum {}: {}", + "INFO: 
client supplied checksum {}: {} ", hdr_cksum.header_name(), supplied_cksum); if (! (supplied_cksum.empty()) && (supplied_cksum != armored_cksum)) { - op_ret = -ERR_INVALID_REQUEST; - return; + /* some minio SDK clients assert a checksum that is cryptographically + * valid but omits the part count */ + auto parts_suffix = fmt::format("-{}", parts->parts.size()); + auto suffix_len = armored_cksum->size() - parts_suffix.size(); + if (armored_cksum->compare(0, suffix_len, supplied_cksum) != 0) { + ldpp_dout_fmt(this, 4, + "{} content checksum mismatch" + "\n\tcalculated={} != \n\texpected={}", + hdr_cksum.header_name(), armored_cksum, supplied_cksum); + op_ret = -ERR_INVALID_REQUEST; + return; + } } buffer::list cksum_bl; diff --git a/src/rgw/rgw_ratelimit.h b/src/rgw/rgw_ratelimit.h index 0db1813f050..beb0eb3b1d2 100644 --- a/src/rgw/rgw_ratelimit.h +++ b/src/rgw/rgw_ratelimit.h @@ -239,6 +239,7 @@ class ActiveRateLimiter : public DoutPrefix { std::atomic_uint8_t current_active = 0; std::shared_ptr<RateLimiter> ratelimit[2]; void replace_active() { + ceph_pthread_setname("ratelimit_gc"); using namespace std::chrono_literals; std::unique_lock<std::mutex> lk(cv_m); while (!stopped) { @@ -286,8 +287,5 @@ class ActiveRateLimiter : public DoutPrefix { void start() { ldpp_dout(this, 20) << "starting ratelimit_gc thread" << dendl; runner = std::thread(&ActiveRateLimiter::replace_active, this); - if (const auto rc = ceph_pthread_setname(runner.native_handle(), "ratelimit_gc"); rc != 0) { - ldpp_dout(this, 1) << "ERROR: failed to set ratelimit_gc thread name. 
error: " << rc << dendl; - } } }; diff --git a/src/rgw/rgw_rest_pubsub.cc b/src/rgw/rgw_rest_pubsub.cc index c0345a4f88a..adfc86d87cb 100644 --- a/src/rgw/rgw_rest_pubsub.cc +++ b/src/rgw/rgw_rest_pubsub.cc @@ -494,11 +494,11 @@ void RGWPSListTopicsOp::execute(optional_yield y) { const RGWPubSub ps(driver, get_account_or_tenant(s->owner.id), *s->penv.site); if (rgw::all_zonegroups_support(*s->penv.site, rgw::zone_features::notification_v2) && - driver->stat_topics_v1(s->bucket->get_tenant(), null_yield, this) == -ENOENT) { - op_ret = ps.get_topics_v1(this, result, y); - } else { + driver->stat_topics_v1(get_account_or_tenant(s->owner.id), null_yield, this) == -ENOENT) { constexpr int max_items = 100; op_ret = ps.get_topics_v2(this, start_token, max_items, result, next_token, y); + } else { + op_ret = ps.get_topics_v1(this, result, y); } // if there are no topics it is not considered an error op_ret = op_ret == -ENOENT ? 0 : op_ret; diff --git a/src/rgw/rgw_rest_s3.cc b/src/rgw/rgw_rest_s3.cc index a245fca9945..68223405cf4 100644 --- a/src/rgw/rgw_rest_s3.cc +++ b/src/rgw/rgw_rest_s3.cc @@ -2399,11 +2399,17 @@ static void dump_bucket_metadata(req_state *s, rgw::sal::Bucket* bucket, if (s->auth.identity->is_owner_of(bucket->get_owner())) { const auto& user_info = s->user->get_info(); const auto& bucket_quota = s->bucket->get_info().quota; // bucket quota - dump_header(s, "X-RGW-Quota-User-Size", static_cast<long long>(user_info.quota.user_quota.max_size)); - dump_header(s, "X-RGW-Quota-User-Objects", static_cast<long long>(user_info.quota.user_quota.max_objects)); dump_header(s, "X-RGW-Quota-Max-Buckets", static_cast<long long>(user_info.max_buckets)); - dump_header(s, "X-RGW-Quota-Bucket-Size", static_cast<long long>(bucket_quota.max_size)); - dump_header(s, "X-RGW-Quota-Bucket-Objects", static_cast<long long>(bucket_quota.max_objects)); + + if (user_info.quota.user_quota.enabled){ + dump_header(s, "X-RGW-Quota-User-Size", static_cast<long 
long>(user_info.quota.user_quota.max_size)); + dump_header(s, "X-RGW-Quota-User-Objects", static_cast<long long>(user_info.quota.user_quota.max_objects)); + } + + if (bucket_quota.enabled){ + dump_header(s, "X-RGW-Quota-Bucket-Size", static_cast<long long>(bucket_quota.max_size)); + dump_header(s, "X-RGW-Quota-Bucket-Objects", static_cast<long long>(bucket_quota.max_objects)); + } } } diff --git a/src/test/admin_socket.cc b/src/test/admin_socket.cc index 69a3cbefd0e..dea29f96f11 100644 --- a/src/test/admin_socket.cc +++ b/src/test/admin_socket.cc @@ -27,6 +27,8 @@ #include <sys/un.h> #include <signal.h> +#include <iostream> // for std::cout + using namespace std; class AdminSocketTest diff --git a/src/test/admin_socket_output.h b/src/test/admin_socket_output.h index 1df12e4a9a5..5d22e8757ee 100644 --- a/src/test/admin_socket_output.h +++ b/src/test/admin_socket_output.h @@ -16,6 +16,7 @@ #define CEPH_ADMIN_SOCKET_OUTPUT_H #include <filesystem> +#include <iostream> // for std::cout #include <string> #include <map> #include <set> diff --git a/src/test/bench_log.cc b/src/test/bench_log.cc index 60fda462e87..9e7c02afc41 100644 --- a/src/test/bench_log.cc +++ b/src/test/bench_log.cc @@ -1,6 +1,8 @@ // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- // vim: ts=8 sw=2 smarttab +#include <iostream> // for std::cout + #include "include/types.h" #include "common/Thread.h" #include "common/debug.h" diff --git a/src/test/bufferlist.cc b/src/test/bufferlist.cc index 013335d8177..4b3ca95ca6c 100644 --- a/src/test/bufferlist.cc +++ b/src/test/bufferlist.cc @@ -23,6 +23,8 @@ #include <errno.h> #include <sys/uio.h> +#include <iostream> // for std::cout + #include "include/buffer.h" #include "include/buffer_raw.h" #include "include/compat.h" diff --git a/src/test/ceph_argparse.cc b/src/test/ceph_argparse.cc index 738879c5ba8..436ddc86363 100644 --- a/src/test/ceph_argparse.cc +++ b/src/test/ceph_argparse.cc @@ -15,6 +15,7 @@ #include "common/ceph_argparse.h" 
#include "gtest/gtest.h" +#include <iostream> // for std::cout #include <vector> #include "include/stringify.h" diff --git a/src/test/common/Throttle.cc b/src/test/common/Throttle.cc index b36d0a901de..6ca05f6dae3 100644 --- a/src/test/common/Throttle.cc +++ b/src/test/common/Throttle.cc @@ -23,6 +23,7 @@ #include <signal.h> #include <chrono> +#include <iostream> // for std::cout #include <list> #include <mutex> #include <random> diff --git a/src/test/common/test_cdc.cc b/src/test/common/test_cdc.cc index 620ecf4679f..61a5aa3708c 100644 --- a/src/test/common/test_cdc.cc +++ b/src/test/common/test_cdc.cc @@ -3,6 +3,7 @@ #include <vector> #include <cstring> +#include <iostream> // for std::cout #include <random> #include "include/types.h" diff --git a/src/test/common/test_config.cc b/src/test/common/test_config.cc index a70d567a434..4805c14a32e 100644 --- a/src/test/common/test_config.cc +++ b/src/test/common/test_config.cc @@ -19,6 +19,9 @@ * * */ + +#include <iostream> // for std::cout + #include "common/config_proxy.h" #include "common/errno.h" #include "gtest/gtest.h" diff --git a/src/test/common/test_context.cc b/src/test/common/test_context.cc index 889d000da85..8afded98951 100644 --- a/src/test/common/test_context.cc +++ b/src/test/common/test_context.cc @@ -19,6 +19,9 @@ * * */ + +#include <iostream> // for std::cout + #include "gtest/gtest.h" #include "include/types.h" #include "include/msgr.h" diff --git a/src/test/common/test_shared_cache.cc b/src/test/common/test_shared_cache.cc index 91120c7e59f..b7a392426d5 100644 --- a/src/test/common/test_shared_cache.cc +++ b/src/test/common/test_shared_cache.cc @@ -22,6 +22,9 @@ #include <stdio.h> #include <signal.h> + +#include <iostream> // for std::cout + #include "gtest/gtest.h" #include "common/Thread.h" #include "common/shared_cache.hpp" diff --git a/src/test/common/test_url_escape.cc b/src/test/common/test_url_escape.cc index 6c27b64da7a..52de8db8d9c 100644 --- a/src/test/common/test_url_escape.cc +++ 
b/src/test/common/test_url_escape.cc @@ -3,6 +3,8 @@ #include "common/url_escape.h" +#include <iostream> // for std::cout + #include "gtest/gtest.h" TEST(url_escape, escape) { diff --git a/src/test/compressor/test_compression.cc b/src/test/compressor/test_compression.cc index 98ef159dfb8..c5e4724cefc 100644 --- a/src/test/compressor/test_compression.cc +++ b/src/test/compressor/test_compression.cc @@ -17,6 +17,9 @@ #include <errno.h> #include <signal.h> #include <stdlib.h> + +#include <iostream> // for std::cout + #include "gtest/gtest.h" #include "common/ceph_context.h" #include "common/config.h" diff --git a/src/test/crimson/seastore/test_object_data_handler.cc b/src/test/crimson/seastore/test_object_data_handler.cc index e7aabf2c8af..5dbc3748e5b 100644 --- a/src/test/crimson/seastore/test_object_data_handler.cc +++ b/src/test/crimson/seastore/test_object_data_handler.cc @@ -218,14 +218,20 @@ struct object_data_handler_test_t: objaddr_t offset, extent_len_t length) { auto ret = with_trans_intr(t, [&](auto &t) { - return tm->get_pins(t, laddr_t::from_byte_offset(offset), length); + auto &layout = onode->get_layout(); + auto odata = layout.object_data.get(); + auto obase = odata.get_reserved_data_base(); + return tm->get_pins(t, (obase + offset).checked_to_laddr(), length); }).unsafe_get(); return ret; } std::list<LBAMappingRef> get_mappings(objaddr_t offset, extent_len_t length) { auto t = create_mutate_transaction(); auto ret = with_trans_intr(*t, [&](auto &t) { - return tm->get_pins(t, laddr_t::from_byte_offset(offset), length); + auto &layout = onode->get_layout(); + auto odata = layout.object_data.get(); + auto obase = odata.get_reserved_data_base(); + return tm->get_pins(t, (obase + offset).checked_to_laddr(), length); }).unsafe_get(); return ret; } @@ -253,12 +259,16 @@ struct object_data_handler_test_t: ObjectDataBlockRef get_extent( Transaction &t, - laddr_t addr, + loffset_t addr, extent_len_t len) { + auto &layout = onode->get_layout(); + auto odata = 
layout.object_data.get(); + auto obase = odata.get_reserved_data_base(); auto ext = with_trans_intr(t, [&](auto& trans) { - return tm->read_extent<ObjectDataBlock>(trans, addr, len); - }).unsafe_get(); - EXPECT_EQ(addr, ext->get_laddr()); + return tm->read_extent<ObjectDataBlock>( + trans, (obase + addr).checked_to_laddr(), len); + }).unsafe_get(); + EXPECT_EQ((obase + addr).checked_to_laddr(), ext->get_laddr()); return ext; } @@ -798,7 +808,7 @@ TEST_P(object_data_handler_test_t, overwrite_then_read_within_transaction) { auto pins = get_mappings(*t, base, len); assert(pins.size() == 1); auto pin1 = remap_pin(*t, std::move(pins.front()), 4096, 8192); - auto ext = get_extent(*t, laddr_t::from_byte_offset(base + 4096), 4096 * 2); + auto ext = get_extent(*t, base + 4096, 4096 * 2); ASSERT_TRUE(ext->is_exist_clean()); write(*t, base + 4096, 4096, 'y'); ASSERT_TRUE(ext->is_exist_mutation_pending()); diff --git a/src/test/crimson/seastore/test_transaction_manager.cc b/src/test/crimson/seastore/test_transaction_manager.cc index 6ad111dca5b..2d20c5fff94 100644 --- a/src/test/crimson/seastore/test_transaction_manager.cc +++ b/src/test/crimson/seastore/test_transaction_manager.cc @@ -26,6 +26,10 @@ namespace { } } +laddr_t get_laddr_hint(uint64_t offset) { + return laddr_t::from_byte_offset(RootMetaBlock::SIZE + offset); +} + struct test_extent_record_t { test_extent_desc_t desc; unsigned refcount = 0; @@ -67,8 +71,9 @@ struct transaction_manager_test_t : } laddr_t get_random_laddr(size_t block_size, size_t limit) { - return laddr_t::from_byte_offset(block_size * - std::uniform_int_distribution<>(0, (limit / block_size) - 1)(gen)); + auto offset = block_size * + std::uniform_int_distribution<>(0, (limit / block_size) - 1)(gen); + return get_laddr_hint(offset); } char get_random_contents() { @@ -719,7 +724,7 @@ struct transaction_manager_test_t : [this, &overlay](auto &t) { return lba_manager->scan_mappings( t, - L_ADDR_MIN, + get_laddr_hint(0), L_ADDR_MAX, 
[iter=overlay.begin(), &overlay](auto l, auto p, auto len) mutable { EXPECT_NE(iter, overlay.end()); @@ -830,9 +835,9 @@ struct transaction_manager_test_t : auto t = create_transaction(); auto extent = alloc_extent( t, - laddr_t::from_byte_offset(i * BSIZE), + get_laddr_hint(i * BSIZE), BSIZE); - ASSERT_EQ(laddr_t::from_byte_offset(i * BSIZE), extent->get_laddr()); + ASSERT_EQ(get_laddr_hint(i * BSIZE), extent->get_laddr()); submit_transaction(std::move(t)); } @@ -844,7 +849,7 @@ struct transaction_manager_test_t : boost::make_counting_iterator(0lu), boost::make_counting_iterator(BLOCKS), [this, &t](auto i) { - return tm->read_extent<TestBlock>(t, laddr_t::from_byte_offset(i * BSIZE), BSIZE + return tm->read_extent<TestBlock>(t, get_laddr_hint(i * BSIZE), BSIZE ).si_then([](auto) { return seastar::now(); }); @@ -870,9 +875,9 @@ struct transaction_manager_test_t : auto t = create_transaction(); auto extent = alloc_extent( t, - laddr_t::from_byte_offset(i * BSIZE), + get_laddr_hint(i * BSIZE), BSIZE); - ASSERT_EQ(laddr_t::from_byte_offset(i * BSIZE), extent->get_laddr()); + ASSERT_EQ(get_laddr_hint(i * BSIZE), extent->get_laddr()); if (try_submit_transaction(std::move(t))) break; } @@ -973,6 +978,7 @@ struct transaction_manager_test_t : extent_types_t::ROOT, extent_types_t::LADDR_INTERNAL, extent_types_t::LADDR_LEAF, + extent_types_t::ROOT_META, extent_types_t::OMAP_INNER, extent_types_t::OMAP_LEAF, extent_types_t::ONODE_BLOCK_STAGED, @@ -1346,9 +1352,9 @@ struct transaction_manager_test_t : void test_remap_pin() { run_async([this] { disable_max_extent_size(); - laddr_t l_offset = laddr_t::from_byte_offset(32 << 10); + laddr_t l_offset = get_laddr_hint(32 << 10); size_t l_len = 32 << 10; - laddr_t r_offset = laddr_t::from_byte_offset(64 << 10); + laddr_t r_offset = get_laddr_hint(64 << 10); size_t r_len = 32 << 10; { auto t = create_transaction(); @@ -1400,12 +1406,12 @@ struct transaction_manager_test_t : void test_clone_and_remap_pin() { run_async([this] { 
disable_max_extent_size(); - laddr_t l_offset = laddr_t::from_byte_offset(32 << 10); + laddr_t l_offset = get_laddr_hint(32 << 10); size_t l_len = 32 << 10; - laddr_t r_offset = laddr_t::from_byte_offset(64 << 10); + laddr_t r_offset = get_laddr_hint(64 << 10); size_t r_len = 32 << 10; - laddr_t l_clone_offset = laddr_t::from_byte_offset(96 << 10); - laddr_t r_clone_offset = laddr_t::from_byte_offset(128 << 10); + laddr_t l_clone_offset = get_laddr_hint(96 << 10); + laddr_t r_clone_offset = get_laddr_hint(128 << 10); { auto t = create_transaction(); auto lext = alloc_extent(t, l_offset, l_len); @@ -1455,11 +1461,11 @@ struct transaction_manager_test_t : void test_overwrite_pin() { run_async([this] { disable_max_extent_size(); - laddr_t m_offset = laddr_t::from_byte_offset(8 << 10); + laddr_t m_offset = get_laddr_hint(8 << 10); size_t m_len = 56 << 10; - laddr_t l_offset = laddr_t::from_byte_offset(64 << 10); + laddr_t l_offset = get_laddr_hint(64 << 10); size_t l_len = 64 << 10; - laddr_t r_offset = laddr_t::from_byte_offset(128 << 10); + laddr_t r_offset = get_laddr_hint(128 << 10); size_t r_len = 64 << 10; { auto t = create_transaction(); @@ -1538,7 +1544,7 @@ struct transaction_manager_test_t : run_async([this] { disable_max_extent_size(); constexpr unsigned REMAP_NUM = 32; - constexpr laddr_t offset = L_ADDR_MIN; + laddr_t offset = get_laddr_hint(0); constexpr size_t length = 256 << 10; { auto t = create_transaction(); @@ -1575,7 +1581,7 @@ struct transaction_manager_test_t : if (off == 0 || off >= 255) { continue; } - auto new_off = laddr_t::from_byte_offset(off << 10) + auto new_off = get_laddr_hint(off << 10) .get_byte_distance<extent_len_t>(last_pin->get_key()); auto new_len = last_pin->get_length() - new_off; //always remap right extent at new split_point @@ -1621,7 +1627,7 @@ struct transaction_manager_test_t : run_async([this] { disable_max_extent_size(); constexpr unsigned REMAP_NUM = 32; - constexpr laddr_t offset = L_ADDR_MIN; + laddr_t offset = 
get_laddr_hint(0); constexpr size_t length = 256 << 10; { auto t = create_transaction(); @@ -1661,12 +1667,12 @@ struct transaction_manager_test_t : ASSERT_TRUE(!split_points.empty()); while(!split_points.empty()) { // new overwrite area: start_off ~ end_off - auto start_off = split_points.front(); + auto start_off = split_points.front() + 4 /*RootMetaBlock*/; split_points.pop_front(); - auto end_off = split_points.front(); + auto end_off = split_points.front() + 4 /*RootMetaBlock*/; split_points.pop_front(); ASSERT_TRUE(start_off <= end_off); - if ((laddr_t::from_byte_offset(end_off << 10) == pin0->get_key() + pin0->get_length()) + if ((get_laddr_hint(end_off << 10) == pin0->get_key() + pin0->get_length()) || (start_off == end_off)) { if (split_points.empty() && empty_transaction) { early_exit++; @@ -1675,7 +1681,7 @@ struct transaction_manager_test_t : continue; } empty_transaction = false; - auto new_off = laddr_t::from_byte_offset(start_off << 10) + auto new_off = get_laddr_hint(start_off << 10) .get_byte_distance<extent_len_t>(last_rpin->get_key()); auto new_len = (end_off - start_off) << 10; bufferlist bl; @@ -1768,7 +1774,7 @@ struct tm_random_block_device_test_t : TEST_P(tm_random_block_device_test_t, scatter_allocation) { run_async([this] { - laddr_t ADDR = laddr_t::from_byte_offset(0xFF * 4096); + laddr_t ADDR = get_laddr_hint(0xFF * 4096); epm->prefill_fragmented_devices(); auto t = create_transaction(); for (int i = 0; i < 1991; i++) { @@ -1786,7 +1792,7 @@ TEST_P(tm_single_device_test_t, basic) { constexpr size_t SIZE = 4096; run_async([this] { - laddr_t ADDR = laddr_t::from_byte_offset(0xFF * SIZE); + laddr_t ADDR = get_laddr_hint(0xFF * SIZE); { auto t = create_transaction(); auto extent = alloc_extent( @@ -1807,7 +1813,7 @@ TEST_P(tm_single_device_test_t, mutate) { constexpr size_t SIZE = 4096; run_async([this] { - laddr_t ADDR = laddr_t::from_byte_offset(0xFF * SIZE); + laddr_t ADDR = get_laddr_hint(0xFF * SIZE); { auto t = create_transaction(); 
auto extent = alloc_extent( @@ -1845,8 +1851,8 @@ TEST_P(tm_single_device_test_t, allocate_lba_conflict) { constexpr size_t SIZE = 4096; run_async([this] { - laddr_t ADDR = laddr_t::from_byte_offset(0xFF * SIZE); - laddr_t ADDR2 = laddr_t::from_byte_offset(0xFE * SIZE); + laddr_t ADDR = get_laddr_hint(0xFF * SIZE); + laddr_t ADDR2 = get_laddr_hint(0xFE * SIZE); auto t = create_transaction(); auto t2 = create_transaction(); @@ -1883,7 +1889,7 @@ TEST_P(tm_single_device_test_t, mutate_lba_conflict) for (unsigned i = 0; i < 300; ++i) { auto extent = alloc_extent( t, - laddr_t::from_byte_offset(i * SIZE), + get_laddr_hint(i * SIZE), SIZE); } check_mappings(t); @@ -1891,7 +1897,7 @@ TEST_P(tm_single_device_test_t, mutate_lba_conflict) check(); } - laddr_t ADDR = laddr_t::from_byte_offset(150 * SIZE); + laddr_t ADDR = get_laddr_hint(150 * SIZE); { auto t = create_transaction(); auto t2 = create_transaction(); @@ -1917,15 +1923,15 @@ TEST_P(tm_single_device_test_t, concurrent_mutate_lba_no_conflict) { constexpr size_t SIZE = 4096; constexpr size_t NUM = 500; - laddr_t addr = L_ADDR_MIN; - laddr_t addr2 = laddr_t::from_byte_offset(SIZE * (NUM - 1)); + laddr_t addr = get_laddr_hint(0); + laddr_t addr2 = get_laddr_hint(SIZE * (NUM - 1)); run_async([this, addr, addr2] { { auto t = create_transaction(); for (unsigned i = 0; i < NUM; ++i) { auto extent = alloc_extent( t, - laddr_t::from_byte_offset(i * SIZE), + get_laddr_hint(i * SIZE), SIZE); } submit_transaction(std::move(t)); @@ -1949,7 +1955,7 @@ TEST_P(tm_single_device_test_t, create_remove_same_transaction) { constexpr size_t SIZE = 4096; run_async([this] { - laddr_t ADDR = laddr_t::from_byte_offset(0xFF * SIZE); + laddr_t ADDR = get_laddr_hint(0xFF * SIZE); { auto t = create_transaction(); auto extent = alloc_extent( @@ -1985,7 +1991,7 @@ TEST_P(tm_single_device_test_t, split_merge_read_same_transaction) for (unsigned i = 0; i < 300; ++i) { auto extent = alloc_extent( t, - laddr_t::from_byte_offset(i * SIZE), + 
get_laddr_hint(i * SIZE), SIZE); } check_mappings(t); @@ -1997,7 +2003,7 @@ TEST_P(tm_single_device_test_t, split_merge_read_same_transaction) for (unsigned i = 0; i < 240; ++i) { dec_ref( t, - laddr_t::from_byte_offset(i * SIZE)); + get_laddr_hint(i * SIZE)); } check_mappings(t); submit_transaction(std::move(t)); @@ -2010,7 +2016,7 @@ TEST_P(tm_single_device_test_t, inc_dec_ref) { constexpr size_t SIZE = 4096; run_async([this] { - laddr_t ADDR = laddr_t::from_byte_offset(0xFF * SIZE); + laddr_t ADDR = get_laddr_hint(0xFF * SIZE); { auto t = create_transaction(); auto extent = alloc_extent( @@ -2061,10 +2067,10 @@ TEST_P(tm_single_device_test_t, cause_lba_split) auto t = create_transaction(); auto extent = alloc_extent( t, - laddr_t::from_byte_offset(i * SIZE), + get_laddr_hint(i * SIZE), SIZE, (char)(i & 0xFF)); - ASSERT_EQ(laddr_t::from_byte_offset(i * SIZE), extent->get_laddr()); + ASSERT_EQ(get_laddr_hint(i * SIZE), extent->get_laddr()); submit_transaction(std::move(t)); } check(); @@ -2082,9 +2088,9 @@ TEST_P(tm_single_device_test_t, random_writes) auto t = create_transaction(); auto extent = alloc_extent( t, - laddr_t::from_byte_offset(i * BSIZE), + get_laddr_hint(i * BSIZE), BSIZE); - ASSERT_EQ(laddr_t::from_byte_offset(i * BSIZE), extent->get_laddr()); + ASSERT_EQ(get_laddr_hint(i * BSIZE), extent->get_laddr()); submit_transaction(std::move(t)); } @@ -2100,7 +2106,7 @@ TEST_P(tm_single_device_test_t, random_writes) // pad out transaction auto paddings = alloc_extents( t, - laddr_t::from_byte_offset(TOTAL + (k * PADDING_SIZE)), + get_laddr_hint(TOTAL + (k * PADDING_SIZE)), PADDING_SIZE); for (auto &padding : paddings) { dec_ref(t, padding->get_laddr()); @@ -2133,7 +2139,7 @@ TEST_P(tm_single_device_test_t, find_hole_assert_trigger) TEST_P(tm_single_device_intergrity_check_test_t, remap_lazy_read) { - constexpr laddr_t offset = L_ADDR_MIN; + laddr_t offset = get_laddr_hint(0); constexpr size_t length = 256 << 10; run_async([this, offset] { 
disable_max_extent_size(); @@ -2186,7 +2192,7 @@ TEST_P(tm_single_device_test_t, invalid_lba_mapping_detect) for (int i = 0; i < LEAF_NODE_CAPACITY; i++) { auto extent = alloc_extent( t, - laddr_t::from_byte_offset(i * 4096), + get_laddr_hint(i * 4096), 4096, 'a'); } @@ -2195,12 +2201,12 @@ TEST_P(tm_single_device_test_t, invalid_lba_mapping_detect) { auto t = create_transaction(); - auto pin = get_pin(t, laddr_t::from_byte_offset((LEAF_NODE_CAPACITY - 1) * 4096)); + auto pin = get_pin(t, get_laddr_hint((LEAF_NODE_CAPACITY - 1) * 4096)); assert(pin->is_parent_viewable()); - auto extent = alloc_extent(t, laddr_t::from_byte_offset(LEAF_NODE_CAPACITY * 4096), 4096, 'a'); + auto extent = alloc_extent(t, get_laddr_hint(LEAF_NODE_CAPACITY * 4096), 4096, 'a'); assert(!pin->is_parent_viewable()); - pin = get_pin(t, laddr_t::from_byte_offset(LEAF_NODE_CAPACITY * 4096)); - std::ignore = alloc_extent(t, laddr_t::from_byte_offset((LEAF_NODE_CAPACITY + 1) * 4096), 4096, 'a'); + pin = get_pin(t, get_laddr_hint(LEAF_NODE_CAPACITY * 4096)); + std::ignore = alloc_extent(t, get_laddr_hint((LEAF_NODE_CAPACITY + 1) * 4096), 4096, 'a'); assert(pin->is_parent_viewable()); assert(pin->parent_modified()); pin->maybe_fix_pos(); diff --git a/src/test/crypto.cc b/src/test/crypto.cc index 819d41c7218..67fb440eeb9 100644 --- a/src/test/crypto.cc +++ b/src/test/crypto.cc @@ -1,6 +1,8 @@ #include <errno.h> #include <time.h> +#include <iostream> // for std::cout + #include <boost/container/small_vector.hpp> #include "gtest/gtest.h" diff --git a/src/test/daemon_config.cc b/src/test/daemon_config.cc index cdea3b05932..4c7abd70b20 100644 --- a/src/test/daemon_config.cc +++ b/src/test/daemon_config.cc @@ -21,6 +21,8 @@ #include "include/rados/librados.h" #include <errno.h> + +#include <iostream> // for std::cout #include <sstream> #include <string> #include <string.h> diff --git a/src/test/encoding.cc b/src/test/encoding.cc index 3c83716b048..3d508909d6d 100644 --- a/src/test/encoding.cc +++ 
b/src/test/encoding.cc @@ -4,6 +4,8 @@ #include <fmt/format.h> #include "gtest/gtest.h" +#include <iostream> // for std::cout + using namespace std; template < typename T > diff --git a/src/test/osd/types.cc b/src/test/osd/types.cc index 2dc870411bb..062980d8655 100644 --- a/src/test/osd/types.cc +++ b/src/test/osd/types.cc @@ -23,6 +23,8 @@ #include "common/Thread.h" #include "include/stringify.h" #include "osd/ReplicatedBackend.h" + +#include <iostream> // for std::cout #include <sstream> using namespace std; diff --git a/src/test/osdc/object_cacher_stress.cc b/src/test/osdc/object_cacher_stress.cc index 0bfdd48eb98..b32c5660112 100644 --- a/src/test/osdc/object_cacher_stress.cc +++ b/src/test/osdc/object_cacher_stress.cc @@ -23,6 +23,7 @@ #include "MemWriteback.h" #include <atomic> +#include <iostream> // for std::cout using namespace std; diff --git a/src/test/perf_counters.cc b/src/test/perf_counters.cc index 7ab9561bc19..b75e6a50825 100644 --- a/src/test/perf_counters.cc +++ b/src/test/perf_counters.cc @@ -15,7 +15,7 @@ #include "include/types.h" // FIXME: ordering shouldn't be important, but right // now, this include has to come before the others. 
- +#include "include/utime.h" #include "common/perf_counters_key.h" #include "common/perf_counters_collection.h" #include "common/admin_socket_client.h" diff --git a/src/test/test_addrs.cc b/src/test/test_addrs.cc index 4062d0431c3..e70d234d743 100644 --- a/src/test/test_addrs.cc +++ b/src/test/test_addrs.cc @@ -17,6 +17,7 @@ #include "msg/msg_types.h" #include "gtest/gtest.h" +#include <iostream> // for std::cout #include <sstream> using namespace std; diff --git a/src/test/test_denc.cc b/src/test/test_denc.cc index 02dd1454ef8..c9144bd05bf 100644 --- a/src/test/test_denc.cc +++ b/src/test/test_denc.cc @@ -15,6 +15,8 @@ */ #include <stdio.h> + +#include <iostream> // for std::cout #include <numeric> #include "global/global_init.h" diff --git a/src/test/test_features.cc b/src/test/test_features.cc index 1ae758bfb34..bdd8838224b 100644 --- a/src/test/test_features.cc +++ b/src/test/test_features.cc @@ -2,6 +2,8 @@ // vim: ts=8 sw=2 smarttab #include <stdio.h> +#include <iostream> // for std::cout + #include "global/global_init.h" #include "common/ceph_argparse.h" #include "common/ceph_releases.h" diff --git a/src/test/test_mempool.cc b/src/test/test_mempool.cc index bb46b19aa4e..b806282d039 100644 --- a/src/test/test_mempool.cc +++ b/src/test/test_mempool.cc @@ -16,6 +16,8 @@ #include <stdio.h> +#include <iostream> // for std::cout + #include "global/global_init.h" #include "common/ceph_argparse.h" #include "global/global_context.h" diff --git a/src/test/test_perf_counters_cache.cc b/src/test/test_perf_counters_cache.cc index 1fa147ee273..fa2d541b7f7 100644 --- a/src/test/test_perf_counters_cache.cc +++ b/src/test/test_perf_counters_cache.cc @@ -4,6 +4,7 @@ #include "global/global_context.h" #include "global/global_init.h" #include "include/msgr.h" // for CEPH_ENTITY_TYPE_CLIENT +#include "include/utime.h" #include "gtest/gtest.h" using namespace ceph::perf_counters; diff --git a/src/test/test_rewrite_latency.cc b/src/test/test_rewrite_latency.cc index 
348c8dde5c6..48a95cf183b 100644 --- a/src/test/test_rewrite_latency.cc +++ b/src/test/test_rewrite_latency.cc @@ -1,5 +1,6 @@ #include <unistd.h> +#include <iostream> // for std::cout #include <map> #include <errno.h> diff --git a/src/test/test_snap_mapper.cc b/src/test/test_snap_mapper.cc index a47d2538c3a..7a9ac62defe 100644 --- a/src/test/test_snap_mapper.cc +++ b/src/test/test_snap_mapper.cc @@ -1,4 +1,5 @@ // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +#include <iostream> // for std::cout #include <iterator> #include <map> #include <set> diff --git a/src/test/test_striper.cc b/src/test/test_striper.cc index ee70304ebc8..1e5f93a49be 100644 --- a/src/test/test_striper.cc +++ b/src/test/test_striper.cc @@ -3,6 +3,8 @@ #include "osdc/Striper.h" +#include <iostream> // for std::cout + using namespace std; TEST(Striper, Stripe1) diff --git a/src/test/test_utime.cc b/src/test/test_utime.cc index b1cee0e805c..de1d6f46878 100644 --- a/src/test/test_utime.cc +++ b/src/test/test_utime.cc @@ -1,4 +1,7 @@ #include "include/utime.h" + +#include <iostream> // for std::cout + #include "gtest/gtest.h" #include "include/stringify.h" #include "common/ceph_context.h" diff --git a/src/test/test_workqueue.cc b/src/test/test_workqueue.cc index 771b9d65952..5c2fc459da2 100644 --- a/src/test/test_workqueue.cc +++ b/src/test/test_workqueue.cc @@ -1,6 +1,9 @@ #include "gtest/gtest.h" #include "common/WorkQueue.h" + +#include <iostream> // for std::cout + #include "common/ceph_argparse.h" using namespace std; diff --git a/src/test/testcrypto.cc b/src/test/testcrypto.cc index 2efb9b219b9..8e3337babea 100644 --- a/src/test/testcrypto.cc +++ b/src/test/testcrypto.cc @@ -1,6 +1,8 @@ #include "auth/Crypto.h" -#include "common/Clock.h" +#include <iostream> // for std::cout + +#include "common/Clock.h" #include "common/config.h" #include "common/debug.h" diff --git a/src/test/testkeys.cc b/src/test/testkeys.cc index 85d0b56676f..dacddb08786 100644 --- 
a/src/test/testkeys.cc +++ b/src/test/testkeys.cc @@ -1,4 +1,7 @@ #include "auth/cephx/CephxKeyServer.h" + +#include <iostream> // for std::cout + #include "common/ceph_argparse.h" #include "global/global_init.h" #include "common/config.h" diff --git a/src/tools/radosacl.cc b/src/tools/radosacl.cc index 3bfef8fb157..a6c9b9f8dc4 100644 --- a/src/tools/radosacl.cc +++ b/src/tools/radosacl.cc @@ -16,6 +16,8 @@ #include <time.h> #include <errno.h> +#include <iostream> // for std::cerr + #include "include/types.h" #include "include/rados/librados.hpp" |