summaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/ceph-volume/ceph_volume/objectstore/lvmbluestore.py2
-rw-r--r--src/ceph_mds.cc2
-rw-r--r--src/ceph_mgr.cc2
-rw-r--r--src/ceph_mon.cc2
-rw-r--r--src/ceph_nvmeof_monitor_client.cc2
-rwxr-xr-xsrc/cephadm/cephadm.py23
-rw-r--r--src/cephadm/cephadmlib/call_wrappers.py8
-rw-r--r--src/cephadm/cephadmlib/constants.py1
-rw-r--r--src/cephadm/cephadmlib/daemon_identity.py2
-rw-r--r--src/cephadm/cephadmlib/daemons/ingress.py21
-rw-r--r--src/cephadm/cephadmlib/daemons/nfs.py15
-rw-r--r--src/cephadm/cephadmlib/daemons/nvmeof.py41
-rw-r--r--src/cephadm/cephadmlib/data_utils.py5
-rw-r--r--src/cephadm/cephadmlib/exceptions.py13
-rw-r--r--src/cephadm/cephadmlib/file_utils.py17
-rw-r--r--src/cephadm/cephadmlib/systemd.py8
-rw-r--r--src/cephadm/tests/test_agent.py2
-rw-r--r--src/cephadm/tests/test_cephadm.py34
-rw-r--r--src/cephadm/tests/test_deploy.py1
-rw-r--r--src/cephadm/tox.ini14
-rw-r--r--src/client/Client.cc10
-rw-r--r--src/client/MetaSession.cc2
-rw-r--r--src/client/SyntheticClient.cc2
-rw-r--r--src/common/DecayCounter.h3
-rw-r--r--src/common/Graylog.cc3
-rw-r--r--src/common/Journald.cc4
-rw-r--r--src/common/StackStringStream.h3
-rw-r--r--src/common/Thread.cc22
-rw-r--r--src/common/Thread.h12
-rw-r--r--src/common/Throttle.h2
-rw-r--r--src/common/admin_socket.cc6
-rw-r--r--src/common/assert.cc12
-rw-r--r--src/common/buffer.cc2
-rw-r--r--src/common/ceph_argparse.cc1
-rw-r--r--src/common/ceph_argparse.h2
-rw-r--r--src/common/ceph_time.h2
-rw-r--r--src/common/ceph_timer.h2
-rw-r--r--src/common/code_environment.cc7
-rw-r--r--src/common/compat.cc63
-rw-r--r--src/common/config_cacher.h2
-rw-r--r--src/common/error_code.cc3
-rw-r--r--src/common/error_code.h5
-rw-r--r--src/common/obj_bencher.cc4
-rw-r--r--src/common/options.cc1
-rw-r--r--src/common/options.h1
-rw-r--r--src/common/options/mds.yaml.in19
-rw-r--r--src/common/options/osd.yaml.in8
-rw-r--r--src/common/perf_counters.cc1
-rw-r--r--src/common/perf_counters.h5
-rw-r--r--src/common/perf_counters_cache.h1
-rw-r--r--src/common/pick_address.cc2
-rw-r--r--src/crimson/os/alienstore/alien_store.cc13
-rw-r--r--src/crimson/os/alienstore/alien_store.h1
-rw-r--r--src/crimson/os/alienstore/thread_pool.cc2
-rw-r--r--src/crimson/os/cyanstore/cyan_store.cc6
-rw-r--r--src/crimson/os/cyanstore/cyan_store.h2
-rw-r--r--src/crimson/os/futurized_store.h1
-rw-r--r--src/crimson/os/seastore/cache.cc10
-rw-r--r--src/crimson/os/seastore/cache.h7
-rw-r--r--src/crimson/os/seastore/onode.h4
-rw-r--r--src/crimson/os/seastore/random_block_manager/block_rb_manager.cc6
-rw-r--r--src/crimson/os/seastore/root_meta.h76
-rw-r--r--src/crimson/os/seastore/seastore.cc7
-rw-r--r--src/crimson/os/seastore/seastore.h4
-rw-r--r--src/crimson/os/seastore/seastore_types.cc2
-rw-r--r--src/crimson/os/seastore/seastore_types.h61
-rw-r--r--src/crimson/os/seastore/transaction_manager.cc2
-rw-r--r--src/crimson/os/seastore/transaction_manager.h61
-rw-r--r--src/crimson/osd/backfill_state.cc19
-rw-r--r--src/crimson/osd/backfill_state.h9
-rw-r--r--src/crimson/osd/ec_backend.cc1
-rw-r--r--src/crimson/osd/ec_backend.h1
-rw-r--r--src/crimson/osd/ops_executer.cc5
-rw-r--r--src/crimson/osd/ops_executer.h11
-rw-r--r--src/crimson/osd/osd.cc34
-rw-r--r--src/crimson/osd/osd.h1
-rw-r--r--src/crimson/osd/osd_operations/snaptrim_event.cc1
-rw-r--r--src/crimson/osd/pg.cc26
-rw-r--r--src/crimson/osd/pg.h14
-rw-r--r--src/crimson/osd/pg_backend.h1
-rw-r--r--src/crimson/osd/pg_recovery.h8
-rw-r--r--src/crimson/osd/replicated_backend.cc114
-rw-r--r--src/crimson/osd/replicated_backend.h12
-rw-r--r--src/exporter/DaemonMetricCollector.cc31
-rw-r--r--src/exporter/DaemonMetricCollector.h11
-rw-r--r--src/exporter/ceph_exporter.cc46
-rw-r--r--src/exporter/web_server.cc35
-rw-r--r--src/exporter/web_server.h1
-rw-r--r--src/global/signal_handler.cc2
-rw-r--r--src/include/ceph_fs.h4
-rw-r--r--src/include/compat.h51
-rw-r--r--src/include/elist.h4
-rw-r--r--src/include/str_list.h1
-rw-r--r--src/log/Entry.h6
-rw-r--r--src/log/Log.cc33
-rw-r--r--src/log/Log.h8
-rw-r--r--src/mds/Beacon.cc16
-rw-r--r--src/mds/CDir.cc4
-rw-r--r--src/mds/Capability.h2
-rw-r--r--src/mds/Locker.cc14
-rw-r--r--src/mds/MDCache.cc3
-rw-r--r--src/mds/MDLog.cc40
-rw-r--r--src/mds/MDLog.h8
-rw-r--r--src/mds/MDSRank.cc4
-rw-r--r--src/mds/MetricAggregator.cc1
-rw-r--r--src/mds/MetricsHandler.cc1
-rw-r--r--src/mds/Migrator.cc13
-rw-r--r--src/mds/PurgeQueue.cc2
-rw-r--r--src/mds/QuiesceAgent.h2
-rw-r--r--src/mds/QuiesceDbEncoding.h48
-rw-r--r--src/mds/QuiesceDbManager.cc2
-rw-r--r--src/mds/Server.cc30
-rw-r--r--src/mds/Server.h3
-rw-r--r--src/mds/SessionMap.cc1
-rw-r--r--src/mds/SessionMap.h4
-rw-r--r--src/messages/MClientCaps.h15
-rw-r--r--src/mon/MDSMonitor.cc8
-rw-r--r--src/mon/MonMap.cc7
-rwxr-xr-xsrc/mon/NVMeofGwMap.cc43
-rwxr-xr-xsrc/mon/NVMeofGwMap.h5
-rw-r--r--src/mon/NVMeofGwMon.cc6
-rw-r--r--src/msg/async/Stack.h2
-rw-r--r--src/msg/async/rdma/RDMAStack.cc2
-rw-r--r--src/osd/PrimaryLogPG.cc14
-rw-r--r--src/osd/scrubber/osd_scrub.cc2
-rw-r--r--src/osd/scrubber/osd_scrub_sched.cc12
-rw-r--r--src/osd/scrubber/pg_scrubber.cc71
-rw-r--r--src/osd/scrubber/pg_scrubber.h23
-rw-r--r--src/pybind/mgr/balancer/module.py40
-rw-r--r--src/pybind/mgr/cephadm/cert_mgr.py11
-rw-r--r--src/pybind/mgr/cephadm/inventory.py36
-rw-r--r--src/pybind/mgr/cephadm/module.py27
-rw-r--r--src/pybind/mgr/cephadm/serve.py16
-rw-r--r--src/pybind/mgr/cephadm/services/cephadmservice.py33
-rw-r--r--src/pybind/mgr/cephadm/services/nvmeof.py4
-rw-r--r--src/pybind/mgr/cephadm/ssl_cert_utils.py9
-rw-r--r--src/pybind/mgr/cephadm/templates/services/nvmeof/ceph-nvmeof.conf.j28
-rw-r--r--src/pybind/mgr/cephadm/tests/test_cephadm.py16
-rw-r--r--src/pybind/mgr/cephadm/tests/test_services.py39
-rw-r--r--src/pybind/mgr/mgr_module.py2
-rw-r--r--src/pybind/mgr/orchestrator/_interface.py8
-rw-r--r--src/pybind/mgr/orchestrator/module.py33
-rw-r--r--src/python-common/CMakeLists.txt2
-rw-r--r--src/python-common/ceph/deployment/drive_group.py4
-rw-r--r--src/python-common/ceph/deployment/drive_selection/filter.py6
-rw-r--r--src/python-common/ceph/deployment/drive_selection/matchers.py5
-rw-r--r--src/python-common/ceph/deployment/drive_selection/selector.py2
-rw-r--r--src/python-common/ceph/deployment/inventory.py2
-rw-r--r--src/python-common/ceph/deployment/service_spec.py63
-rw-r--r--src/python-common/ceph/deployment/translate.py2
-rw-r--r--src/python-common/ceph/fs/earmarking.py20
-rw-r--r--src/python-common/ceph/tests/utils.py3
-rw-r--r--src/python-common/requirements-lint.txt2
-rw-r--r--src/python-common/tox.ini12
-rw-r--r--src/rgw/driver/dbstore/README.md14
-rw-r--r--src/rgw/driver/posix/README.md12
-rw-r--r--src/rgw/driver/rados/rgw_notify.cc11
-rw-r--r--src/rgw/rgw_amqp.cc9
-rw-r--r--src/rgw/rgw_common.cc4
-rw-r--r--src/rgw/rgw_kafka.cc7
-rw-r--r--src/rgw/rgw_lua_background.cc6
-rw-r--r--src/rgw/rgw_op.cc18
-rw-r--r--src/rgw/rgw_ratelimit.h4
-rw-r--r--src/rgw/rgw_rest_pubsub.cc6
-rw-r--r--src/rgw/rgw_rest_s3.cc14
-rw-r--r--src/test/admin_socket.cc2
-rw-r--r--src/test/admin_socket_output.h1
-rw-r--r--src/test/bench_log.cc2
-rw-r--r--src/test/bufferlist.cc2
-rw-r--r--src/test/ceph_argparse.cc1
-rw-r--r--src/test/common/Throttle.cc1
-rw-r--r--src/test/common/test_cdc.cc1
-rw-r--r--src/test/common/test_config.cc3
-rw-r--r--src/test/common/test_context.cc3
-rw-r--r--src/test/common/test_shared_cache.cc3
-rw-r--r--src/test/common/test_url_escape.cc2
-rw-r--r--src/test/compressor/test_compression.cc3
-rw-r--r--src/test/crimson/seastore/test_object_data_handler.cc24
-rw-r--r--src/test/crimson/seastore/test_transaction_manager.cc104
-rw-r--r--src/test/crypto.cc2
-rw-r--r--src/test/daemon_config.cc2
-rw-r--r--src/test/encoding.cc2
-rw-r--r--src/test/osd/types.cc2
-rw-r--r--src/test/osdc/object_cacher_stress.cc1
-rw-r--r--src/test/perf_counters.cc2
-rw-r--r--src/test/test_addrs.cc1
-rw-r--r--src/test/test_denc.cc2
-rw-r--r--src/test/test_features.cc2
-rw-r--r--src/test/test_mempool.cc2
-rw-r--r--src/test/test_perf_counters_cache.cc1
-rw-r--r--src/test/test_rewrite_latency.cc1
-rw-r--r--src/test/test_snap_mapper.cc1
-rw-r--r--src/test/test_striper.cc2
-rw-r--r--src/test/test_utime.cc3
-rw-r--r--src/test/test_workqueue.cc3
-rw-r--r--src/test/testcrypto.cc4
-rw-r--r--src/test/testkeys.cc3
-rw-r--r--src/tools/radosacl.cc2
198 files changed, 1625 insertions, 665 deletions
diff --git a/src/ceph-volume/ceph_volume/objectstore/lvmbluestore.py b/src/ceph-volume/ceph_volume/objectstore/lvmbluestore.py
index ba3719cd3f3..aa11d553723 100644
--- a/src/ceph-volume/ceph_volume/objectstore/lvmbluestore.py
+++ b/src/ceph-volume/ceph_volume/objectstore/lvmbluestore.py
@@ -367,7 +367,7 @@ class LvmBlueStore(BlueStore):
if is_encrypted:
osd_lv_path = '/dev/mapper/%s' % osd_block_lv.__dict__['lv_uuid']
lockbox_secret = osd_block_lv.tags['ceph.cephx_lockbox_secret']
- self.with_tpm = bool(osd_block_lv.tags.get('ceph.with_tpm', 0))
+ self.with_tpm = osd_block_lv.tags.get('ceph.with_tpm') == '1'
if not self.with_tpm:
encryption_utils.write_lockbox_keyring(osd_id,
osd_fsid,
diff --git a/src/ceph_mds.cc b/src/ceph_mds.cc
index 5a917fa807c..ba8726a2be3 100644
--- a/src/ceph_mds.cc
+++ b/src/ceph_mds.cc
@@ -81,7 +81,7 @@ static void handle_mds_signal(int signum)
int main(int argc, const char **argv)
{
- ceph_pthread_setname(pthread_self(), "ceph-mds");
+ ceph_pthread_setname("ceph-mds");
auto args = argv_to_vec(argc, argv);
if (args.empty()) {
diff --git a/src/ceph_mgr.cc b/src/ceph_mgr.cc
index 67bda0c51be..bd2c643bc6b 100644
--- a/src/ceph_mgr.cc
+++ b/src/ceph_mgr.cc
@@ -41,7 +41,7 @@ static void usage()
*/
int main(int argc, const char **argv)
{
- ceph_pthread_setname(pthread_self(), "ceph-mgr");
+ ceph_pthread_setname("ceph-mgr");
auto args = argv_to_vec(argc, argv);
if (args.empty()) {
diff --git a/src/ceph_mon.cc b/src/ceph_mon.cc
index 279fdb20ccb..63eb252e38f 100644
--- a/src/ceph_mon.cc
+++ b/src/ceph_mon.cc
@@ -250,7 +250,7 @@ int main(int argc, const char **argv)
{
// reset our process name, in case we did a respawn, so that it's not
// left as "exe".
- ceph_pthread_setname(pthread_self(), "ceph-mon");
+ ceph_pthread_setname("ceph-mon");
int err;
diff --git a/src/ceph_nvmeof_monitor_client.cc b/src/ceph_nvmeof_monitor_client.cc
index 05457998cb8..fa41bed08ad 100644
--- a/src/ceph_nvmeof_monitor_client.cc
+++ b/src/ceph_nvmeof_monitor_client.cc
@@ -45,7 +45,7 @@ static void usage()
*/
int main(int argc, const char **argv)
{
- ceph_pthread_setname(pthread_self(), "ceph-nvmeof-monitor-client");
+ ceph_pthread_setname("ceph-nvmeof-monitor-client");
auto args = argv_to_vec(argc, argv);
if (args.empty()) {
diff --git a/src/cephadm/cephadm.py b/src/cephadm/cephadm.py
index f2172bf9083..d2ddf564116 100755
--- a/src/cephadm/cephadm.py
+++ b/src/cephadm/cephadm.py
@@ -57,6 +57,7 @@ from cephadmlib.constants import (
LOG_DIR_MODE,
SYSCTL_DIR,
UNIT_DIR,
+ DAEMON_FAILED_ERROR,
)
from cephadmlib.context import CephadmContext
from cephadmlib.context_getters import (
@@ -72,6 +73,7 @@ from cephadmlib.exceptions import (
ClusterAlreadyExists,
Error,
UnauthorizedRegistryError,
+ DaemonStartException,
)
from cephadmlib.exe_utils import find_executable, find_program
from cephadmlib.call_wrappers import (
@@ -1246,7 +1248,11 @@ def deploy_daemon_units(
call_throws(ctx, ['systemctl', 'enable', unit_name])
if start:
clean_cgroup(ctx, ident.fsid, unit_name)
- call_throws(ctx, ['systemctl', 'start', unit_name])
+ try:
+ call_throws(ctx, ['systemctl', 'start', unit_name])
+ except Exception as e:
+ logger.error(f'systemctl start failed for {unit_name}: {str(e)}')
+ raise DaemonStartException()
def _osd_unit_run_commands(
@@ -3050,7 +3056,10 @@ def get_deployment_type(
@deprecated_command
def command_deploy(ctx):
# type: (CephadmContext) -> None
- _common_deploy(ctx)
+ try:
+ _common_deploy(ctx)
+ except DaemonStartException:
+ sys.exit(DAEMON_FAILED_ERROR)
def apply_deploy_config_to_ctx(
@@ -3093,7 +3102,10 @@ def command_deploy_from(ctx: CephadmContext) -> None:
config_data = read_configuration_source(ctx)
logger.debug('Loaded deploy configuration: %r', config_data)
apply_deploy_config_to_ctx(config_data, ctx)
- _common_deploy(ctx)
+ try:
+ _common_deploy(ctx)
+ except DaemonStartException:
+ sys.exit(DAEMON_FAILED_ERROR)
def _common_deploy(ctx: CephadmContext) -> None:
@@ -4489,8 +4501,9 @@ def _rm_cluster(ctx: CephadmContext, keep_logs: bool, zap_osds: bool) -> None:
##################################
-def check_time_sync(ctx, enabler=None):
- # type: (CephadmContext, Optional[Packager]) -> bool
+def check_time_sync(
+ ctx: CephadmContext, enabler: Optional[Packager] = None
+) -> bool:
units = [
'chrony.service', # 18.04 (at least)
'chronyd.service', # el / opensuse
diff --git a/src/cephadm/cephadmlib/call_wrappers.py b/src/cephadm/cephadmlib/call_wrappers.py
index 3fe2171e99d..d3d327c218c 100644
--- a/src/cephadm/cephadmlib/call_wrappers.py
+++ b/src/cephadm/cephadmlib/call_wrappers.py
@@ -311,14 +311,14 @@ def call_throws(
return out, err, ret
-def call_timeout(ctx, command, timeout):
- # type: (CephadmContext, List[str], int) -> int
+def call_timeout(
+ ctx: CephadmContext, command: List[str], timeout: int
+) -> int:
logger.debug(
'Running command (timeout=%s): %s' % (timeout, ' '.join(command))
)
- def raise_timeout(command, timeout):
- # type: (List[str], int) -> NoReturn
+ def raise_timeout(command: List[str], timeout: int) -> NoReturn:
msg = 'Command `%s` timed out after %s seconds' % (command, timeout)
logger.debug(msg)
raise TimeoutExpired(msg)
diff --git a/src/cephadm/cephadmlib/constants.py b/src/cephadm/cephadmlib/constants.py
index 215d207729d..1df46353fb3 100644
--- a/src/cephadm/cephadmlib/constants.py
+++ b/src/cephadm/cephadmlib/constants.py
@@ -36,3 +36,4 @@ QUIET_LOG_LEVEL = 9 # DEBUG is 10, so using 9 to be lower level than DEBUG
NO_DEPRECATED = False
UID_NOBODY = 65534
GID_NOGROUP = 65534
+DAEMON_FAILED_ERROR = 17
diff --git a/src/cephadm/cephadmlib/daemon_identity.py b/src/cephadm/cephadmlib/daemon_identity.py
index 52a18092bf0..bfe1a855186 100644
--- a/src/cephadm/cephadmlib/daemon_identity.py
+++ b/src/cephadm/cephadmlib/daemon_identity.py
@@ -157,7 +157,7 @@ class DaemonSubIdentity(DaemonIdentity):
)
def sidecar_script(self, base_data_dir: Union[str, os.PathLike]) -> str:
- sname = f'sidecar-{ self.subcomponent }.run'
+ sname = f'sidecar-{self.subcomponent}.run'
return str(pathlib.Path(self.data_dir(base_data_dir)) / sname)
@property
diff --git a/src/cephadm/cephadmlib/daemons/ingress.py b/src/cephadm/cephadmlib/daemons/ingress.py
index 8f4f6b08991..c88e39ac025 100644
--- a/src/cephadm/cephadmlib/daemons/ingress.py
+++ b/src/cephadm/cephadmlib/daemons/ingress.py
@@ -82,8 +82,7 @@ class HAproxy(ContainerDaemonForm):
def get_daemon_args(self) -> List[str]:
return ['haproxy', '-f', '/var/lib/haproxy/haproxy.cfg']
- def validate(self):
- # type: () -> None
+ def validate(self) -> None:
if not is_fsid(self.fsid):
raise Error('not an fsid: %s' % self.fsid)
if not self.daemon_id:
@@ -99,12 +98,10 @@ class HAproxy(ContainerDaemonForm):
'required file missing from config-json: %s' % fname
)
- def get_daemon_name(self):
- # type: () -> str
+ def get_daemon_name(self) -> str:
return '%s.%s' % (self.daemon_type, self.daemon_id)
- def get_container_name(self, desc=None):
- # type: (Optional[str]) -> str
+ def get_container_name(self, desc: Optional[str] = None) -> str:
cname = 'ceph-%s-%s' % (self.fsid, self.get_daemon_name())
if desc:
cname = '%s-%s' % (cname, desc)
@@ -212,8 +209,7 @@ class Keepalived(ContainerDaemonForm):
# populate files from the config-json
populate_files(data_dir, self.files, uid, gid)
- def validate(self):
- # type: () -> None
+ def validate(self) -> None:
if not is_fsid(self.fsid):
raise Error('not an fsid: %s' % self.fsid)
if not self.daemon_id:
@@ -229,20 +225,17 @@ class Keepalived(ContainerDaemonForm):
'required file missing from config-json: %s' % fname
)
- def get_daemon_name(self):
- # type: () -> str
+ def get_daemon_name(self) -> str:
return '%s.%s' % (self.daemon_type, self.daemon_id)
- def get_container_name(self, desc=None):
- # type: (Optional[str]) -> str
+ def get_container_name(self, desc: Optional[str] = None) -> str:
cname = 'ceph-%s-%s' % (self.fsid, self.get_daemon_name())
if desc:
cname = '%s-%s' % (cname, desc)
return cname
@staticmethod
- def get_container_envs():
- # type: () -> List[str]
+ def get_container_envs() -> List[str]:
envs = [
'KEEPALIVED_AUTOCONF=false',
'KEEPALIVED_CONF=/etc/keepalived/keepalived.conf',
diff --git a/src/cephadm/cephadmlib/daemons/nfs.py b/src/cephadm/cephadmlib/daemons/nfs.py
index f09374d5f46..70ccea65b5b 100644
--- a/src/cephadm/cephadmlib/daemons/nfs.py
+++ b/src/cephadm/cephadmlib/daemons/nfs.py
@@ -42,9 +42,13 @@ class NFSGanesha(ContainerDaemonForm):
return cls.daemon_type == daemon_type
def __init__(
- self, ctx, fsid, daemon_id, config_json, image=DEFAULT_IMAGE
- ):
- # type: (CephadmContext, str, Union[int, str], Dict, str) -> None
+ self,
+ ctx: CephadmContext,
+ fsid: str,
+ daemon_id: Union[int, str],
+ config_json: Dict,
+ image: str = DEFAULT_IMAGE,
+ ) -> None:
self.ctx = ctx
self.fsid = fsid
self.daemon_id = daemon_id
@@ -62,8 +66,9 @@ class NFSGanesha(ContainerDaemonForm):
self.validate()
@classmethod
- def init(cls, ctx, fsid, daemon_id):
- # type: (CephadmContext, str, Union[int, str]) -> NFSGanesha
+ def init(
+ cls, ctx: CephadmContext, fsid: str, daemon_id: Union[int, str]
+ ) -> 'NFSGanesha':
return cls(ctx, fsid, daemon_id, fetch_configs(ctx), ctx.image)
@classmethod
diff --git a/src/cephadm/cephadmlib/daemons/nvmeof.py b/src/cephadm/cephadmlib/daemons/nvmeof.py
index 2c20a900f45..d916c7e6391 100644
--- a/src/cephadm/cephadmlib/daemons/nvmeof.py
+++ b/src/cephadm/cephadmlib/daemons/nvmeof.py
@@ -33,9 +33,13 @@ class CephNvmeof(ContainerDaemonForm):
return cls.daemon_type == daemon_type
def __init__(
- self, ctx, fsid, daemon_id, config_json, image=DEFAULT_NVMEOF_IMAGE
- ):
- # type: (CephadmContext, str, Union[int, str], Dict, str) -> None
+ self,
+ ctx: CephadmContext,
+ fsid: str,
+ daemon_id: Union[int, str],
+ config_json: Dict,
+ image: str = DEFAULT_NVMEOF_IMAGE,
+ ) -> None:
self.ctx = ctx
self.fsid = fsid
self.daemon_id = daemon_id
@@ -48,8 +52,9 @@ class CephNvmeof(ContainerDaemonForm):
self.validate()
@classmethod
- def init(cls, ctx, fsid, daemon_id):
- # type: (CephadmContext, str, Union[int, str]) -> CephNvmeof
+ def init(
+ cls, ctx: CephadmContext, fsid: str, daemon_id: Union[int, str]
+ ) -> 'CephNvmeof':
return cls(ctx, fsid, daemon_id, fetch_configs(ctx), ctx.image)
@classmethod
@@ -73,13 +78,18 @@ class CephNvmeof(ContainerDaemonForm):
os.path.join(data_dir, 'ceph-nvmeof.conf')
] = '/src/ceph-nvmeof.conf:z'
mounts[os.path.join(data_dir, 'configfs')] = '/sys/kernel/config'
- mounts['/dev/hugepages'] = '/dev/hugepages'
- mounts['/dev/vfio/vfio'] = '/dev/vfio/vfio'
mounts[log_dir] = '/var/log/ceph:z'
if mtls_dir:
mounts[mtls_dir] = '/src/mtls:z'
return mounts
+ def _get_huge_pages_mounts(self, files: Dict[str, str]) -> Dict[str, str]:
+ mounts = dict()
+ if 'spdk_mem_size' not in files:
+ mounts['/dev/hugepages'] = '/dev/hugepages'
+ mounts['/dev/vfio/vfio'] = '/dev/vfio/vfio'
+ return mounts
+
def _get_tls_cert_key_mounts(
self, data_dir: str, files: Dict[str, str]
) -> Dict[str, str]:
@@ -111,6 +121,7 @@ class CephNvmeof(ContainerDaemonForm):
)
else:
mounts.update(self._get_container_mounts(data_dir, log_dir))
+ mounts.update(self._get_huge_pages_mounts(self.files))
mounts.update(self._get_tls_cert_key_mounts(data_dir, self.files))
def customize_container_binds(
@@ -198,11 +209,13 @@ class CephNvmeof(ContainerDaemonForm):
)
return cmd.split()
- @staticmethod
- def get_sysctl_settings() -> List[str]:
- return [
- 'vm.nr_hugepages = 4096',
- ]
+ def get_sysctl_settings(self) -> List[str]:
+ if 'spdk_mem_size' not in self.files:
+ return [
+ 'vm.nr_hugepages = 4096',
+ ]
+ else:
+ return []
def container(self, ctx: CephadmContext) -> CephContainer:
ctr = daemon_to_container(ctx, self)
@@ -222,4 +235,6 @@ class CephNvmeof(ContainerDaemonForm):
args.append(ctx.container_engine.unlimited_pids_option)
args.extend(['--ulimit', 'memlock=-1:-1'])
args.extend(['--ulimit', 'nofile=10240'])
- args.extend(['--cap-add=SYS_ADMIN', '--cap-add=CAP_SYS_NICE'])
+ args.extend(['--cap-add=CAP_SYS_NICE'])
+ if 'spdk_mem_size' not in self.files:
+ args.extend(['--cap-add=SYS_ADMIN'])
diff --git a/src/cephadm/cephadmlib/data_utils.py b/src/cephadm/cephadmlib/data_utils.py
index 0ab8b38d2b5..9caef3f72e5 100644
--- a/src/cephadm/cephadmlib/data_utils.py
+++ b/src/cephadm/cephadmlib/data_utils.py
@@ -189,8 +189,9 @@ def normalize_image_digest(digest: str) -> str:
return digest
-def get_legacy_config_fsid(cluster, legacy_dir=None):
- # type: (str, Optional[str]) -> Optional[str]
+def get_legacy_config_fsid(
+ cluster: str, legacy_dir: Optional[str] = None
+) -> Optional[str]:
config_file = '/etc/ceph/%s.conf' % cluster
if legacy_dir is not None:
config_file = os.path.abspath(legacy_dir + config_file)
diff --git a/src/cephadm/cephadmlib/exceptions.py b/src/cephadm/cephadmlib/exceptions.py
index 0d215fdd332..762ce782127 100644
--- a/src/cephadm/cephadmlib/exceptions.py
+++ b/src/cephadm/cephadmlib/exceptions.py
@@ -19,3 +19,16 @@ class UnauthorizedRegistryError(Error):
class PortOccupiedError(Error):
pass
+
+
+class DaemonStartException(Exception):
+ """
+ Special exception type we raise when the
+ systemctl start command fails during daemon
+ deployment. Necessary because the cephadm mgr module
+ needs to handle this case differently than a failure
+ earlier in the deploy process where no attempt was made
+ to actually start the daemon
+ """
+
+ pass
diff --git a/src/cephadm/cephadmlib/file_utils.py b/src/cephadm/cephadmlib/file_utils.py
index 399729f2dcc..27e70e31756 100644
--- a/src/cephadm/cephadmlib/file_utils.py
+++ b/src/cephadm/cephadmlib/file_utils.py
@@ -52,8 +52,9 @@ def write_new(
os.rename(tempname, destination)
-def populate_files(config_dir, config_files, uid, gid):
- # type: (str, Dict, int, int) -> None
+def populate_files(
+ config_dir: str, config_files: Dict, uid: int, gid: int
+) -> None:
"""create config files for different services"""
for fname in config_files:
config_file = os.path.join(config_dir, fname)
@@ -71,8 +72,7 @@ def touch(
os.chown(file_path, uid, gid)
-def write_tmp(s, uid, gid):
- # type: (str, int, int) -> IO[str]
+def write_tmp(s: str, uid: int, gid: int) -> IO[str]:
tmp_f = tempfile.NamedTemporaryFile(mode='w', prefix='ceph-tmp')
os.fchown(tmp_f.fileno(), uid, gid)
tmp_f.write(s)
@@ -97,8 +97,7 @@ def recursive_chown(path: str, uid: int, gid: int) -> None:
os.chown(os.path.join(dirpath, filename), uid, gid)
-def read_file(path_list, file_name=''):
- # type: (List[str], str) -> str
+def read_file(path_list: List[str], file_name: str = '') -> str:
"""Returns the content of the first file found within the `path_list`
:param path_list: list of file paths to search
@@ -123,14 +122,12 @@ def read_file(path_list, file_name=''):
return 'Unknown'
-def pathify(p):
- # type: (str) -> str
+def pathify(p: str) -> str:
p = os.path.expanduser(p)
return os.path.abspath(p)
-def get_file_timestamp(fn):
- # type: (str) -> Optional[str]
+def get_file_timestamp(fn: str) -> Optional[str]:
try:
mt = os.path.getmtime(fn)
return datetime.datetime.fromtimestamp(
diff --git a/src/cephadm/cephadmlib/systemd.py b/src/cephadm/cephadmlib/systemd.py
index a07757eccad..1956957d457 100644
--- a/src/cephadm/cephadmlib/systemd.py
+++ b/src/cephadm/cephadmlib/systemd.py
@@ -11,8 +11,7 @@ from .packagers import Packager
logger = logging.getLogger()
-def check_unit(ctx, unit_name):
- # type: (CephadmContext, str) -> Tuple[bool, str, bool]
+def check_unit(ctx: CephadmContext, unit_name: str) -> Tuple[bool, str, bool]:
# NOTE: we ignore the exit code here because systemctl outputs
# various exit codes based on the state of the service, but the
# string result is more explicit (and sufficient).
@@ -56,8 +55,9 @@ def check_unit(ctx, unit_name):
return (enabled, state, installed)
-def check_units(ctx, units, enabler=None):
- # type: (CephadmContext, List[str], Optional[Packager]) -> bool
+def check_units(
+ ctx: CephadmContext, units: List[str], enabler: Optional[Packager] = None
+) -> bool:
for u in units:
(enabled, state, installed) = check_unit(ctx, u)
if enabled and state == 'running':
diff --git a/src/cephadm/tests/test_agent.py b/src/cephadm/tests/test_agent.py
index 52cce74e1fb..8e453e3ac3c 100644
--- a/src/cephadm/tests/test_agent.py
+++ b/src/cephadm/tests/test_agent.py
@@ -668,7 +668,7 @@ def test_mgr_listener_run(_load_cert_chain, _load_verify_locations, _handle_json
agent.mgr_listener.run()
# verify payload was correctly extracted
- assert _handle_json_payload.called_with(json.loads(payload))
+ _handle_json_payload.assert_called_with(json.loads(payload))
FakeConn.send.assert_called_once_with(b'ACK')
# second run, with bad json data received
diff --git a/src/cephadm/tests/test_cephadm.py b/src/cephadm/tests/test_cephadm.py
index f27b9bcd362..bbaaf2d39f8 100644
--- a/src/cephadm/tests/test_cephadm.py
+++ b/src/cephadm/tests/test_cephadm.py
@@ -1,5 +1,6 @@
# type: ignore
+import contextlib
import copy
import errno
import json
@@ -38,6 +39,13 @@ def get_ceph_conf(
mon_host = {mon_host}
'''
+@contextlib.contextmanager
+def bootstrap_test_ctx(*args, **kwargs):
+ with with_cephadm_ctx(*args, **kwargs) as ctx:
+ ctx.no_cleanup_on_failure = True
+ yield ctx
+
+
class TestCephAdm(object):
@mock.patch('cephadm.logger')
@@ -1432,13 +1440,13 @@ class TestBootstrap(object):
'--config', conf_file,
)
- with with_cephadm_ctx(cmd) as ctx:
+ with bootstrap_test_ctx(cmd) as ctx:
msg = r'No such file or directory'
with pytest.raises(_cephadm.Error, match=msg):
_cephadm.command_bootstrap(ctx)
cephadm_fs.create_file(conf_file)
- with with_cephadm_ctx(cmd) as ctx:
+ with bootstrap_test_ctx(cmd) as ctx:
retval = _cephadm.command_bootstrap(ctx)
assert retval == 0
@@ -1446,7 +1454,7 @@ class TestBootstrap(object):
funkypatch.patch('cephadmlib.systemd.call')
cmd = self._get_cmd()
- with with_cephadm_ctx(cmd) as ctx:
+ with bootstrap_test_ctx(cmd) as ctx:
msg = r'must specify --mon-ip or --mon-addrv'
with pytest.raises(_cephadm.Error, match=msg):
_cephadm.command_bootstrap(ctx)
@@ -1455,13 +1463,13 @@ class TestBootstrap(object):
funkypatch.patch('cephadmlib.systemd.call')
cmd = self._get_cmd('--mon-ip', '192.168.1.1')
- with with_cephadm_ctx(cmd, list_networks={}) as ctx:
+ with bootstrap_test_ctx(cmd, list_networks={}) as ctx:
msg = r'--skip-mon-network'
with pytest.raises(_cephadm.Error, match=msg):
_cephadm.command_bootstrap(ctx)
cmd += ['--skip-mon-network']
- with with_cephadm_ctx(cmd, list_networks={}) as ctx:
+ with bootstrap_test_ctx(cmd, list_networks={}) as ctx:
retval = _cephadm.command_bootstrap(ctx)
assert retval == 0
@@ -1540,12 +1548,12 @@ class TestBootstrap(object):
cmd = self._get_cmd('--mon-ip', mon_ip)
if not result:
- with with_cephadm_ctx(cmd, list_networks=list_networks) as ctx:
+ with bootstrap_test_ctx(cmd, list_networks=list_networks) as ctx:
msg = r'--skip-mon-network'
with pytest.raises(_cephadm.Error, match=msg):
_cephadm.command_bootstrap(ctx)
else:
- with with_cephadm_ctx(cmd, list_networks=list_networks) as ctx:
+ with bootstrap_test_ctx(cmd, list_networks=list_networks) as ctx:
retval = _cephadm.command_bootstrap(ctx)
assert retval == 0
@@ -1604,11 +1612,11 @@ class TestBootstrap(object):
cmd = self._get_cmd('--mon-addrv', mon_addrv)
if err:
- with with_cephadm_ctx(cmd, list_networks=list_networks) as ctx:
+ with bootstrap_test_ctx(cmd, list_networks=list_networks) as ctx:
with pytest.raises(_cephadm.Error, match=err):
_cephadm.command_bootstrap(ctx)
else:
- with with_cephadm_ctx(cmd, list_networks=list_networks) as ctx:
+ with bootstrap_test_ctx(cmd, list_networks=list_networks) as ctx:
retval = _cephadm.command_bootstrap(ctx)
assert retval == 0
@@ -1621,13 +1629,13 @@ class TestBootstrap(object):
'--skip-mon-network',
)
- with with_cephadm_ctx(cmd, hostname=hostname) as ctx:
+ with bootstrap_test_ctx(cmd, hostname=hostname) as ctx:
msg = r'--allow-fqdn-hostname'
with pytest.raises(_cephadm.Error, match=msg):
_cephadm.command_bootstrap(ctx)
cmd += ['--allow-fqdn-hostname']
- with with_cephadm_ctx(cmd, hostname=hostname) as ctx:
+ with bootstrap_test_ctx(cmd, hostname=hostname) as ctx:
retval = _cephadm.command_bootstrap(ctx)
assert retval == 0
@@ -1646,7 +1654,7 @@ class TestBootstrap(object):
'--fsid', fsid,
)
- with with_cephadm_ctx(cmd) as ctx:
+ with bootstrap_test_ctx(cmd) as ctx:
if err:
with pytest.raises(_cephadm.Error, match=err):
_cephadm.command_bootstrap(ctx)
@@ -1661,7 +1669,7 @@ class TestShell(object):
fsid = '00000000-0000-0000-0000-0000deadbeef'
cmd = ['shell', '--fsid', fsid]
- with with_cephadm_ctx(cmd) as ctx:
+ with bootstrap_test_ctx(cmd) as ctx:
retval = _cephadm.command_shell(ctx)
assert retval == 0
assert ctx.fsid == fsid
diff --git a/src/cephadm/tests/test_deploy.py b/src/cephadm/tests/test_deploy.py
index 58f212beff6..c5094db335f 100644
--- a/src/cephadm/tests/test_deploy.py
+++ b/src/cephadm/tests/test_deploy.py
@@ -495,6 +495,7 @@ def test_deploy_ceph_exporter_container(cephadm_fs, funkypatch):
def test_deploy_and_rm_iscsi(cephadm_fs, funkypatch):
# Test that the deploy and remove paths for iscsi (which has sidecar container)
# create and remove the correct unit files.
+ funkypatch.patch('shutil.rmtree') # fakefs + shutil.rmtree breaks on py3.12
mocks = _common_patches(funkypatch)
_firewalld = mocks['Firewalld']
fsid = 'b01dbeef-701d-9abe-0000-e1e5a47004a7'
diff --git a/src/cephadm/tox.ini b/src/cephadm/tox.ini
index b999a0f552b..d643b1ba74f 100644
--- a/src/cephadm/tox.ini
+++ b/src/cephadm/tox.ini
@@ -12,14 +12,14 @@ skipsdist = true
max-line-length = 100
inline-quotes = '
ignore =
- E501, \
+ E501,
W503,
exclude =
- .tox, \
- .vagrant, \
- __pycache__, \
- *.pyc, \
- templates, \
+ .tox,
+ .vagrant,
+ __pycache__,
+ *.pyc,
+ templates,
.eggs
statistics = True
@@ -53,7 +53,7 @@ commands = mypy --config-file ../mypy.ini {posargs:cephadm.py cephadmlib}
[testenv:flake8]
allowlist_externals = bash
deps =
- flake8 == 5.0.4
+ flake8
flake8-quotes
commands =
flake8 --config=tox.ini {posargs:cephadm.py cephadmlib}
diff --git a/src/client/Client.cc b/src/client/Client.cc
index 21555d0d07c..c404057b929 100644
--- a/src/client/Client.cc
+++ b/src/client/Client.cc
@@ -3843,6 +3843,7 @@ void Client::send_cap(Inode *in, MetaSession *session, Cap *cap,
want,
flush,
cap->mseq,
+ cap->issue_seq,
cap_epoch_barrier);
/*
* Since the setattr will check the cephx mds auth access before
@@ -3856,7 +3857,6 @@ void Client::send_cap(Inode *in, MetaSession *session, Cap *cap,
m->caller_uid = -1;
m->caller_gid = -1;
- m->head.issue_seq = cap->issue_seq;
m->set_tid(flush_tid);
m->head.uid = in->uid;
@@ -5521,10 +5521,10 @@ void Client::handle_cap_export(MetaSession *session, Inode *in, const MConstRef<
if (it != in->caps.end()) {
Cap &tcap = it->second;
if (tcap.cap_id == m->peer.cap_id &&
- ceph_seq_cmp(tcap.seq, m->peer.seq) < 0) {
+ ceph_seq_cmp(tcap.seq, m->peer.issue_seq) < 0) {
tcap.cap_id = m->peer.cap_id;
- tcap.seq = m->peer.seq - 1;
- tcap.issue_seq = tcap.seq;
+ tcap.seq = m->peer.issue_seq - 1;
+ tcap.issue_seq = tcap.issue_seq;
tcap.issued |= cap.issued;
tcap.implemented |= cap.issued;
if (&cap == in->auth_cap)
@@ -5534,7 +5534,7 @@ void Client::handle_cap_export(MetaSession *session, Inode *in, const MConstRef<
}
} else {
add_update_cap(in, tsession.get(), m->peer.cap_id, cap.issued, 0,
- m->peer.seq - 1, m->peer.mseq, (uint64_t)-1,
+ m->peer.issue_seq - 1, m->peer.mseq, (uint64_t)-1,
&cap == in->auth_cap ? CEPH_CAP_FLAG_AUTH : 0,
cap.latest_perms);
}
diff --git a/src/client/MetaSession.cc b/src/client/MetaSession.cc
index b5160a84331..3baa833851f 100644
--- a/src/client/MetaSession.cc
+++ b/src/client/MetaSession.cc
@@ -56,7 +56,7 @@ void MetaSession::enqueue_cap_release(inodeno_t ino, uint64_t cap_id, ceph_seq_t
ceph_mds_cap_item i;
i.ino = ino;
i.cap_id = cap_id;
- i.seq = iseq;
+ i.issue_seq = iseq;
i.migrate_seq = mseq;
release->caps.push_back(i);
}
diff --git a/src/client/SyntheticClient.cc b/src/client/SyntheticClient.cc
index 3b408dd3f2d..6b315d2dee3 100644
--- a/src/client/SyntheticClient.cc
+++ b/src/client/SyntheticClient.cc
@@ -290,6 +290,7 @@ SyntheticClient::SyntheticClient(StandaloneClient *client, int w)
void *synthetic_client_thread_entry(void *ptr)
{
+ ceph_pthread_setname("client");
SyntheticClient *sc = static_cast<SyntheticClient*>(ptr);
//int r =
sc->run();
@@ -945,7 +946,6 @@ int SyntheticClient::start_thread()
pthread_create(&thread_id, NULL, synthetic_client_thread_entry, this);
ceph_assert(thread_id);
- ceph_pthread_setname(thread_id, "client");
return 0;
}
diff --git a/src/common/DecayCounter.h b/src/common/DecayCounter.h
index 9455ecc5a33..30570c72a30 100644
--- a/src/common/DecayCounter.h
+++ b/src/common/DecayCounter.h
@@ -16,7 +16,6 @@
#define CEPH_DECAYCOUNTER_H
#include "include/buffer.h"
-#include "common/Formatter.h"
#include "common/StackStringStream.h"
#include "common/ceph_time.h"
@@ -24,6 +23,8 @@
#include <list>
#include <sstream>
+namespace ceph { class Formatter; }
+
/**
*
* TODO: normalize value based on some function of half_life,
diff --git a/src/common/Graylog.cc b/src/common/Graylog.cc
index cbd63fab25f..099acacd803 100644
--- a/src/common/Graylog.cc
+++ b/src/common/Graylog.cc
@@ -2,6 +2,9 @@
// vim: ts=8 sw=2 smarttab
#include "Graylog.h"
+
+#include <iostream> // for std::cerr
+
#include "common/Formatter.h"
#include "common/LogEntry.h"
#include "log/Entry.h"
diff --git a/src/common/Journald.cc b/src/common/Journald.cc
index 164b65834a6..12e1a97e998 100644
--- a/src/common/Journald.cc
+++ b/src/common/Journald.cc
@@ -14,6 +14,9 @@
#include <sys/un.h>
#include <syslog.h>
#include <unistd.h>
+
+#include <iostream> // for std::cerr
+
#include <fmt/format.h>
#include <fmt/ostream.h>
@@ -23,7 +26,6 @@
#include "log/SubsystemMap.h"
#include "msg/msg_fmt.h"
-
namespace ceph::logging {
namespace {
diff --git a/src/common/StackStringStream.h b/src/common/StackStringStream.h
index 8cb48ff6fcd..6a144fb938a 100644
--- a/src/common/StackStringStream.h
+++ b/src/common/StackStringStream.h
@@ -18,10 +18,9 @@
#include <boost/container/small_vector.hpp>
#include <algorithm>
-#include <iostream>
#include <memory>
#include <ostream>
-#include <sstream>
+#include <string>
#include <string_view>
#include <vector>
diff --git a/src/common/Thread.cc b/src/common/Thread.cc
index 3903e8c0ed7..c714aa0aa87 100644
--- a/src/common/Thread.cc
+++ b/src/common/Thread.cc
@@ -83,7 +83,7 @@ void *Thread::entry_wrapper()
if (pid && cpuid >= 0)
_set_affinity(cpuid);
- ceph_pthread_setname(pthread_self(), Thread::thread_name.c_str());
+ ceph_pthread_setname(thread_name.c_str());
return entry();
}
@@ -154,7 +154,7 @@ int Thread::try_create(size_t stacksize)
void Thread::create(const char *name, size_t stacksize)
{
ceph_assert(strlen(name) < 16);
- Thread::thread_name = name;
+ thread_name = name;
int ret = try_create(stacksize);
if (ret != 0) {
@@ -203,24 +203,6 @@ int Thread::set_affinity(int id)
// Functions for std::thread
// =========================
-void set_thread_name(std::thread& t, const std::string& s) {
- int r = ceph_pthread_setname(t.native_handle(), s.c_str());
- if (r != 0) {
- throw std::system_error(r, std::generic_category());
- }
-}
-std::string get_thread_name(const std::thread& t) {
- std::string s(256, '\0');
-
- int r = ceph_pthread_getname(const_cast<std::thread&>(t).native_handle(),
- s.data(), s.length());
- if (r != 0) {
- throw std::system_error(r, std::generic_category());
- }
- s.resize(std::strlen(s.data()));
- return s;
-}
-
void kill(std::thread& t, int signal)
{
auto r = ceph_pthread_kill(t.native_handle(), signal);
diff --git a/src/common/Thread.h b/src/common/Thread.h
index d3892c1b36b..8dc0e6c3cbe 100644
--- a/src/common/Thread.h
+++ b/src/common/Thread.h
@@ -17,8 +17,8 @@
#define CEPH_THREAD_H
#include <functional>
+#include <string>
#include <string_view>
-#include <system_error>
#include <thread>
#include <cstring>
@@ -27,7 +27,6 @@
#include "include/ceph_assert.h"
#include "include/compat.h"
-#include "include/spinlock.h"
extern pid_t ceph_gettid();
@@ -36,7 +35,7 @@ class Thread {
pthread_t thread_id;
pid_t pid;
int cpuid;
- static inline thread_local std::string thread_name;
+ std::string thread_name;
void *entry_wrapper();
@@ -64,15 +63,10 @@ class Thread {
int join(void **prval = 0);
int detach();
int set_affinity(int cpuid);
- static const std::string get_thread_name() {
- return Thread::thread_name;
- }
};
// Functions for with std::thread
-void set_thread_name(std::thread& t, const std::string& s);
-std::string get_thread_name(const std::thread& t);
void kill(std::thread& t, int signal);
template<typename Fun, typename... Args>
@@ -81,7 +75,7 @@ std::thread make_named_thread(std::string_view n,
Args&& ...args) {
return std::thread([n = std::string(n)](auto&& fun, auto&& ...args) {
- ceph_pthread_setname(pthread_self(), n.data());
+ ceph_pthread_setname(n.data());
std::invoke(std::forward<Fun>(fun),
std::forward<Args>(args)...);
}, std::forward<Fun>(fun), std::forward<Args>(args)...);
diff --git a/src/common/Throttle.h b/src/common/Throttle.h
index e190b946c45..fb5d949b438 100644
--- a/src/common/Throttle.h
+++ b/src/common/Throttle.h
@@ -6,7 +6,7 @@
#include <atomic>
#include <chrono>
-#include <iostream>
+#include <iosfwd>
#include <list>
#include <map>
diff --git a/src/common/admin_socket.cc b/src/common/admin_socket.cc
index 1e73ce0836a..55b87de3207 100644
--- a/src/common/admin_socket.cc
+++ b/src/common/admin_socket.cc
@@ -12,7 +12,13 @@
*
*/
#include <poll.h>
+#include <signal.h>
#include <sys/un.h>
+
+#ifndef WIN32
+#include <sys/wait.h>
+#endif
+
#include <optional>
#include <stdlib.h>
diff --git a/src/common/assert.cc b/src/common/assert.cc
index 7fb4c2d726b..68ad99c878e 100644
--- a/src/common/assert.cc
+++ b/src/common/assert.cc
@@ -44,8 +44,7 @@ namespace ceph {
g_assert_line = line;
g_assert_func = func;
g_assert_thread = (unsigned long long)pthread_self();
- ceph_pthread_getname(pthread_self(), g_assert_thread_name,
- sizeof(g_assert_thread_name));
+ ceph_pthread_getname(g_assert_thread_name, sizeof(g_assert_thread_name));
ostringstream tss;
tss << ceph_clock_now();
@@ -122,8 +121,7 @@ namespace ceph {
g_assert_line = line;
g_assert_func = func;
g_assert_thread = (unsigned long long)pthread_self();
- ceph_pthread_getname(pthread_self(), g_assert_thread_name,
- sizeof(g_assert_thread_name));
+ ceph_pthread_getname(g_assert_thread_name, sizeof(g_assert_thread_name));
BufAppender ba(g_assert_msg, sizeof(g_assert_msg));
BackTrace *bt = new ClibBackTrace(1);
@@ -168,8 +166,7 @@ namespace ceph {
g_assert_line = line;
g_assert_func = func;
g_assert_thread = (unsigned long long)pthread_self();
- ceph_pthread_getname(pthread_self(), g_assert_thread_name,
- sizeof(g_assert_thread_name));
+ ceph_pthread_getname(g_assert_thread_name, sizeof(g_assert_thread_name));
BackTrace *bt = new ClibBackTrace(1);
snprintf(g_assert_msg, sizeof(g_assert_msg),
@@ -210,8 +207,7 @@ namespace ceph {
g_assert_line = line;
g_assert_func = func;
g_assert_thread = (unsigned long long)pthread_self();
- ceph_pthread_getname(pthread_self(), g_assert_thread_name,
- sizeof(g_assert_thread_name));
+ ceph_pthread_getname(g_assert_thread_name, sizeof(g_assert_thread_name));
BufAppender ba(g_assert_msg, sizeof(g_assert_msg));
BackTrace *bt = new ClibBackTrace(1);
diff --git a/src/common/buffer.cc b/src/common/buffer.cc
index b4640979289..4443ef14124 100644
--- a/src/common/buffer.cc
+++ b/src/common/buffer.cc
@@ -19,6 +19,8 @@
#include <sys/uio.h>
+#include <iostream>
+
#include "include/ceph_assert.h"
#include "include/types.h"
#include "include/buffer_raw.h"
diff --git a/src/common/ceph_argparse.cc b/src/common/ceph_argparse.cc
index 9b989fe7270..ad12e0b6764 100644
--- a/src/common/ceph_argparse.cc
+++ b/src/common/ceph_argparse.cc
@@ -16,6 +16,7 @@
#include "auth/Auth.h"
#include "common/ceph_argparse.h"
#include "common/config.h"
+#include "common/strtol.h" // for strict_strtof()
#include "common/version.h"
#include "include/str_list.h"
diff --git a/src/common/ceph_argparse.h b/src/common/ceph_argparse.h
index d63a2bdd796..5a160dd0b79 100644
--- a/src/common/ceph_argparse.h
+++ b/src/common/ceph_argparse.h
@@ -29,6 +29,8 @@
#include "common/entity_name.h"
#include "include/encoding.h"
+class entity_addrvec_t;
+
/////////////////////// Types ///////////////////////
class CephInitParameters
{
diff --git a/src/common/ceph_time.h b/src/common/ceph_time.h
index bae038862cf..01feff4c063 100644
--- a/src/common/ceph_time.h
+++ b/src/common/ceph_time.h
@@ -16,7 +16,7 @@
#define COMMON_CEPH_TIME_H
#include <chrono>
-#include <iostream>
+#include <iosfwd>
#include <string>
#include <optional>
#include <fmt/chrono.h>
diff --git a/src/common/ceph_timer.h b/src/common/ceph_timer.h
index bc324bfa243..7fb2c7bac12 100644
--- a/src/common/ceph_timer.h
+++ b/src/common/ceph_timer.h
@@ -98,6 +98,7 @@ class timer {
std::thread thread;
void timer_thread() {
+ ceph_pthread_setname("ceph_timer");
std::unique_lock l(lock);
while (!suspended) {
auto now = TC::now();
@@ -155,7 +156,6 @@ class timer {
public:
timer() : suspended(false) {
thread = std::thread(&timer::timer_thread, this);
- set_thread_name(thread, "ceph_timer");
}
// Create a suspended timer, jobs will be executed in order when
diff --git a/src/common/code_environment.cc b/src/common/code_environment.cc
index 14d55f60c30..21633fc5d41 100644
--- a/src/common/code_environment.cc
+++ b/src/common/code_environment.cc
@@ -11,6 +11,7 @@
* Foundation. See file COPYING.
*
*/
+#include "include/compat.h"
#include "common/code_environment.h"
@@ -18,10 +19,6 @@
#include "acconfig.h"
-#ifdef HAVE_PTHREAD_GETNAME_NP
-#include <pthread.h>
-#endif
-
#include <string.h>
code_environment_t g_code_env = CODE_ENVIRONMENT_UTILITY;
@@ -57,7 +54,7 @@ int get_process_name(char *buf, int len)
}
// FIPS zeroization audit 20191115: this memset is not security related.
memset(buf, 0, len);
- return pthread_getname_np(pthread_self(), buf, len);
+ return ceph_pthread_getname(buf, len);
}
#elif defined(HAVE_GETPROGNAME)
diff --git a/src/common/compat.cc b/src/common/compat.cc
index 82b57ad94b5..84a395c5a19 100644
--- a/src/common/compat.cc
+++ b/src/common/compat.cc
@@ -565,3 +565,66 @@ ssize_t get_self_exe_path(char* path, int buff_length) {
}
#endif /* _WIN32 */
+
+
+static thread_local char cached_thread_name[256]{};
+
+int ceph_pthread_setname(char const* name)
+{
+ strncpy(cached_thread_name, name, sizeof cached_thread_name - 1);
+#if defined(_WIN32) && defined(__clang__) && \
+ !defined(_LIBCPP_HAS_THREAD_API_PTHREAD)
+ // In this case, llvm doesn't use the pthread api for std::thread.
+ // We cannot use native_handle() with the pthread api, nor can we pass
+ // it to Windows API functions.
+ return 0;
+#elif defined(HAVE_PTHREAD_SETNAME_NP)
+ #if defined(__APPLE__)
+ return pthread_setname_np(name);
+ #else
+ return pthread_setname_np(pthread_self(), name);
+ #endif
+#elif defined(HAVE_PTHREAD_SET_NAME_NP)
+ pthread_set_name_np(pthread_self(), name);
+ return 0;
+#else
+ return 0;
+#endif
+}
+
+int ceph_pthread_getname(char* name, size_t len)
+{
+ if (cached_thread_name[0]) {
+ if (len > 0) {
+ strncpy(name, cached_thread_name, len);
+ name[len-1] = 0;
+ }
+ return 0;
+ } else {
+#if defined(_WIN32) && defined(__clang__) && \
+ !defined(_LIBCPP_HAS_THREAD_API_PTHREAD)
+ if (len > 0) {
+ strcpy(name, "");
+ }
+ return 0;
+#elif defined(HAVE_PTHREAD_GETNAME_NP) || defined(HAVE_PTHREAD_GET_NAME_NP)
+# if defined(HAVE_PTHREAD_GETNAME_NP)
+ int rc = pthread_getname_np(pthread_self(), cached_thread_name, sizeof cached_thread_name);
+# else
+ int rc = pthread_get_name_np(pthread_self(), cached_thread_name, sizeof cached_thread_name);
+# endif
+ if (rc == 0) {
+ if (len > 0)
+ strncpy(name, cached_thread_name, len), name[len-1] = '\0';
+ return 0;
+ } else {
+ return rc;
+ }
+#else
+ if (len > 0) {
+ strcpy(name, "");
+ }
+ return 0;
+#endif
+ }
+}
diff --git a/src/common/config_cacher.h b/src/common/config_cacher.h
index a84bad08eee..91b8152dde1 100644
--- a/src/common/config_cacher.h
+++ b/src/common/config_cacher.h
@@ -50,7 +50,7 @@ public:
conf.remove_observer(this);
}
- operator ValueT() const {
+ ValueT operator*() const {
return value_cache.load();
}
};
diff --git a/src/common/error_code.cc b/src/common/error_code.cc
index ed0e681b22b..9c981a21077 100644
--- a/src/common/error_code.cc
+++ b/src/common/error_code.cc
@@ -13,10 +13,9 @@
* COPYING.
*/
-#include <exception>
+#include "common/error_code.h"
#include <boost/asio/error.hpp>
-#include "common/error_code.h"
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wnon-virtual-dtor"
diff --git a/src/common/error_code.h b/src/common/error_code.h
index e39122f8ce3..93a1bf31c00 100644
--- a/src/common/error_code.h
+++ b/src/common/error_code.h
@@ -16,9 +16,8 @@
#ifndef COMMON_CEPH_ERROR_CODE
#define COMMON_CEPH_ERROR_CODE
-#include <netdb.h>
-
-#include <boost/system.hpp>
+#include <boost/system/error_code.hpp>
+#include <boost/system/system_error.hpp>
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wnon-virtual-dtor"
diff --git a/src/common/obj_bencher.cc b/src/common/obj_bencher.cc
index 32ecc958618..f5e744e2339 100644
--- a/src/common/obj_bencher.cc
+++ b/src/common/obj_bencher.cc
@@ -99,6 +99,7 @@ ostream& ObjBencher::out(ostream& os)
}
void *ObjBencher::status_printer(void *_bencher) {
+ ceph_pthread_setname("OB::stat_print");
ObjBencher *bencher = static_cast<ObjBencher *>(_bencher);
bench_data& data = bencher->data;
Formatter *formatter = bencher->formatter;
@@ -453,7 +454,6 @@ int ObjBencher::write_bench(int secondsToRun,
pthread_t print_thread;
pthread_create(&print_thread, NULL, ObjBencher::status_printer, (void *)this);
- ceph_pthread_setname(print_thread, "write_stat");
std::unique_lock locker{lock};
data.finished = 0;
data.start_time = mono_clock::now();
@@ -691,7 +691,6 @@ int ObjBencher::seq_read_bench(
pthread_t print_thread;
pthread_create(&print_thread, NULL, status_printer, (void *)this);
- ceph_pthread_setname(print_thread, "seq_read_stat");
mono_time finish_time = data.start_time + time_to_run;
//start initial reads
@@ -903,7 +902,6 @@ int ObjBencher::rand_read_bench(
pthread_t print_thread;
pthread_create(&print_thread, NULL, status_printer, (void *)this);
- ceph_pthread_setname(print_thread, "rand_read_stat");
mono_time finish_time = data.start_time + time_to_run;
//start initial reads
diff --git a/src/common/options.cc b/src/common/options.cc
index a68e2474a3d..3f6894b01c1 100644
--- a/src/common/options.cc
+++ b/src/common/options.cc
@@ -5,6 +5,7 @@
#include "options.h"
#include "common/Formatter.h"
#include "common/options/build_options.h"
+#include "common/strtol.h" // for strict_si_cast()
// Helpers for validators
#include "include/stringify.h"
diff --git a/src/common/options.h b/src/common/options.h
index abded4cc0dd..ec6db7770c3 100644
--- a/src/common/options.h
+++ b/src/common/options.h
@@ -4,6 +4,7 @@
#pragma once
#include <chrono>
+#include <iostream> // for std::cerr
#include <string>
#include <variant>
#include <vector>
diff --git a/src/common/options/mds.yaml.in b/src/common/options/mds.yaml.in
index 18efba561ed..94824faef6b 100644
--- a/src/common/options/mds.yaml.in
+++ b/src/common/options/mds.yaml.in
@@ -586,16 +586,6 @@ options:
min: 1
services:
- mds
-- name: mds_log_major_segment_event_ratio
- type: uint
- level: advanced
- desc: multiple of mds_log_events_per_segment between major segments
- default: 12
- services:
- - mds
- min: 1
- see_also:
- - mds_log_events_per_segment
# segment size for mds log, default to default file_layout_t
- name: mds_log_segment_size
type: size
@@ -1741,3 +1731,12 @@ options:
- mds
flags:
- runtime
+- name: mds_log_minor_segments_per_major_segment
+ type: uint
+ level: advanced
+ desc: number of minor segments per major segment.
+ long_desc: The number of minor mds log segments since last major segment after which a major segment is started/logged.
+ default: 16
+ services:
+ - mds
+ min: 8
diff --git a/src/common/options/osd.yaml.in b/src/common/options/osd.yaml.in
index 3bfff92c907..49099f42b71 100644
--- a/src/common/options/osd.yaml.in
+++ b/src/common/options/osd.yaml.in
@@ -346,7 +346,7 @@ options:
default: 5
see_also:
- osd_scrub_chunk_max
- with_legacy: true
+ with_legacy: false
- name: osd_scrub_chunk_max
type: int
level: advanced
@@ -357,7 +357,7 @@ options:
default: 15
see_also:
- osd_scrub_chunk_min
- with_legacy: true
+ with_legacy: false
- name: osd_shallow_scrub_chunk_min
type: int
level: advanced
@@ -369,7 +369,7 @@ options:
see_also:
- osd_shallow_scrub_chunk_max
- osd_scrub_chunk_min
- with_legacy: true
+ with_legacy: false
- name: osd_shallow_scrub_chunk_max
type: int
level: advanced
@@ -380,7 +380,7 @@ options:
see_also:
- osd_shallow_scrub_chunk_min
- osd_scrub_chunk_max
- with_legacy: true
+ with_legacy: false
# sleep between [deep]scrub ops
- name: osd_scrub_sleep
type: float
diff --git a/src/common/perf_counters.cc b/src/common/perf_counters.cc
index b5e361b505c..2eeaa80aae8 100644
--- a/src/common/perf_counters.cc
+++ b/src/common/perf_counters.cc
@@ -18,6 +18,7 @@
#include "common/dout.h"
#include "common/valgrind.h"
#include "include/common_fwd.h"
+#include "include/utime.h"
using std::ostringstream;
using std::make_pair;
diff --git a/src/common/perf_counters.h b/src/common/perf_counters.h
index 942edf6d7e5..0d0fe86a092 100644
--- a/src/common/perf_counters.h
+++ b/src/common/perf_counters.h
@@ -17,6 +17,8 @@
#ifndef CEPH_COMMON_PERF_COUNTERS_H
#define CEPH_COMMON_PERF_COUNTERS_H
+#include <functional>
+#include <set>
#include <string>
#include <vector>
#include <memory>
@@ -24,11 +26,12 @@
#include <cstdint>
#include "common/perf_histogram.h"
-#include "include/utime.h"
#include "include/common_fwd.h"
#include "common/ceph_mutex.h"
#include "common/ceph_time.h"
+class utime_t;
+
namespace TOPNSPC::common {
class CephContext;
class PerfCountersBuilder;
diff --git a/src/common/perf_counters_cache.h b/src/common/perf_counters_cache.h
index 866f56ee350..aa786fc5bf0 100644
--- a/src/common/perf_counters_cache.h
+++ b/src/common/perf_counters_cache.h
@@ -3,6 +3,7 @@
#include "common/perf_counters.h"
#include "common/ceph_context.h"
#include "common/intrusive_lru.h"
+#include "include/utime.h"
namespace ceph::perf_counters {
diff --git a/src/common/pick_address.cc b/src/common/pick_address.cc
index aa6b765bc56..d125d7171e0 100644
--- a/src/common/pick_address.cc
+++ b/src/common/pick_address.cc
@@ -15,6 +15,7 @@
#include "common/pick_address.h"
#include <bitset>
+#include <ifaddrs.h> // for struct ifaddrs
#include <netdb.h>
#include <netinet/in.h>
#ifdef _WIN32
@@ -40,6 +41,7 @@
#include "common/debug.h"
#include "common/errno.h"
#include "common/numa.h"
+#include "common/safe_io.h"
#ifndef HAVE_IN_ADDR_T
typedef uint32_t in_addr_t;
diff --git a/src/crimson/os/alienstore/alien_store.cc b/src/crimson/os/alienstore/alien_store.cc
index 3fd2bb1fd15..f390823a8a0 100644
--- a/src/crimson/os/alienstore/alien_store.cc
+++ b/src/crimson/os/alienstore/alien_store.cc
@@ -590,6 +590,19 @@ seastar::future<struct stat> AlienStore::stat(
});
}
+seastar::future<std::string> AlienStore::get_default_device_class()
+{
+ logger().debug("{}", __func__);
+ assert(tp);
+ return op_gates.simple_dispatch("get_default_device_class", [=, this] {
+ return tp->submit([=, this] {
+ return store->get_default_device_class();
+ }).then([] (std::string device_class) {
+ return seastar::make_ready_future<std::string>(device_class);
+ });
+ });
+}
+
auto AlienStore::omap_get_header(CollectionRef ch,
const ghobject_t& oid)
-> get_attr_errorator::future<ceph::bufferlist>
diff --git a/src/crimson/os/alienstore/alien_store.h b/src/crimson/os/alienstore/alien_store.h
index d36f449afd8..853585dac9c 100644
--- a/src/crimson/os/alienstore/alien_store.h
+++ b/src/crimson/os/alienstore/alien_store.h
@@ -98,6 +98,7 @@ public:
seastar::future<struct stat> stat(
CollectionRef,
const ghobject_t&) final;
+ seastar::future<std::string> get_default_device_class() final;
get_attr_errorator::future<ceph::bufferlist> omap_get_header(
CollectionRef,
const ghobject_t&) final;
diff --git a/src/crimson/os/alienstore/thread_pool.cc b/src/crimson/os/alienstore/thread_pool.cc
index 5cf9590e61e..277055ec51e 100644
--- a/src/crimson/os/alienstore/thread_pool.cc
+++ b/src/crimson/os/alienstore/thread_pool.cc
@@ -27,7 +27,7 @@ ThreadPool::ThreadPool(size_t n_threads,
pin(*cpus);
}
block_sighup();
- (void) pthread_setname_np(pthread_self(), "alien-store-tp");
+ (void) ceph_pthread_setname("alien-store-tp");
loop(queue_max_wait, i);
});
}
diff --git a/src/crimson/os/cyanstore/cyan_store.cc b/src/crimson/os/cyanstore/cyan_store.cc
index 7b945e5aa15..3f861a9271f 100644
--- a/src/crimson/os/cyanstore/cyan_store.cc
+++ b/src/crimson/os/cyanstore/cyan_store.cc
@@ -143,6 +143,12 @@ CyanStore::list_collections()
});
}
+seastar::future<std::string>
+CyanStore::get_default_device_class()
+{
+ return seastar::make_ready_future<std::string>("");
+}
+
CyanStore::mount_ertr::future<> CyanStore::Shard::mount()
{
static const char read_file_errmsg[]{"read_file"};
diff --git a/src/crimson/os/cyanstore/cyan_store.h b/src/crimson/os/cyanstore/cyan_store.h
index 99583d07d36..e9394991bc2 100644
--- a/src/crimson/os/cyanstore/cyan_store.h
+++ b/src/crimson/os/cyanstore/cyan_store.h
@@ -221,6 +221,8 @@ public:
seastar::future<std::vector<coll_core_t>> list_collections() final;
+ seastar::future<std::string> get_default_device_class() final;
+
private:
seastar::sharded<CyanStore::Shard> shard_stores;
const std::string path;
diff --git a/src/crimson/os/futurized_store.h b/src/crimson/os/futurized_store.h
index 0dca695ba3a..51ef2331014 100644
--- a/src/crimson/os/futurized_store.h
+++ b/src/crimson/os/futurized_store.h
@@ -203,6 +203,7 @@ public:
using coll_core_t = std::pair<coll_t, core_id_t>;
virtual seastar::future<std::vector<coll_core_t>> list_collections() = 0;
+ virtual seastar::future<std::string> get_default_device_class() = 0;
protected:
const core_id_t primary_core;
};
diff --git a/src/crimson/os/seastore/cache.cc b/src/crimson/os/seastore/cache.cc
index 5dcb7514ee1..70fec7caca4 100644
--- a/src/crimson/os/seastore/cache.cc
+++ b/src/crimson/os/seastore/cache.cc
@@ -172,6 +172,7 @@ void Cache::register_metrics()
{extent_types_t::LADDR_INTERNAL, sm::label_instance("ext", "LADDR_INTERNAL")},
{extent_types_t::LADDR_LEAF, sm::label_instance("ext", "LADDR_LEAF")},
{extent_types_t::DINK_LADDR_LEAF, sm::label_instance("ext", "DINK_LADDR_LEAF")},
+ {extent_types_t::ROOT_META, sm::label_instance("ext", "ROOT_META")},
{extent_types_t::OMAP_INNER, sm::label_instance("ext", "OMAP_INNER")},
{extent_types_t::OMAP_LEAF, sm::label_instance("ext", "OMAP_LEAF")},
{extent_types_t::ONODE_BLOCK_STAGED, sm::label_instance("ext", "ONODE_BLOCK_STAGED")},
@@ -1093,6 +1094,9 @@ CachedExtentRef Cache::alloc_new_extent_by_type(
case extent_types_t::LADDR_LEAF:
return alloc_new_non_data_extent<lba_manager::btree::LBALeafNode>(
t, length, hint, gen);
+ case extent_types_t::ROOT_META:
+ return alloc_new_non_data_extent<RootMetaBlock>(
+ t, length, hint, gen);
case extent_types_t::ONODE_BLOCK_STAGED:
return alloc_new_non_data_extent<onode::SeastoreNodeExtent>(
t, length, hint, gen);
@@ -2193,6 +2197,12 @@ Cache::do_get_caching_extent_by_type(
).safe_then([](auto extent) {
return CachedExtentRef(extent.detach(), false /* add_ref */);
});
+ case extent_types_t::ROOT_META:
+ return do_get_caching_extent<RootMetaBlock>(
+ offset, length, std::move(extent_init_func), std::move(on_cache)
+ ).safe_then([](auto extent) {
+ return CachedExtentRef(extent.detach(), false /* add_ref */);
+ });
case extent_types_t::OMAP_INNER:
return do_get_caching_extent<omap_manager::OMapInnerNode>(
offset, length, std::move(extent_init_func), std::move(on_cache)
diff --git a/src/crimson/os/seastore/cache.h b/src/crimson/os/seastore/cache.h
index dba3610e95f..c37d9c5c7cd 100644
--- a/src/crimson/os/seastore/cache.h
+++ b/src/crimson/os/seastore/cache.h
@@ -978,7 +978,8 @@ public:
auto result = epm.alloc_new_non_data_extent(t, T::TYPE, length, hint, gen);
#endif
if (!result) {
- return nullptr;
+ SUBERRORT(seastore_cache, "insufficient space", t);
+ std::rethrow_exception(crimson::ct_error::enospc::exception_ptr());
}
auto ret = CachedExtent::make_cached_extent_ref<T>(std::move(result->bp));
ret->init(CachedExtent::extent_state_t::INITIAL_WRITE_PENDING,
@@ -1019,6 +1020,10 @@ public:
#else
auto results = epm.alloc_new_data_extents(t, T::TYPE, length, hint, gen);
#endif
+ if (results.empty()) {
+ SUBERRORT(seastore_cache, "insufficient space", t);
+ std::rethrow_exception(crimson::ct_error::enospc::exception_ptr());
+ }
std::vector<TCachedExtentRef<T>> extents;
for (auto &result : results) {
auto ret = CachedExtent::make_cached_extent_ref<T>(std::move(result.bp));
diff --git a/src/crimson/os/seastore/onode.h b/src/crimson/os/seastore/onode.h
index 072c57864be..fa2ed65c0f3 100644
--- a/src/crimson/os/seastore/onode.h
+++ b/src/crimson/os/seastore/onode.h
@@ -36,8 +36,8 @@ struct onode_layout_t {
object_data_le_t object_data;
- char oi[MAX_OI_LENGTH];
- char ss[MAX_SS_LENGTH];
+ char oi[MAX_OI_LENGTH] = {0};
+ char ss[MAX_SS_LENGTH] = {0};
} __attribute__((packed));
class Transaction;
diff --git a/src/crimson/os/seastore/random_block_manager/block_rb_manager.cc b/src/crimson/os/seastore/random_block_manager/block_rb_manager.cc
index 9f6a566d15c..97b7902edf5 100644
--- a/src/crimson/os/seastore/random_block_manager/block_rb_manager.cc
+++ b/src/crimson/os/seastore/random_block_manager/block_rb_manager.cc
@@ -188,10 +188,10 @@ BlockRBManager::write_ertr::future<> BlockRBManager::write(
void BlockRBManager::prefill_fragmented_device()
{
LOG_PREFIX(BlockRBManager::prefill_fragmented_device);
- // the first 2 blocks must be allocated to lba root
+ // the first 3 blocks must be allocated to lba root
// and backref root during mkfs
- for (size_t block = get_block_size() * 2;
- block <= get_size() - get_block_size() * 2;
+ for (size_t block = get_block_size() * 3;
+ block <= get_size() - get_block_size() * 3;
block += get_block_size() * 2) {
DEBUG("marking {}~{} used",
get_start_rbm_addr() + block,
diff --git a/src/crimson/os/seastore/root_meta.h b/src/crimson/os/seastore/root_meta.h
new file mode 100644
index 00000000000..edf082f1e38
--- /dev/null
+++ b/src/crimson/os/seastore/root_meta.h
@@ -0,0 +1,76 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#pragma once
+
+#include "crimson/os/seastore/cached_extent.h"
+
+namespace crimson::os::seastore {
+
+struct RootMetaBlock : LogicalCachedExtent {
+ using meta_t = std::map<std::string, std::string>;
+ using Ref = TCachedExtentRef<RootMetaBlock>;
+ static constexpr size_t SIZE = 4096;
+ static constexpr int MAX_META_LENGTH = 1024;
+
+ explicit RootMetaBlock(ceph::bufferptr &&ptr)
+ : LogicalCachedExtent(std::move(ptr)) {}
+ explicit RootMetaBlock(extent_len_t length)
+ : LogicalCachedExtent(length) {}
+ RootMetaBlock(const RootMetaBlock &rhs)
+ : LogicalCachedExtent(rhs) {}
+
+ CachedExtentRef duplicate_for_write(Transaction&) final {
+ return CachedExtentRef(new RootMetaBlock(*this));
+ }
+
+ static constexpr extent_types_t TYPE = extent_types_t::ROOT_META;
+ extent_types_t get_type() const final {
+ return extent_types_t::ROOT_META;
+ }
+
+ /// dumps root meta as delta
+ ceph::bufferlist get_delta() final {
+ ceph::bufferlist bl;
+ ceph::buffer::ptr bptr(get_bptr(), 0, MAX_META_LENGTH);
+ bl.append(bptr);
+ return bl;
+ }
+
+ /// overwrites root
+ void apply_delta(const ceph::bufferlist &_bl) final
+ {
+ assert(_bl.length() == MAX_META_LENGTH);
+ ceph::bufferlist bl = _bl;
+ bl.rebuild();
+ get_bptr().copy_in(0, MAX_META_LENGTH, bl.front().c_str());
+ }
+
+ meta_t get_meta() const {
+ bufferlist bl;
+ bl.append(get_bptr());
+ meta_t ret;
+ auto iter = bl.cbegin();
+ decode(ret, iter);
+ return ret;
+ }
+
+ void set_meta(const meta_t &m) {
+ ceph::bufferlist bl;
+ encode(m, bl);
+ ceph_assert(bl.length() <= MAX_META_LENGTH);
+ bl.rebuild();
+ get_bptr().zero(0, MAX_META_LENGTH);
+ get_bptr().copy_in(0, bl.length(), bl.front().c_str());
+ }
+
+};
+using RootMetaBlockRef = RootMetaBlock::Ref;
+
+} // crimson::os::seastore
+
+
+#if FMT_VERSION >= 90000
+template <> struct fmt::formatter<crimson::os::seastore::RootMetaBlock>
+ : fmt::ostream_formatter {};
+#endif
diff --git a/src/crimson/os/seastore/seastore.cc b/src/crimson/os/seastore/seastore.cc
index d90edbb20db..2bf55d5cde7 100644
--- a/src/crimson/os/seastore/seastore.cc
+++ b/src/crimson/os/seastore/seastore.cc
@@ -2721,6 +2721,13 @@ SeaStore::read_meta(const std::string& key)
);
}
+seastar::future<std::string> SeaStore::get_default_device_class()
+{
+ using crimson::common::get_conf;
+ std::string type = get_conf<std::string>("seastore_main_device_type");
+ return seastar::make_ready_future<std::string>(type);
+}
+
uuid_d SeaStore::Shard::get_fsid() const
{
return device->get_meta().seastore_id;
diff --git a/src/crimson/os/seastore/seastore.h b/src/crimson/os/seastore/seastore.h
index 185072744f2..fd7e177da63 100644
--- a/src/crimson/os/seastore/seastore.h
+++ b/src/crimson/os/seastore/seastore.h
@@ -191,6 +191,8 @@ public:
seastar::future<> write_meta(const std::string& key,
const std::string& value);
+ seastar::future<std::string> get_default_device_class();
+
store_statfs_t stat() const;
uuid_d get_fsid() const;
@@ -567,6 +569,8 @@ public:
seastar::future<std::vector<coll_core_t>> list_collections() final;
+ seastar::future<std::string> get_default_device_class() final;
+
FuturizedStore::Shard& get_sharded_store() final {
return shard_stores.local();
}
diff --git a/src/crimson/os/seastore/seastore_types.cc b/src/crimson/os/seastore/seastore_types.cc
index f379dd0117c..450118e5e75 100644
--- a/src/crimson/os/seastore/seastore_types.cc
+++ b/src/crimson/os/seastore/seastore_types.cc
@@ -246,6 +246,8 @@ std::ostream &operator<<(std::ostream &out, extent_types_t t)
return out << "LADDR_LEAF";
case extent_types_t::ONODE_BLOCK_STAGED:
return out << "ONODE_BLOCK_STAGED";
+ case extent_types_t::ROOT_META:
+ return out << "ROOT_META";
case extent_types_t::OMAP_INNER:
return out << "OMAP_INNER";
case extent_types_t::OMAP_LEAF:
diff --git a/src/crimson/os/seastore/seastore_types.h b/src/crimson/os/seastore/seastore_types.h
index df5c184e7ab..65cad878fba 100644
--- a/src/crimson/os/seastore/seastore_types.h
+++ b/src/crimson/os/seastore/seastore_types.h
@@ -1378,23 +1378,24 @@ enum class extent_types_t : uint8_t {
LADDR_INTERNAL = 1,
LADDR_LEAF = 2,
DINK_LADDR_LEAF = 3, // should only be used for unitttests
- OMAP_INNER = 4,
- OMAP_LEAF = 5,
- ONODE_BLOCK_STAGED = 6,
- COLL_BLOCK = 7,
- OBJECT_DATA_BLOCK = 8,
- RETIRED_PLACEHOLDER = 9,
+ ROOT_META = 4,
+ OMAP_INNER = 5,
+ OMAP_LEAF = 6,
+ ONODE_BLOCK_STAGED = 7,
+ COLL_BLOCK = 8,
+ OBJECT_DATA_BLOCK = 9,
+ RETIRED_PLACEHOLDER = 10,
// the following two types are not extent types,
// they are just used to indicates paddr allocation deltas
- ALLOC_INFO = 10,
- JOURNAL_TAIL = 11,
+ ALLOC_INFO = 11,
+ JOURNAL_TAIL = 12,
// Test Block Types
- TEST_BLOCK = 12,
- TEST_BLOCK_PHYSICAL = 13,
- BACKREF_INTERNAL = 14,
- BACKREF_LEAF = 15,
+ TEST_BLOCK = 13,
+ TEST_BLOCK_PHYSICAL = 14,
+ BACKREF_INTERNAL = 15,
+ BACKREF_LEAF = 16,
// None and the number of valid extent_types_t
- NONE = 16,
+ NONE = 17,
};
using extent_types_le_t = uint8_t;
constexpr auto EXTENT_TYPES_MAX = static_cast<uint8_t>(extent_types_t::NONE);
@@ -1409,12 +1410,12 @@ constexpr bool is_data_type(extent_types_t type) {
}
constexpr bool is_logical_metadata_type(extent_types_t type) {
- return type >= extent_types_t::OMAP_INNER &&
+ return type >= extent_types_t::ROOT_META &&
type <= extent_types_t::COLL_BLOCK;
}
constexpr bool is_logical_type(extent_types_t type) {
- if ((type >= extent_types_t::OMAP_INNER &&
+ if ((type >= extent_types_t::ROOT_META &&
type <= extent_types_t::OBJECT_DATA_BLOCK) ||
type == extent_types_t::TEST_BLOCK) {
assert(is_logical_metadata_type(type) ||
@@ -1926,44 +1927,18 @@ using backref_root_t = phy_tree_root_t;
* TODO: generalize this to permit more than one lba_manager implementation
*/
struct __attribute__((packed)) root_t {
- using meta_t = std::map<std::string, std::string>;
-
- static constexpr int MAX_META_LENGTH = 1024;
-
backref_root_t backref_root;
lba_root_t lba_root;
laddr_le_t onode_root;
coll_root_le_t collection_root;
+ laddr_le_t meta;
- char meta[MAX_META_LENGTH];
-
- root_t() {
- set_meta(meta_t{});
- }
+ root_t() = default;
void adjust_addrs_from_base(paddr_t base) {
lba_root.adjust_addrs_from_base(base);
backref_root.adjust_addrs_from_base(base);
}
-
- meta_t get_meta() {
- bufferlist bl;
- bl.append(ceph::buffer::create_static(MAX_META_LENGTH, meta));
- meta_t ret;
- auto iter = bl.cbegin();
- decode(ret, iter);
- return ret;
- }
-
- void set_meta(const meta_t &m) {
- ceph::bufferlist bl;
- encode(m, bl);
- ceph_assert(bl.length() < MAX_META_LENGTH);
- bl.rebuild();
- auto &bptr = bl.front();
- ::memset(meta, 0, MAX_META_LENGTH);
- ::memcpy(meta, bptr.c_str(), bl.length());
- }
};
struct alloc_blk_t {
diff --git a/src/crimson/os/seastore/transaction_manager.cc b/src/crimson/os/seastore/transaction_manager.cc
index f4e3b0858f2..717c3822db9 100644
--- a/src/crimson/os/seastore/transaction_manager.cc
+++ b/src/crimson/os/seastore/transaction_manager.cc
@@ -74,6 +74,8 @@ TransactionManager::mkfs_ertr::future<> TransactionManager::mkfs()
return lba_manager->mkfs(t);
}).si_then([this, &t] {
return backref_manager->mkfs(t);
+ }).si_then([this, &t] {
+ return init_root_meta(t);
}).si_then([this, FNAME, &t] {
INFOT("submitting mkfs transaction", t);
return submit_transaction_direct(t);
diff --git a/src/crimson/os/seastore/transaction_manager.h b/src/crimson/os/seastore/transaction_manager.h
index c7a94a9ef11..841c5638abc 100644
--- a/src/crimson/os/seastore/transaction_manager.h
+++ b/src/crimson/os/seastore/transaction_manager.h
@@ -23,6 +23,7 @@
#include "crimson/os/seastore/logging.h"
#include "crimson/os/seastore/seastore_types.h"
#include "crimson/os/seastore/cache.h"
+#include "crimson/os/seastore/root_meta.h"
#include "crimson/os/seastore/lba_manager.h"
#include "crimson/os/seastore/backref_manager.h"
#include "crimson/os/seastore/journal.h"
@@ -303,10 +304,6 @@ public:
len,
placement_hint,
INIT_GENERATION);
- if (!ext) {
- SUBERRORT(seastore_tm, "insufficient space!", t);
- return crimson::ct_error::enospc::make();
- }
return lba_manager->alloc_extent(
t,
laddr_hint,
@@ -342,10 +339,6 @@ public:
len,
placement_hint,
INIT_GENERATION);
- if (exts.empty()) {
- SUBERRORT(seastore_tm, "insufficient space!", t);
- return crimson::ct_error::enospc::make();
- }
return lba_manager->alloc_extents(
t,
laddr_hint,
@@ -690,9 +683,11 @@ public:
const std::string &key) {
return cache->get_root(
t
- ).si_then([&key, &t](auto root) {
+ ).si_then([&t, this](auto root) {
+ return read_extent<RootMetaBlock>(t, root->root.meta);
+ }).si_then([key, &t](auto mblock) {
LOG_PREFIX(TransactionManager::read_root_meta);
- auto meta = root->root.get_meta();
+ auto meta = mblock->get_meta();
auto iter = meta.find(key);
if (iter == meta.end()) {
SUBDEBUGT(seastore_tm, "{} -> nullopt", t, key);
@@ -701,7 +696,35 @@ public:
SUBDEBUGT(seastore_tm, "{} -> {}", t, key, iter->second);
return seastar::make_ready_future<read_root_meta_bare>(iter->second);
}
- });
+ }).handle_error_interruptible(
+ crimson::ct_error::input_output_error::pass_further{},
+ crimson::ct_error::assert_all{"unexpected error!"}
+ );
+ }
+
+ /**
+ * init_root_meta
+ *
+ * Allocate a RootMetaBlock, initialize it with an empty meta map,
+ */
+ using init_root_meta_iertr = base_iertr;
+ using init_root_meta_ret = init_root_meta_iertr::future<>;
+ init_root_meta_ret init_root_meta(Transaction &t) {
+ return alloc_non_data_extent<RootMetaBlock>(
+ t, L_ADDR_MIN, RootMetaBlock::SIZE
+ ).si_then([this, &t](auto meta) {
+ meta->set_meta(RootMetaBlock::meta_t{});
+ return cache->get_root(t
+ ).si_then([this, &t, meta](auto root) {
+ auto mroot = cache->duplicate_for_write(
+ t, root)->template cast<RootBlock>();
+ mroot->root.meta = meta->get_laddr();
+ return seastar::now();
+ });
+ }).handle_error_interruptible(
+ crimson::ct_error::input_output_error::pass_further{},
+ crimson::ct_error::assert_all{"unexpected error!"}
+ );
}
/**
@@ -719,15 +742,21 @@ public:
SUBDEBUGT(seastore_tm, "seastore_tm, {} -> {} ...", t, key, value);
return cache->get_root(
t
- ).si_then([this, &t, &key, &value](RootBlockRef root) {
- root = cache->duplicate_for_write(t, root)->cast<RootBlock>();
+ ).si_then([this, &t](RootBlockRef root) {
+ return read_extent<RootMetaBlock>(t, root->root.meta);
+ }).si_then([this, key, value, &t](auto mblock) {
+ mblock = get_mutable_extent(t, mblock
+ )->template cast<RootMetaBlock>();
- auto meta = root->root.get_meta();
+ auto meta = mblock->get_meta();
meta[key] = value;
- root->root.set_meta(meta);
+ mblock->set_meta(meta);
return seastar::now();
- });
+ }).handle_error_interruptible(
+ crimson::ct_error::input_output_error::pass_further{},
+ crimson::ct_error::assert_all{"unexpected error!"}
+ );
}
/**
diff --git a/src/crimson/osd/backfill_state.cc b/src/crimson/osd/backfill_state.cc
index a77cbe87652..837fd2eb2af 100644
--- a/src/crimson/osd/backfill_state.cc
+++ b/src/crimson/osd/backfill_state.cc
@@ -379,16 +379,17 @@ BackfillState::Enqueuing::Enqueuing(my_context ctx)
trim_backfilled_object_from_intervals(std::move(result),
backfill_state().last_backfill_started,
backfill_state().peer_backfill_info);
- } else {
+ backfill_listener().maybe_flush();
+ } else if (!primary_bi.empty()) {
auto result = update_on_peers(check);
trim_backfilled_object_from_intervals(std::move(result),
backfill_state().last_backfill_started,
backfill_state().peer_backfill_info);
- if (!primary_bi.empty()) {
- primary_bi.pop_front();
- }
+ primary_bi.pop_front();
+ backfill_listener().maybe_flush();
+ } else {
+ break;
}
- backfill_listener().maybe_flush();
} while (!all_emptied(primary_bi, backfill_state().peer_backfill_info));
if (backfill_state().progress_tracker->tracked_objects_completed()
@@ -610,4 +611,12 @@ void BackfillState::ProgressTracker::complete_to(
}
}
+void BackfillState::enqueue_standalone_push(
+ const hobject_t &obj,
+ const eversion_t &v,
+ const std::vector<pg_shard_t> &peers) {
+ progress_tracker->enqueue_push(obj);
+ backfill_machine.backfill_listener.enqueue_push(obj, v, peers);
+}
+
} // namespace crimson::osd
diff --git a/src/crimson/osd/backfill_state.h b/src/crimson/osd/backfill_state.h
index a49cbeaac06..072c91e079d 100644
--- a/src/crimson/osd/backfill_state.h
+++ b/src/crimson/osd/backfill_state.h
@@ -304,6 +304,15 @@ public:
backfill_machine.process_event(*std::move(evt));
}
+ void enqueue_standalone_push(
+ const hobject_t &obj,
+ const eversion_t &v,
+ const std::vector<pg_shard_t> &peers);
+
+ bool is_triggered() const {
+ return backfill_machine.triggering_event() != nullptr;
+ }
+
hobject_t get_last_backfill_started() const {
return last_backfill_started;
}
diff --git a/src/crimson/osd/ec_backend.cc b/src/crimson/osd/ec_backend.cc
index 32eaaf02b3f..007d0bf35f3 100644
--- a/src/crimson/osd/ec_backend.cc
+++ b/src/crimson/osd/ec_backend.cc
@@ -26,6 +26,7 @@ ECBackend::_read(const hobject_t& hoid,
ECBackend::rep_op_fut_t
ECBackend::submit_transaction(const std::set<pg_shard_t> &pg_shards,
const hobject_t& hoid,
+ crimson::osd::ObjectContextRef&& new_clone,
ceph::os::Transaction&& txn,
osd_op_params_t&& osd_op_p,
epoch_t min_epoch, epoch_t max_epoch,
diff --git a/src/crimson/osd/ec_backend.h b/src/crimson/osd/ec_backend.h
index 90a7e2b1f4d..b14c78c9fc4 100644
--- a/src/crimson/osd/ec_backend.h
+++ b/src/crimson/osd/ec_backend.h
@@ -28,6 +28,7 @@ private:
rep_op_fut_t
submit_transaction(const std::set<pg_shard_t> &pg_shards,
const hobject_t& hoid,
+ crimson::osd::ObjectContextRef&& new_clone,
ceph::os::Transaction&& txn,
osd_op_params_t&& req,
epoch_t min_epoch, epoch_t max_epoch,
diff --git a/src/crimson/osd/ops_executer.cc b/src/crimson/osd/ops_executer.cc
index 4e735c3b4cb..97b241fdce4 100644
--- a/src/crimson/osd/ops_executer.cc
+++ b/src/crimson/osd/ops_executer.cc
@@ -940,6 +940,7 @@ std::unique_ptr<OpsExecuter::CloningContext> OpsExecuter::execute_clone(
};
encode(cloned_snaps, cloning_ctx->log_entry.snaps);
cloning_ctx->log_entry.clean_regions.mark_data_region_dirty(0, initial_obs.oi.size);
+ cloning_ctx->clone_obc = clone_obc;
return cloning_ctx;
}
@@ -966,7 +967,7 @@ void OpsExecuter::update_clone_overlap() {
void OpsExecuter::CloningContext::apply_to(
std::vector<pg_log_entry_t>& log_entries,
- ObjectContext& processed_obc) &&
+ ObjectContext& processed_obc)
{
log_entry.mtime = processed_obc.obs.oi.mtime;
log_entries.insert(log_entries.begin(), std::move(log_entry));
@@ -983,7 +984,7 @@ OpsExecuter::flush_clone_metadata(
assert(!txn.empty());
update_clone_overlap();
if (cloning_ctx) {
- std::move(*cloning_ctx).apply_to(log_entries, *obc);
+ cloning_ctx->apply_to(log_entries, *obc);
}
if (snapc.seq > obc->ssc->snapset.seq) {
// update snapset with latest snap context
diff --git a/src/crimson/osd/ops_executer.h b/src/crimson/osd/ops_executer.h
index e770e825b32..94b64ccebb1 100644
--- a/src/crimson/osd/ops_executer.h
+++ b/src/crimson/osd/ops_executer.h
@@ -197,10 +197,11 @@ private:
struct CloningContext {
SnapSet new_snapset;
pg_log_entry_t log_entry;
+ ObjectContextRef clone_obc;
void apply_to(
std::vector<pg_log_entry_t>& log_entries,
- ObjectContext& processed_obc) &&;
+ ObjectContext& processed_obc);
};
std::unique_ptr<CloningContext> cloning_ctx;
@@ -504,6 +505,7 @@ OpsExecuter::flush_changes_n_do_ops_effects(
ceph_assert(want_mutate);
}
+ apply_stats();
if (want_mutate) {
auto log_entries = flush_clone_metadata(
prepare_transaction(ops),
@@ -519,14 +521,15 @@ OpsExecuter::flush_changes_n_do_ops_effects(
std::move(txn),
std::move(obc),
std::move(*osd_op_params),
- std::move(log_entries));
+ std::move(log_entries),
+ cloning_ctx
+ ? std::move(cloning_ctx->clone_obc)
+ : nullptr);
submitted = std::move(_submitted);
all_completed = std::move(_all_completed);
}
- apply_stats();
-
if (op_effects.size()) [[unlikely]] {
// need extra ref pg due to apply_stats() which can be executed after
// informing snap mapper
diff --git a/src/crimson/osd/osd.cc b/src/crimson/osd/osd.cc
index 34ad97ceb06..0f19bfd7145 100644
--- a/src/crimson/osd/osd.cc
+++ b/src/crimson/osd/osd.cc
@@ -504,6 +504,8 @@ seastar::future<> OSD::start()
}).then_unpack([this] {
return _add_me_to_crush();
}).then([this] {
+ return _add_device_class();
+ }).then([this] {
monc->sub_want("osd_pg_creates", last_pg_create_epoch, 0);
monc->sub_want("mgrmap", 0, 0);
monc->sub_want("osdmap", 0, 0);
@@ -608,6 +610,38 @@ seastar::future<> OSD::_send_boot()
return monc->send_message(std::move(m));
}
+seastar::future<> OSD::_add_device_class()
+{
+ LOG_PREFIX(OSD::_add_device_class);
+ if (!local_conf().get_val<bool>("osd_class_update_on_start")) {
+ co_return;
+ }
+
+ std::string device_class = co_await store.get_default_device_class();
+ if (device_class.empty()) {
+ WARN("Device class is empty; skipping crush update.");
+ co_return;
+ }
+
+ INFO("device_class is {} ", device_class);
+
+ std::string cmd = fmt::format(
+ R"({{"prefix": "osd crush set-device-class", "class": "{}", "ids": ["{}"]}})",
+ device_class, stringify(whoami)
+ );
+
+ auto [code, message, out] = co_await monc->run_command(std::move(cmd), {});
+ if (code) {
+ // to be caught by crimson/osd/main.cc
+ WARN("fail to set device_class : {} ({})", message, code);
+ throw std::runtime_error("fail to set device_class");
+ } else {
+ INFO("device_class was set: {}", message);
+ }
+
+ co_return;
+}
+
seastar::future<> OSD::_add_me_to_crush()
{
LOG_PREFIX(OSD::_add_me_to_crush);
diff --git a/src/crimson/osd/osd.h b/src/crimson/osd/osd.h
index d7d54d5d2c3..1a84ccd6a3f 100644
--- a/src/crimson/osd/osd.h
+++ b/src/crimson/osd/osd.h
@@ -188,6 +188,7 @@ private:
seastar::future<> _preboot(version_t oldest_osdmap, version_t newest_osdmap);
seastar::future<> _send_boot();
seastar::future<> _add_me_to_crush();
+ seastar::future<> _add_device_class();
seastar::future<> osdmap_subscribe(version_t epoch, bool force_request);
diff --git a/src/crimson/osd/osd_operations/snaptrim_event.cc b/src/crimson/osd/osd_operations/snaptrim_event.cc
index c5bdcae47f2..8cab6125682 100644
--- a/src/crimson/osd/osd_operations/snaptrim_event.cc
+++ b/src/crimson/osd/osd_operations/snaptrim_event.cc
@@ -435,6 +435,7 @@ SnapTrimObjSubEvent::process_and_submit(ObjectContextRef head_obc,
auto [submitted, all_completed] = co_await pg->submit_transaction(
std::move(clone_obc),
+ nullptr,
std::move(txn),
std::move(osd_op_p),
std::move(log_entries)
diff --git a/src/crimson/osd/pg.cc b/src/crimson/osd/pg.cc
index de39fb45716..1e2988efbbe 100644
--- a/src/crimson/osd/pg.cc
+++ b/src/crimson/osd/pg.cc
@@ -907,11 +907,23 @@ void PG::mutate_object(
}
}
+void PG::enqueue_push_for_backfill(
+ const hobject_t &obj,
+ const eversion_t &v,
+ const std::vector<pg_shard_t> &peers)
+{
+ assert(recovery_handler);
+ assert(recovery_handler->backfill_state);
+ auto backfill_state = recovery_handler->backfill_state.get();
+ backfill_state->enqueue_standalone_push(obj, v, peers);
+}
+
PG::interruptible_future<
std::tuple<PG::interruptible_future<>,
PG::interruptible_future<>>>
PG::submit_transaction(
ObjectContextRef&& obc,
+ ObjectContextRef&& new_clone,
ceph::os::Transaction&& txn,
osd_op_params_t&& osd_op_p,
std::vector<pg_log_entry_t>&& log_entries)
@@ -924,8 +936,9 @@ PG::submit_transaction(
}
epoch_t map_epoch = get_osdmap_epoch();
+ auto at_version = osd_op_p.at_version;
- peering_state.pre_submit_op(obc->obs.oi.soid, log_entries, osd_op_p.at_version);
+ peering_state.pre_submit_op(obc->obs.oi.soid, log_entries, at_version);
peering_state.update_trim_to();
ceph_assert(!log_entries.empty());
@@ -939,6 +952,7 @@ PG::submit_transaction(
auto [submitted, all_completed] = co_await backend->submit_transaction(
peering_state.get_acting_recovery_backfill(),
obc->obs.oi.soid,
+ std::move(new_clone),
std::move(txn),
std::move(osd_op_p),
peering_state.get_last_peering_reset(),
@@ -947,8 +961,8 @@ PG::submit_transaction(
co_return std::make_tuple(
std::move(submitted),
all_completed.then_interruptible(
- [this, last_complete=peering_state.get_info().last_complete,
- at_version=osd_op_p.at_version](auto acked) {
+ [this, last_complete=peering_state.get_info().last_complete, at_version]
+ (auto acked) {
for (const auto& peer : acked) {
peering_state.update_peer_last_complete_ondisk(
peer.shard, peer.last_complete_ondisk);
@@ -1153,11 +1167,13 @@ PG::submit_executer_fut PG::submit_executer(
[FNAME, this](auto&& txn,
auto&& obc,
auto&& osd_op_p,
- auto&& log_entries) {
+ auto&& log_entries,
+ auto&& new_clone) {
DEBUGDPP("object {} submitting txn", *this, obc->get_oid());
mutate_object(obc, txn, osd_op_p);
return submit_transaction(
std::move(obc),
+ std::move(new_clone),
std::move(txn),
std::move(osd_op_p),
std::move(log_entries));
@@ -1604,7 +1620,7 @@ bool PG::should_send_op(
// missing set
hoid <= peering_state.get_peer_info(peer).last_backfill ||
(has_backfill_state() && hoid <= get_last_backfill_started() &&
- !peering_state.get_peer_missing(peer).is_missing(hoid)));
+ !is_missing_on_peer(peer, hoid)));
if (!should_send) {
ceph_assert(is_backfill_target(peer));
logger().debug("{} issue_repop shipping empty opt to osd."
diff --git a/src/crimson/osd/pg.h b/src/crimson/osd/pg.h
index b0429c8fb4f..15aeec0e4f3 100644
--- a/src/crimson/osd/pg.h
+++ b/src/crimson/osd/pg.h
@@ -45,6 +45,7 @@
class MQuery;
class OSDMap;
class PGBackend;
+class ReplicatedBackend;
class PGPeeringEvent;
class osd_op_params_t;
@@ -678,6 +679,7 @@ private:
std::tuple<interruptible_future<>, interruptible_future<>>>
submit_transaction(
ObjectContextRef&& obc,
+ ObjectContextRef&& new_clone,
ceph::os::Transaction&& txn,
osd_op_params_t&& oop,
std::vector<pg_log_entry_t>&& log_entries);
@@ -885,6 +887,10 @@ private:
friend class SnapTrimObjSubEvent;
private:
+ void enqueue_push_for_backfill(
+ const hobject_t &obj,
+ const eversion_t &v,
+ const std::vector<pg_shard_t> &peers);
void mutate_object(
ObjectContextRef& obc,
ceph::os::Transaction& txn,
@@ -893,7 +899,7 @@ private:
bool can_discard_op(const MOSDOp& m) const;
void context_registry_on_change();
bool is_missing_object(const hobject_t& soid) const {
- return peering_state.get_pg_log().get_missing().get_items().count(soid);
+ return get_local_missing().is_missing(soid);
}
bool is_unreadable_object(const hobject_t &oid,
eversion_t* v = 0) const final {
@@ -901,6 +907,11 @@ private:
!peering_state.get_missing_loc().readable_with_acting(
oid, get_actingset(), v);
}
+ bool is_missing_on_peer(
+ const pg_shard_t &peer,
+ const hobject_t &soid) const {
+ return peering_state.get_peer_missing(peer).is_missing(soid);
+ }
bool is_degraded_or_backfilling_object(const hobject_t& soid) const;
const std::set<pg_shard_t> &get_actingset() const {
return peering_state.get_actingset();
@@ -908,6 +919,7 @@ private:
private:
friend class IOInterruptCondition;
+ friend class ::ReplicatedBackend;
struct log_update_t {
std::set<pg_shard_t> waiting_on;
seastar::shared_promise<> all_committed;
diff --git a/src/crimson/osd/pg_backend.h b/src/crimson/osd/pg_backend.h
index fa1f1405ffe..813218983fd 100644
--- a/src/crimson/osd/pg_backend.h
+++ b/src/crimson/osd/pg_backend.h
@@ -414,6 +414,7 @@ public:
virtual rep_op_fut_t
submit_transaction(const std::set<pg_shard_t> &pg_shards,
const hobject_t& hoid,
+ crimson::osd::ObjectContextRef&& new_clone,
ceph::os::Transaction&& txn,
osd_op_params_t&& osd_op_p,
epoch_t min_epoch, epoch_t max_epoch,
diff --git a/src/crimson/osd/pg_recovery.h b/src/crimson/osd/pg_recovery.h
index 705b3176b97..657e6d3e888 100644
--- a/src/crimson/osd/pg_recovery.h
+++ b/src/crimson/osd/pg_recovery.h
@@ -45,6 +45,10 @@ public:
seastar::future<> stop() { return seastar::now(); }
void on_pg_clean();
+ void enqueue_push(
+ const hobject_t& obj,
+ const eversion_t& v,
+ const std::vector<pg_shard_t> &peers) final;
private:
PGRecoveryListener* pg;
size_t start_primary_recovery_ops(
@@ -108,10 +112,6 @@ private:
const hobject_t& end) final;
void request_primary_scan(
const hobject_t& begin) final;
- void enqueue_push(
- const hobject_t& obj,
- const eversion_t& v,
- const std::vector<pg_shard_t> &peers) final;
void enqueue_drop(
const pg_shard_t& target,
const hobject_t& obj,
diff --git a/src/crimson/osd/replicated_backend.cc b/src/crimson/osd/replicated_backend.cc
index 12ee38b4370..f09cd147ea9 100644
--- a/src/crimson/osd/replicated_backend.cc
+++ b/src/crimson/osd/replicated_backend.cc
@@ -36,19 +36,59 @@ ReplicatedBackend::_read(const hobject_t& hoid,
return store->read(coll, ghobject_t{hoid}, off, len, flags);
}
+MURef<MOSDRepOp> ReplicatedBackend::new_repop_msg(
+ const pg_shard_t &pg_shard,
+ const hobject_t &hoid,
+ const bufferlist &encoded_txn,
+ const osd_op_params_t &osd_op_p,
+ epoch_t min_epoch,
+ epoch_t map_epoch,
+ const std::vector<pg_log_entry_t> &log_entries,
+ bool send_op,
+ ceph_tid_t tid)
+{
+ ceph_assert(pg_shard != whoami);
+ auto m = crimson::make_message<MOSDRepOp>(
+ osd_op_p.req_id,
+ whoami,
+ spg_t{pgid, pg_shard.shard},
+ hoid,
+ CEPH_OSD_FLAG_ACK | CEPH_OSD_FLAG_ONDISK,
+ map_epoch,
+ min_epoch,
+ tid,
+ osd_op_p.at_version);
+ if (send_op) {
+ m->set_data(encoded_txn);
+ } else {
+ ceph::os::Transaction t;
+ bufferlist bl;
+ encode(t, bl);
+ m->set_data(bl);
+ }
+ encode(log_entries, m->logbl);
+ m->pg_trim_to = osd_op_p.pg_trim_to;
+ m->pg_committed_to = osd_op_p.pg_committed_to;
+ m->pg_stats = pg.get_info().stats;
+ return m;
+}
+
ReplicatedBackend::rep_op_fut_t
-ReplicatedBackend::submit_transaction(const std::set<pg_shard_t>& pg_shards,
- const hobject_t& hoid,
- ceph::os::Transaction&& t,
- osd_op_params_t&& opp,
- epoch_t min_epoch, epoch_t map_epoch,
- std::vector<pg_log_entry_t>&& logv)
+ReplicatedBackend::submit_transaction(
+ const std::set<pg_shard_t> &pg_shards,
+ const hobject_t& hoid,
+ crimson::osd::ObjectContextRef &&new_clone,
+ ceph::os::Transaction&& t,
+ osd_op_params_t&& opp,
+ epoch_t min_epoch, epoch_t map_epoch,
+ std::vector<pg_log_entry_t>&& logv)
{
LOG_PREFIX(ReplicatedBackend::submit_transaction);
DEBUGDPP("object {}", dpp, hoid);
auto log_entries = std::move(logv);
auto txn = std::move(t);
auto osd_op_p = std::move(opp);
+ auto _new_clone = std::move(new_clone);
const ceph_tid_t tid = shard_services.get_tid();
auto pending_txn =
@@ -60,37 +100,34 @@ ReplicatedBackend::submit_transaction(const std::set<pg_shard_t>& pg_shards,
le.mark_unrollbackable();
}
+ std::vector<pg_shard_t> to_push_clone;
auto sends = std::make_unique<std::vector<seastar::future<>>>();
- for (auto pg_shard : pg_shards) {
- if (pg_shard != whoami) {
- auto m = crimson::make_message<MOSDRepOp>(
- osd_op_p.req_id,
- whoami,
- spg_t{pgid, pg_shard.shard},
- hoid,
- CEPH_OSD_FLAG_ACK | CEPH_OSD_FLAG_ONDISK,
- map_epoch,
- min_epoch,
- tid,
- osd_op_p.at_version);
- if (pg.should_send_op(pg_shard, hoid)) {
- m->set_data(encoded_txn);
- } else {
- ceph::os::Transaction t;
- bufferlist bl;
- encode(t, bl);
- m->set_data(bl);
+ for (auto &pg_shard : pg_shards) {
+ if (pg_shard == whoami) {
+ continue;
+ }
+ MURef<MOSDRepOp> m;
+ if (pg.should_send_op(pg_shard, hoid)) {
+ m = new_repop_msg(
+ pg_shard, hoid, encoded_txn, osd_op_p,
+ min_epoch, map_epoch, log_entries, true, tid);
+ } else {
+ m = new_repop_msg(
+ pg_shard, hoid, encoded_txn, osd_op_p,
+ min_epoch, map_epoch, log_entries, false, tid);
+ if (_new_clone && pg.is_missing_on_peer(pg_shard, hoid)) {
+ // The head is in the push queue but hasn't been pushed yet.
+ // We need to ensure that the newly created clone will be
+ // pushed as well, otherwise we might skip it.
+ // See: https://tracker.ceph.com/issues/68808
+ to_push_clone.push_back(pg_shard);
}
- pending_txn->second.acked_peers.push_back({pg_shard, eversion_t{}});
- encode(log_entries, m->logbl);
- m->pg_trim_to = osd_op_p.pg_trim_to;
- m->pg_committed_to = osd_op_p.pg_committed_to;
- m->pg_stats = pg.get_info().stats;
- // TODO: set more stuff. e.g., pg_states
- sends->emplace_back(
- shard_services.send_to_osd(
- pg_shard.osd, std::move(m), map_epoch));
}
+ pending_txn->second.acked_peers.push_back({pg_shard, eversion_t{}});
+ // TODO: set more stuff. e.g., pg_states
+ sends->emplace_back(
+ shard_services.send_to_osd(
+ pg_shard.osd, std::move(m), map_epoch));
}
co_await pg.update_snap_map(log_entries, txn);
@@ -120,9 +157,16 @@ ReplicatedBackend::submit_transaction(const std::set<pg_shard_t>& pg_shards,
return seastar::now();
}
return peers->all_committed.get_shared_future();
- }).then_interruptible([pending_txn, this] {
+ }).then_interruptible([pending_txn, this, _new_clone,
+ to_push_clone=std::move(to_push_clone)] {
auto acked_peers = std::move(pending_txn->second.acked_peers);
pending_trans.erase(pending_txn);
+ if (_new_clone && !to_push_clone.empty()) {
+ pg.enqueue_push_for_backfill(
+ _new_clone->obs.oi.soid,
+ _new_clone->obs.oi.version,
+ to_push_clone);
+ }
return seastar::make_ready_future<
crimson::osd::acked_peers_t>(std::move(acked_peers));
});
diff --git a/src/crimson/osd/replicated_backend.h b/src/crimson/osd/replicated_backend.h
index fb8704d8742..d5844b23a0c 100644
--- a/src/crimson/osd/replicated_backend.h
+++ b/src/crimson/osd/replicated_backend.h
@@ -35,6 +35,7 @@ private:
rep_op_fut_t submit_transaction(
const std::set<pg_shard_t> &pg_shards,
const hobject_t& hoid,
+ crimson::osd::ObjectContextRef&& new_clone,
ceph::os::Transaction&& txn,
osd_op_params_t&& osd_op_p,
epoch_t min_epoch, epoch_t max_epoch,
@@ -60,6 +61,17 @@ private:
pending_transactions_t pending_trans;
crimson::osd::PG& pg;
+ MURef<MOSDRepOp> new_repop_msg(
+ const pg_shard_t &pg_shard,
+ const hobject_t &hoid,
+ const bufferlist &encoded_txn,
+ const osd_op_params_t &osd_op_p,
+ epoch_t min_epoch,
+ epoch_t map_epoch,
+ const std::vector<pg_log_entry_t> &log_entries,
+ bool send_op,
+ ceph_tid_t tid);
+
seastar::future<> request_committed(
const osd_reqid_t& reqid, const eversion_t& at_version) final;
};
diff --git a/src/exporter/DaemonMetricCollector.cc b/src/exporter/DaemonMetricCollector.cc
index 4b8a8131bcf..d27b3ac43c5 100644
--- a/src/exporter/DaemonMetricCollector.cc
+++ b/src/exporter/DaemonMetricCollector.cc
@@ -29,9 +29,16 @@ using json_object = boost::json::object;
using json_value = boost::json::value;
using json_array = boost::json::array;
-void DaemonMetricCollector::request_loop(boost::asio::steady_timer &timer) {
- timer.async_wait([&](const boost::system::error_code &e) {
- std::cerr << e << std::endl;
+void DaemonMetricCollector::request_loop() {
+ timer.async_wait([this](const boost::system::error_code &e) {
+ if (shutdown_flag) {
+ dout(1) << "Metric collector request loop cancelled" << dendl;
+ return;
+ }
+
+ if (e) return; // Exit on error or cancellation
+
+ dout(10) << "Getting metrics loop..." << dendl;
update_sockets();
bool sort_metrics = g_conf().get_val<bool>("exporter_sort_metrics");
@@ -42,19 +49,24 @@ void DaemonMetricCollector::request_loop(boost::asio::steady_timer &timer) {
auto stats_period = g_conf().get_val<int64_t>("exporter_stats_period");
// time to wait before sending requests again
timer.expires_from_now(std::chrono::seconds(stats_period));
- request_loop(timer);
+ request_loop();
});
}
void DaemonMetricCollector::main() {
- // time to wait before sending requests again
-
- boost::asio::io_context io;
- boost::asio::steady_timer timer{io, std::chrono::seconds(0)};
- request_loop(timer);
+ shutdown_flag = false;
+ timer.expires_from_now(std::chrono::seconds(0));
+ request_loop();
io.run();
}
+void DaemonMetricCollector::shutdown(){
+ shutdown_flag = true;
+ timer.cancel(); // Explicitly cancel the timer
+ dout(1) << "Collector shutdown initiated, timer canceled" << dendl;
+ io.stop();
+}
+
std::string DaemonMetricCollector::get_metrics() {
const std::lock_guard<std::mutex> lock(metrics_mutex);
return metrics;
@@ -499,3 +511,4 @@ DaemonMetricCollector &collector_instance() {
static DaemonMetricCollector instance;
return instance;
}
+
diff --git a/src/exporter/DaemonMetricCollector.h b/src/exporter/DaemonMetricCollector.h
index 3302e95df91..5831a0fa3b0 100644
--- a/src/exporter/DaemonMetricCollector.h
+++ b/src/exporter/DaemonMetricCollector.h
@@ -1,17 +1,20 @@
#pragma once
#include "common/admin_socket_client.h"
+#include <atomic>
#include <map>
#include <string>
#include <vector>
#include <boost/asio/steady_timer.hpp>
+#include <boost/thread.hpp>
#include <boost/json/object.hpp>
#include <filesystem>
#include <map>
#include <string>
#include <vector>
+
struct pstat {
unsigned long utime;
unsigned long stime;
@@ -43,11 +46,16 @@ public:
std::string metrics;
std::pair<labels_t, std::string> add_fixed_name_metrics(std::string metric_name);
void update_sockets();
+ void shutdown();
private:
std::mutex metrics_mutex;
std::unique_ptr<MetricsBuilder> builder;
- void request_loop(boost::asio::steady_timer &timer);
+ boost::asio::io_context io;
+ boost::asio::steady_timer timer{io};
+ std::atomic<bool> shutdown_flag{false};
+
+ void request_loop();
void dump_asok_metric(boost::json::object perf_info,
boost::json::value perf_values, std::string name,
@@ -108,3 +116,4 @@ public:
};
DaemonMetricCollector &collector_instance();
+
diff --git a/src/exporter/ceph_exporter.cc b/src/exporter/ceph_exporter.cc
index 2e2c16bb085..44b67c7e615 100644
--- a/src/exporter/ceph_exporter.cc
+++ b/src/exporter/ceph_exporter.cc
@@ -1,16 +1,31 @@
#include "common/ceph_argparse.h"
#include "common/config.h"
-#include "exporter/DaemonMetricCollector.h"
-#include "exporter/web_server.h"
+#include "common/debug.h"
#include "global/global_init.h"
#include "global/global_context.h"
-
+#include "global/signal_handler.h"
+#include "exporter/DaemonMetricCollector.h"
+#include "exporter/web_server.h"
#include <boost/thread/thread.hpp>
#include <iostream>
#include <map>
#include <string>
+#include <atomic>
+#include <chrono>
+#include <thread>
#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_ceph_exporter
+
+DaemonMetricCollector &collector = collector_instance();
+
+static void handle_signal(int signum)
+{
+ ceph_assert(signum == SIGINT || signum == SIGTERM);
+ derr << "*** Got signal " << sig_str(signum) << " ***" << dendl;
+ // Finish the DaemonMetricCollector
+ collector.shutdown();
+}
static void usage() {
std::cout << "usage: ceph-exporter [options]\n"
@@ -27,7 +42,6 @@ static void usage() {
}
int main(int argc, char **argv) {
-
auto args = argv_to_vec(argc, argv);
if (args.empty()) {
std::cerr << argv[0] << ": -h or --help for usage" << std::endl;
@@ -64,8 +78,30 @@ int main(int argc, char **argv) {
}
common_init_finish(g_ceph_context);
+ // Register signal handlers
+ init_async_signal_handler();
+ register_async_signal_handler(SIGHUP, sighup_handler);
+ register_async_signal_handler_oneshot(SIGINT, handle_signal);
+ register_async_signal_handler_oneshot(SIGTERM, handle_signal);
+
+ // Start the web server thread
boost::thread server_thread(web_server_thread_entrypoint);
- DaemonMetricCollector &collector = collector_instance();
+
+ // Start the DaemonMetricCollector
collector.main();
+
+ // Interrupted. Time to terminate
+ unregister_async_signal_handler(SIGHUP, sighup_handler);
+ unregister_async_signal_handler(SIGINT, handle_signal);
+ unregister_async_signal_handler(SIGTERM, handle_signal);
+ shutdown_async_signal_handler();
+
+ // Stop the web server thread by interrupting it
+ stop_web_server();
+ server_thread.interrupt(); // Interrupt the web server thread
server_thread.join();
+
+ dout(1) << "Ceph exporter stopped" << dendl;
+
+ return 0;
}
diff --git a/src/exporter/web_server.cc b/src/exporter/web_server.cc
index 96cc02b389f..c01205f26bb 100644
--- a/src/exporter/web_server.cc
+++ b/src/exporter/web_server.cc
@@ -28,6 +28,9 @@ namespace net = boost::asio; // from <boost/asio.hpp>
namespace ssl = boost::asio::ssl; // from <boost/asio/ssl.hpp>
using tcp = boost::asio::ip::tcp; // from <boost/asio/ip/tcp.hpp>
+// Common io_context shared by the HTTP and HTTPS web servers
+std::shared_ptr<net::io_context> global_ioc;
+
// Base class for common functionality
class web_connection {
public:
@@ -43,7 +46,7 @@ protected:
web_connection(net::any_io_executor executor, std::chrono::seconds timeout)
: deadline_(executor, timeout) {}
- // Common request processing logic
+ // Common request processing logic
void process_request() {
response_.version(request_.version());
response_.keep_alive(request_.keep_alive());
@@ -64,7 +67,7 @@ protected:
write_response();
}
- // Construct a response message based on the request target
+ // Construct a response message based on the request target
void create_response() {
if (request_.target() == "/") {
response_.result(http::status::moved_permanently);
@@ -81,7 +84,7 @@ protected:
}
}
- // Asynchronously transmit the response message
+ // Asynchronously transmit the response message
virtual void write_response() = 0;
// Check whether we have spent enough time on this connection
@@ -228,28 +231,33 @@ void https_server(tcp::acceptor &acceptor, ssl::context &ssl_ctx) {
}
void run_http_server(const std::string& exporter_addr, short unsigned int port) {
- net::io_context ioc{1};
- tcp::acceptor acceptor{ioc, {net::ip::make_address(exporter_addr), port}};
- tcp::socket socket{ioc};
+ tcp::acceptor acceptor{*global_ioc, {net::ip::make_address(exporter_addr), port}};
+ tcp::socket socket{*global_ioc};
http_server(acceptor, socket);
dout(1) << "HTTP server running on " << exporter_addr << ":" << port << dendl;
- ioc.run();
+ global_ioc->run();
}
void run_https_server(const std::string& exporter_addr, short unsigned int port, const std::string& cert_file, const std::string& key_file) {
- net::io_context ioc{1};
ssl::context ssl_ctx(ssl::context::tlsv13);
ssl_ctx.use_certificate_chain_file(cert_file);
ssl_ctx.use_private_key_file(key_file, ssl::context::pem);
- tcp::acceptor acceptor{ioc, {net::ip::make_address(exporter_addr), port}};
+ tcp::acceptor acceptor{*global_ioc, {net::ip::make_address(exporter_addr), port}};
https_server(acceptor, ssl_ctx);
dout(1) << "HTTPS server running on " << exporter_addr << ":" << port << dendl;
- ioc.run();
+ global_ioc->run();
+}
+
+void stop_web_server() {
+ if (global_ioc) {
+ global_ioc->stop();
+ dout(1) << "Ceph exporter web server stopped" << dendl;
+ }
}
void web_server_thread_entrypoint() {
@@ -259,18 +267,21 @@ void web_server_thread_entrypoint() {
std::string cert_file = g_conf().get_val<std::string>("exporter_cert_file");
std::string key_file = g_conf().get_val<std::string>("exporter_key_file");
+ // Initialize global_ioc
+ global_ioc = std::make_shared<net::io_context>(1);
+
if (cert_file.empty() && key_file.empty()) {
run_http_server(exporter_addr, port);
} else {
try {
run_https_server(exporter_addr, port, cert_file, key_file);
} catch (const std::exception &e) {
- dout(1) << "Failed to start HTTPS server: " << e.what() << dendl;
+ derr << "Failed to start HTTPS server: " << e.what() << dendl;
exit(EXIT_FAILURE);
}
}
} catch (std::exception const &e) {
- dout(1) << "Error: " << e.what() << dendl;
+ derr << "Error: " << e.what() << dendl;
exit(EXIT_FAILURE);
}
}
diff --git a/src/exporter/web_server.h b/src/exporter/web_server.h
index c3339a8d43a..c6d4c54eca4 100644
--- a/src/exporter/web_server.h
+++ b/src/exporter/web_server.h
@@ -3,3 +3,4 @@
#include <string>
void web_server_thread_entrypoint();
+void stop_web_server();
diff --git a/src/global/signal_handler.cc b/src/global/signal_handler.cc
index d3387267871..b8149718724 100644
--- a/src/global/signal_handler.cc
+++ b/src/global/signal_handler.cc
@@ -307,7 +307,7 @@ static void handle_oneshot_fatal_signal(int signum)
char buf[1024];
char pthread_name[16] = {0}; //limited by 16B include terminating null byte.
- int r = ceph_pthread_getname(pthread_self(), pthread_name, sizeof(pthread_name));
+ int r = ceph_pthread_getname(pthread_name, sizeof(pthread_name));
(void)r;
#if defined(__sun)
char message[SIG2STR_MAX];
diff --git a/src/include/ceph_fs.h b/src/include/ceph_fs.h
index 627f4a3e85b..137669c1963 100644
--- a/src/include/ceph_fs.h
+++ b/src/include/ceph_fs.h
@@ -1005,7 +1005,7 @@ extern const char *ceph_cap_op_name(int op);
/* extra info for cap import/export */
struct ceph_mds_cap_peer {
__le64 cap_id;
- __le32 seq;
+ __le32 issue_seq;
__le32 mseq;
__le32 mds;
__u8 flags;
@@ -1058,7 +1058,7 @@ struct ceph_mds_cap_release {
struct ceph_mds_cap_item {
__le64 ino;
__le64 cap_id;
- __le32 migrate_seq, seq;
+ __le32 migrate_seq, issue_seq;
} __attribute__ ((packed));
#define CEPH_MDS_LEASE_REVOKE 1 /* mds -> client */
diff --git a/src/include/compat.h b/src/include/compat.h
index 53285243d91..a7d10fc5425 100644
--- a/src/include/compat.h
+++ b/src/include/compat.h
@@ -179,60 +179,12 @@ struct cpu_set_t;
#define MSG_DONTWAIT MSG_NONBLOCK
#endif
-/* compiler warning free success noop */
-#define pthread_setname_noop_helper(thread, name) ({ \
- int __i = 0; \
- __i; })
-
-#define pthread_getname_noop_helper(thread, name, len) ({ \
- if (name != NULL) \
- *name = '\0'; \
- 0; })
-
#define pthread_kill_unsupported_helper(thread, signal) ({ \
int __i = -ENOTSUP; \
__i; })
#if defined(_WIN32) && defined(__clang__) && \
!defined(_LIBCPP_HAS_THREAD_API_PTHREAD)
- // In this case, llvm doesn't use the pthread api for std::thread.
- // We cannot use native_handle() with the pthread api, nor can we pass
- // it to Windows API functions.
- #define ceph_pthread_setname pthread_setname_noop_helper
-#elif defined(HAVE_PTHREAD_SETNAME_NP)
- #if defined(__APPLE__)
- #define ceph_pthread_setname(thread, name) ({ \
- int __result = 0; \
- if (thread == pthread_self()) \
- __result = pthread_setname_np(name); \
- __result; })
- #else
- #define ceph_pthread_setname pthread_setname_np
- #endif
-#elif defined(HAVE_PTHREAD_SET_NAME_NP)
- /* Fix a small name diff and return 0 */
- #define ceph_pthread_setname(thread, name) ({ \
- pthread_set_name_np(thread, name); \
- 0; })
-#else
- #define ceph_pthread_setname pthread_setname_noop_helper
-#endif
-
-#if defined(_WIN32) && defined(__clang__) && \
- !defined(_LIBCPP_HAS_THREAD_API_PTHREAD)
- #define ceph_pthread_getname pthread_getname_noop_helper
-#elif defined(HAVE_PTHREAD_GETNAME_NP)
- #define ceph_pthread_getname pthread_getname_np
-#elif defined(HAVE_PTHREAD_GET_NAME_NP)
- #define ceph_pthread_getname(thread, name, len) ({ \
- pthread_get_name_np(thread, name, len); \
- 0; })
-#else
- #define ceph_pthread_getname pthread_getname_noop_helper
-#endif
-
-#if defined(_WIN32) && defined(__clang__) && \
- !defined(_LIBCPP_HAS_THREAD_API_PTHREAD)
#define ceph_pthread_kill pthread_kill_unsupported_helper
#else
#define ceph_pthread_kill pthread_kill
@@ -244,6 +196,9 @@ int ceph_posix_fallocate(int fd, off_t offset, off_t len);
extern "C" {
#endif
+int ceph_pthread_getname(char* name, size_t size);
+int ceph_pthread_setname(const char* name);
+
int pipe_cloexec(int pipefd[2], int flags);
char *ceph_strerror_r(int errnum, char *buf, size_t buflen);
unsigned get_page_size();
diff --git a/src/include/elist.h b/src/include/elist.h
index edfb7955494..e777873b045 100644
--- a/src/include/elist.h
+++ b/src/include/elist.h
@@ -15,6 +15,10 @@
#ifndef CEPH_ELIST_H
#define CEPH_ELIST_H
+#include <cstddef> // for size_t
+
+#include "include/ceph_assert.h"
+
/*
* elist: embedded list.
*
diff --git a/src/include/str_list.h b/src/include/str_list.h
index cad76c1d6f5..a4c7432c6ef 100644
--- a/src/include/str_list.h
+++ b/src/include/str_list.h
@@ -2,7 +2,6 @@
#define CEPH_STRLIST_H
#include <list>
-#include <set>
#include <string>
#include <string_view>
#include <vector>
diff --git a/src/log/Entry.h b/src/log/Entry.h
index db39eca0ef3..eeb25c5f593 100644
--- a/src/log/Entry.h
+++ b/src/log/Entry.h
@@ -24,6 +24,7 @@ namespace logging {
class Entry {
public:
using time = log_time;
+ using thread_name_t = std::array<char, 16>;
Entry() = delete;
Entry(short pr, short sub) :
@@ -32,8 +33,7 @@ public:
m_prio(pr),
m_subsys(sub)
{
- strncpy(m_thread_name, Thread::get_thread_name().data(), 16);
- m_thread_name[15] = '\0';
+ ceph_pthread_getname(m_thread_name.data(), m_thread_name.size());
}
Entry(const Entry &) = default;
Entry& operator=(const Entry &) = default;
@@ -47,7 +47,7 @@ public:
time m_stamp;
pthread_t m_thread;
short m_prio, m_subsys;
- char m_thread_name[16];
+ thread_name_t m_thread_name{};
static log_clock& clock() {
static log_clock clock;
diff --git a/src/log/Log.cc b/src/log/Log.cc
index 49dd03c06c0..63d5205d9e2 100644
--- a/src/log/Log.cc
+++ b/src/log/Log.cc
@@ -31,6 +31,7 @@
#include <fmt/format.h>
#include <fmt/ostream.h>
+#include <fmt/ranges.h>
#define MAX_LOG_BUF 65536
@@ -372,6 +373,7 @@ void Log::_flush_logbuf()
void Log::_flush(EntryVector& t, bool crash)
{
+ auto now = mono_clock::now();
long len = 0;
if (t.empty()) {
assert(m_log_buf.empty());
@@ -443,10 +445,29 @@ void Log::_flush(EntryVector& t, bool crash)
m_journald->log_entry(e);
}
+ {
+ auto [it, _] = m_recent_thread_names.try_emplace(e.m_thread, now, DEFAULT_MAX_THREAD_NAMES);
+ auto& [t, names] = it->second;
+ if (names.size() == 0 || names.front() != e.m_thread_name.data()) {
+ names.push_front(e.m_thread_name.data());
+ }
+ t = now;
+ }
+
m_recent.push_back(std::move(e));
}
t.clear();
+ for (auto it = m_recent_thread_names.begin(); it != m_recent_thread_names.end(); ) {
+ auto t = it->second.first;
+ auto since = now - t;
+ if (since > std::chrono::seconds(60*60*24)) {
+ it = m_recent_thread_names.erase(it);
+ } else {
+ ++it;
+ }
+ }
+
_flush_logbuf();
}
@@ -493,14 +514,10 @@ void Log::dump_recent()
_flush(m_flush, false);
_log_message("--- begin dump of recent events ---", true);
- std::set<std::pair<pthread_t, const char *>> recent_pthread_ids;
{
EntryVector t;
t.insert(t.end(), std::make_move_iterator(m_recent.begin()), std::make_move_iterator(m_recent.end()));
m_recent.clear();
- for (const auto& e : t) {
- recent_pthread_ids.emplace(std::make_pair(e.m_thread, e.m_thread_name));
- }
_flush(t, true);
}
@@ -515,11 +532,15 @@ void Log::dump_recent()
m_stderr_log, m_stderr_crash), true);
_log_message("--- pthread ID / name mapping for recent threads ---", true);
- for (auto& [pthread_id, pthread_name] : recent_pthread_ids)
+ for (const auto& [tid, t_names] : m_recent_thread_names)
{
+ [[maybe_unused]] auto [t, names] = t_names;
// we want the ID to be printed in the same format as we use for a log entry.
// The reason is easier grepping.
- _log_message(fmt::format(" {:x} / {}", tid_to_int(pthread_id), pthread_name), true);
+ auto msg = fmt::format(" {:x} / {}",
+ tid_to_int(tid),
+ fmt::join(names, ", "));
+ _log_message(msg, true);
}
_log_message(fmt::format(" max_recent {:9}", m_recent.capacity()), true);
diff --git a/src/log/Log.h b/src/log/Log.h
index 3a60937af55..46d97734305 100644
--- a/src/log/Log.h
+++ b/src/log/Log.h
@@ -7,6 +7,7 @@
#include <boost/circular_buffer.hpp>
#include <condition_variable>
+#include <map>
#include <memory>
#include <mutex>
#include <queue>
@@ -14,6 +15,7 @@
#include <string_view>
#include "common/Thread.h"
+#include "common/ceph_time.h"
#include "common/likely.h"
#include "log/Entry.h"
@@ -86,9 +88,14 @@ protected:
private:
using EntryRing = boost::circular_buffer<ConcreteEntry>;
+ using mono_clock = ceph::coarse_mono_clock;
+ using mono_time = ceph::coarse_mono_time;
+
+ using RecentThreadNames = std::map<pthread_t, std::pair<mono_time, boost::circular_buffer<std::string> > >;
static const std::size_t DEFAULT_MAX_NEW = 100;
static const std::size_t DEFAULT_MAX_RECENT = 10000;
+ static constexpr std::size_t DEFAULT_MAX_THREAD_NAMES = 4;
Log **m_indirect_this;
@@ -102,6 +109,7 @@ private:
pthread_t m_queue_mutex_holder;
pthread_t m_flush_mutex_holder;
+ RecentThreadNames m_recent_thread_names; // protected by m_flush_mutex
EntryVector m_new; ///< new entries
EntryRing m_recent; ///< recent (less new) entries we've already written at low detail
EntryVector m_flush; ///< entries to be flushed (here to optimize heap allocations)
diff --git a/src/mds/Beacon.cc b/src/mds/Beacon.cc
index 642d3428a27..6fbfc79d416 100644
--- a/src/mds/Beacon.cc
+++ b/src/mds/Beacon.cc
@@ -61,6 +61,7 @@ void Beacon::shutdown()
std::unique_lock<std::mutex> lock(mutex);
if (!finished) {
finished = true;
+ cvar.notify_all();
lock.unlock();
if (sender.joinable())
sender.join();
@@ -74,7 +75,7 @@ void Beacon::init(const MDSMap &mdsmap)
_notify_mdsmap(mdsmap);
sender = std::thread([this]() {
- ceph_pthread_setname(pthread_self(), "beacon");
+ ceph_pthread_setname("mds-beacon");
std::unique_lock<std::mutex> lock(mutex);
bool sent;
while (!finished) {
@@ -320,16 +321,15 @@ void Beacon::notify_health(MDSRank const *mds)
// Detect MDS_HEALTH_TRIM condition
// Indicates MDS is not trimming promptly
{
- const auto log_max_segments = mds->mdlog->get_max_segments();
- const auto log_warn_factor = g_conf().get_val<double>("mds_log_warn_factor");
- if (mds->mdlog->get_num_segments() > (size_t)(log_max_segments * log_warn_factor)) {
+ if (mds->mdlog->is_trim_slow()) {
+ auto num_segments = mds->mdlog->get_num_segments();
+ auto max_segments = mds->mdlog->get_max_segments();
CachedStackStringStream css;
- *css << "Behind on trimming (" << mds->mdlog->get_num_segments()
- << "/" << log_max_segments << ")";
+ *css << "Behind on trimming (" << num_segments << "/" << max_segments << ")";
MDSHealthMetric m(MDS_HEALTH_TRIM, HEALTH_WARN, css->strv());
- m.metadata["num_segments"] = stringify(mds->mdlog->get_num_segments());
- m.metadata["max_segments"] = stringify(log_max_segments);
+ m.metadata["num_segments"] = stringify(num_segments);
+ m.metadata["max_segments"] = stringify(max_segments);
health.metrics.push_back(m);
}
}
diff --git a/src/mds/CDir.cc b/src/mds/CDir.cc
index af9f8edfffa..7d28e039d9c 100644
--- a/src/mds/CDir.cc
+++ b/src/mds/CDir.cc
@@ -2494,6 +2494,10 @@ void CDir::_omap_commit_ops(int r, int op_prio, int64_t metapool, version_t vers
mdcache->mds->heartbeat_reset();
}
+ // the last omap commit includes the omap header, so account for
+ // that size early on so that when we reach `commit_one(true)`,
+ // there is enough space for the header.
+ write_size += sizeof(fnode_t);
using ceph::encode;
for (auto &item : to_set) {
bufferlist bl;
diff --git a/src/mds/Capability.h b/src/mds/Capability.h
index 9adcf3b25b9..0782464ad94 100644
--- a/src/mds/Capability.h
+++ b/src/mds/Capability.h
@@ -218,8 +218,6 @@ public:
void set_cap_id(uint64_t i) { cap_id = i; }
uint64_t get_cap_id() const { return cap_id; }
- //ceph_seq_t get_last_issue() { return last_issue; }
-
bool is_suppress() const { return suppress > 0; }
void inc_suppress() { suppress++; }
void dec_suppress() { suppress--; }
diff --git a/src/mds/Locker.cc b/src/mds/Locker.cc
index 63608d48864..d64f176acb6 100644
--- a/src/mds/Locker.cc
+++ b/src/mds/Locker.cc
@@ -2599,6 +2599,7 @@ int Locker::issue_caps(CInode *in, Capability *only_cap)
in->find_snaprealm()->inode->ino(),
cap->get_cap_id(), cap->get_last_seq(),
pending, wanted, 0, cap->get_mseq(),
+ cap->get_last_issue(),
mds->get_osd_epoch_barrier());
in->encode_cap_message(m, cap);
@@ -2649,6 +2650,7 @@ int Locker::issue_caps(CInode *in, Capability *only_cap)
in->find_snaprealm()->inode->ino(),
cap->get_cap_id(), cap->get_last_seq(),
after, wanted, 0, cap->get_mseq(),
+ cap->get_last_issue(),
mds->get_osd_epoch_barrier());
in->encode_cap_message(m, cap);
@@ -2675,6 +2677,7 @@ void Locker::issue_truncate(CInode *in)
cap->get_cap_id(), cap->get_last_seq(),
cap->pending(), cap->wanted(), 0,
cap->get_mseq(),
+ cap->get_last_issue(),
mds->get_osd_epoch_barrier());
in->encode_cap_message(m, cap);
mds->send_message_client_counted(m, cap->get_session());
@@ -3165,6 +3168,7 @@ void Locker::share_inode_max_size(CInode *in, Capability *only_cap)
cap->pending(),
cap->wanted(), 0,
cap->get_mseq(),
+ cap->get_last_issue(),
mds->get_osd_epoch_barrier());
in->encode_cap_message(m, cap);
mds->send_message_client_counted(m, cap->get_session());
@@ -3375,10 +3379,10 @@ void Locker::handle_client_caps(const cref_t<MClientCaps> &m)
ref_t<MClientCaps> ack;
if (op == CEPH_CAP_OP_FLUSHSNAP) {
if (mds->logger) mds->logger->inc(l_mdss_ceph_cap_op_flushsnap_ack);
- ack = make_message<MClientCaps>(CEPH_CAP_OP_FLUSHSNAP_ACK, m->get_ino(), 0, 0, 0, 0, 0, dirty, 0, mds->get_osd_epoch_barrier());
+ ack = make_message<MClientCaps>(CEPH_CAP_OP_FLUSHSNAP_ACK, m->get_ino(), 0, 0, 0, 0, 0, dirty, 0, 0, mds->get_osd_epoch_barrier());
} else {
if (mds->logger) mds->logger->inc(l_mdss_ceph_cap_op_flush_ack);
- ack = make_message<MClientCaps>(CEPH_CAP_OP_FLUSH_ACK, m->get_ino(), 0, m->get_cap_id(), m->get_seq(), m->get_caps(), 0, dirty, 0, mds->get_osd_epoch_barrier());
+ ack = make_message<MClientCaps>(CEPH_CAP_OP_FLUSH_ACK, m->get_ino(), 0, m->get_cap_id(), m->get_seq(), m->get_caps(), 0, dirty, 0, 0, mds->get_osd_epoch_barrier());
}
ack->set_snap_follows(follows);
ack->set_client_tid(m->get_client_tid());
@@ -3500,7 +3504,7 @@ void Locker::handle_client_caps(const cref_t<MClientCaps> &m)
// case we get a dup response, so whatever.)
ref_t<MClientCaps> ack;
if (dirty) {
- ack = make_message<MClientCaps>(CEPH_CAP_OP_FLUSHSNAP_ACK, in->ino(), 0, 0, 0, 0, 0, dirty, 0, mds->get_osd_epoch_barrier());
+ ack = make_message<MClientCaps>(CEPH_CAP_OP_FLUSHSNAP_ACK, in->ino(), 0, 0, 0, 0, 0, dirty, 0, 0, mds->get_osd_epoch_barrier());
ack->set_snap_follows(follows);
ack->set_client_tid(m->get_client_tid());
ack->set_oldest_flush_tid(m->get_oldest_flush_tid());
@@ -3589,7 +3593,7 @@ void Locker::handle_client_caps(const cref_t<MClientCaps> &m)
dout(7) << " flush client." << client << " dirty " << ccap_string(dirty)
<< " seq " << m->get_seq() << " on " << *in << dendl;
ack = make_message<MClientCaps>(CEPH_CAP_OP_FLUSH_ACK, in->ino(), 0, cap->get_cap_id(), m->get_seq(),
- m->get_caps(), 0, dirty, 0, mds->get_osd_epoch_barrier());
+ m->get_caps(), 0, dirty, 0, cap->get_last_issue(), mds->get_osd_epoch_barrier());
ack->set_client_tid(m->get_client_tid());
ack->set_oldest_flush_tid(m->get_oldest_flush_tid());
}
@@ -4222,7 +4226,7 @@ void Locker::handle_client_cap_release(const cref_t<MClientCapRelease> &m)
Session *session = mds->get_session(m);
for (const auto &cap : m->caps) {
- _do_cap_release(client, inodeno_t((uint64_t)cap.ino) , cap.cap_id, cap.migrate_seq, cap.seq);
+ _do_cap_release(client, inodeno_t((uint64_t)cap.ino) , cap.cap_id, cap.migrate_seq, cap.issue_seq);
}
if (session) {
diff --git a/src/mds/MDCache.cc b/src/mds/MDCache.cc
index eb2b529dcfa..3af0d8c6b1e 100644
--- a/src/mds/MDCache.cc
+++ b/src/mds/MDCache.cc
@@ -5891,7 +5891,7 @@ void MDCache::do_cap_import(Session *session, CInode *in, Capability *cap,
auto reap = make_message<MClientCaps>(CEPH_CAP_OP_IMPORT,
in->ino(), realm->inode->ino(), cap->get_cap_id(),
cap->get_last_seq(), cap->pending(), cap->wanted(),
- 0, cap->get_mseq(), mds->get_osd_epoch_barrier());
+ 0, cap->get_mseq(), cap->get_last_issue(), mds->get_osd_epoch_barrier());
in->encode_cap_message(reap, cap);
reap->snapbl = mds->server->get_snap_trace(session, realm);
reap->set_cap_peer(p_cap_id, p_seq, p_mseq, peer, p_flags);
@@ -14378,6 +14378,7 @@ bool MDCache::is_ready_to_trim_cache(void)
void MDCache::upkeep_main(void)
{
+ ceph_pthread_setname("mds-cache-trim");
std::unique_lock lock(upkeep_mutex);
// create a "memory model" for the upkeep thread. The object maintains
diff --git a/src/mds/MDLog.cc b/src/mds/MDLog.cc
index 0be568433ef..4bbf2a1a141 100644
--- a/src/mds/MDLog.cc
+++ b/src/mds/MDLog.cc
@@ -53,11 +53,12 @@ MDLog::MDLog(MDSRank* m)
event_large_threshold = g_conf().get_val<uint64_t>("mds_log_event_large_threshold");
events_per_segment = g_conf().get_val<uint64_t>("mds_log_events_per_segment");
pause = g_conf().get_val<bool>("mds_log_pause");
- major_segment_event_ratio = g_conf().get_val<uint64_t>("mds_log_major_segment_event_ratio");
max_segments = g_conf().get_val<uint64_t>("mds_log_max_segments");
max_events = g_conf().get_val<int64_t>("mds_log_max_events");
skip_corrupt_events = g_conf().get_val<bool>("mds_log_skip_corrupt_events");
skip_unbounded_events = g_conf().get_val<bool>("mds_log_skip_unbounded_events");
+ log_warn_factor = g_conf().get_val<double>("mds_log_warn_factor");
+ minor_segments_per_major_segment = g_conf().get_val<uint64_t>("mds_log_minor_segments_per_major_segment");
upkeep_thread = std::thread(&MDLog::log_trim_upkeep, this);
}
@@ -258,7 +259,7 @@ void MDLog::create(MDSContext *c)
logger->set(l_mdl_expos, journaler->get_expire_pos());
logger->set(l_mdl_wrpos, journaler->get_write_pos());
- submit_thread.create("md_submit");
+ submit_thread.create("mds-log-submit");
}
void MDLog::open(MDSContext *c)
@@ -267,9 +268,9 @@ void MDLog::open(MDSContext *c)
ceph_assert(!recovery_thread.is_started());
recovery_thread.set_completion(c);
- recovery_thread.create("md_recov_open");
+ recovery_thread.create("mds-log-recvr");
- submit_thread.create("md_submit");
+ submit_thread.create("mds-log-submit");
// either append() or replay() will follow.
}
@@ -311,7 +312,7 @@ void MDLog::reopen(MDSContext *c)
recovery_thread.join();
recovery_thread.set_completion(new C_ReopenComplete(this, c));
- recovery_thread.create("md_recov_reopen");
+ recovery_thread.create("mds-log-reopen");
}
void MDLog::append()
@@ -357,14 +358,15 @@ void MDLog::_submit_entry(LogEvent *le, MDSLogContextBase* c)
ceph_assert(!mds_is_shutting_down);
event_seq++;
- events_since_last_major_segment++;
if (auto sb = dynamic_cast<SegmentBoundary*>(le); sb) {
auto ls = _start_new_segment(sb);
if (sb->is_major_segment_boundary()) {
major_segments.insert(ls->seq);
logger->set(l_mdl_segmjr, major_segments.size());
- events_since_last_major_segment = 0;
+ minor_segments_since_last_major_segment = 0;
+ } else {
+ ++minor_segments_since_last_major_segment;
}
}
@@ -403,7 +405,7 @@ void MDLog::_segment_upkeep()
uint64_t period = journaler->get_layout_period();
auto ls = get_current_segment();
// start a new segment?
- if (events_since_last_major_segment > events_per_segment*major_segment_event_ratio) {
+ if (minor_segments_since_last_major_segment > minor_segments_per_major_segment) {
dout(10) << __func__ << ": starting new major segment, current " << *ls << dendl;
auto sle = mds->mdcache->create_subtree_map();
_submit_entry(sle, NULL);
@@ -656,7 +658,13 @@ void MDLog::try_to_commit_open_file_table(uint64_t last_seq)
}
}
+bool MDLog::is_trim_slow() const {
+ return (segments.size() > (size_t)(max_segments * log_warn_factor));
+}
+
void MDLog::log_trim_upkeep(void) {
+ ceph_pthread_setname("mds-log-trim");
+
dout(10) << dendl;
std::unique_lock mds_lock(mds->mds_lock);
@@ -1008,7 +1016,7 @@ void MDLog::replay(MDSContext *c)
}
already_replayed = true;
- replay_thread.create("md_log_replay");
+ replay_thread.create("mds-log-replay");
}
@@ -1474,7 +1482,6 @@ void MDLog::_replay_thread()
}
le->set_start_off(pos);
- events_since_last_major_segment++;
if (auto sb = dynamic_cast<SegmentBoundary*>(le.get()); sb) {
auto seq = sb->get_seq();
if (seq > 0) {
@@ -1487,7 +1494,9 @@ void MDLog::_replay_thread()
if (sb->is_major_segment_boundary()) {
major_segments.insert(event_seq);
logger->set(l_mdl_segmjr, major_segments.size());
- events_since_last_major_segment = 0;
+ minor_segments_since_last_major_segment = 0;
+ } else {
+ ++minor_segments_since_last_major_segment;
}
} else {
event_seq++;
@@ -1618,9 +1627,6 @@ void MDLog::handle_conf_change(const std::set<std::string>& changed, const MDSMa
if (changed.count("mds_log_events_per_segment")) {
events_per_segment = g_conf().get_val<uint64_t>("mds_log_events_per_segment");
}
- if (changed.count("mds_log_major_segment_event_ratio")) {
- major_segment_event_ratio = g_conf().get_val<uint64_t>("mds_log_major_segment_event_ratio");
- }
if (changed.count("mds_log_max_events")) {
max_events = g_conf().get_val<int64_t>("mds_log_max_events");
}
@@ -1642,4 +1648,10 @@ void MDLog::handle_conf_change(const std::set<std::string>& changed, const MDSMa
if (changed.count("mds_log_trim_decay_rate")){
log_trim_counter = DecayCounter(g_conf().get_val<double>("mds_log_trim_decay_rate"));
}
+ if (changed.count("mds_log_warn_factor")) {
+ log_warn_factor = g_conf().get_val<double>("mds_log_warn_factor");
+ }
+ if (changed.count("mds_log_minor_segments_per_major_segment")) {
+ minor_segments_per_major_segment = g_conf().get_val<uint64_t>("mds_log_minor_segments_per_major_segment");
+ }
}
diff --git a/src/mds/MDLog.h b/src/mds/MDLog.h
index e2ab4e686cd..a858b40fa03 100644
--- a/src/mds/MDLog.h
+++ b/src/mds/MDLog.h
@@ -173,6 +173,9 @@ public:
// replay state
std::map<inodeno_t, std::set<inodeno_t>> pending_exports;
+ // beacon needs me too
+ bool is_trim_slow() const;
+
protected:
struct PendingEvent {
PendingEvent(LogEvent *e, Context* c, bool f=false) : le(e), fin(c), flush(f) {}
@@ -302,9 +305,9 @@ private:
bool debug_subtrees;
std::atomic_uint64_t event_large_threshold; // accessed by submit thread
uint64_t events_per_segment;
- uint64_t major_segment_event_ratio;
int64_t max_events;
uint64_t max_segments;
+ uint64_t minor_segments_per_major_segment;
bool pause;
bool skip_corrupt_events;
bool skip_unbounded_events;
@@ -312,7 +315,8 @@ private:
std::set<uint64_t> major_segments;
std::set<LogSegment*> expired_segments;
std::set<LogSegment*> expiring_segments;
- uint64_t events_since_last_major_segment = 0;
+ uint64_t minor_segments_since_last_major_segment = 0;
+ double log_warn_factor;
// log trimming decay counter
DecayCounter log_trim_counter;
diff --git a/src/mds/MDSRank.cc b/src/mds/MDSRank.cc
index 30820493cda..52ed930d71b 100644
--- a/src/mds/MDSRank.cc
+++ b/src/mds/MDSRank.cc
@@ -496,7 +496,7 @@ MDSRank::MDSRank(
objecter->unset_honor_pool_full();
- finisher = new Finisher(cct, "MDSRank", "MR_Finisher");
+ finisher = new Finisher(cct, "MDSRank", "mds-rank-fin");
mdcache = new MDCache(this, purge_queue);
mdlog = new MDLog(this);
@@ -581,7 +581,7 @@ void MDSRankDispatcher::init()
// who is interested in it.
handle_osd_map();
- progress_thread.create("mds_rank_progr");
+ progress_thread.create("mds-rank-progr");
purge_queue.init();
diff --git a/src/mds/MetricAggregator.cc b/src/mds/MetricAggregator.cc
index 1d17bbf3e92..6cbd9a094c0 100644
--- a/src/mds/MetricAggregator.cc
+++ b/src/mds/MetricAggregator.cc
@@ -73,6 +73,7 @@ int MetricAggregator::init() {
m_cct->get_perfcounters_collection()->add(m_perf_counters);
pinger = std::thread([this]() {
+ ceph_pthread_setname("mds-ping");
std::unique_lock locker(lock);
while (!stopping) {
ping_all_active_ranks();
diff --git a/src/mds/MetricsHandler.cc b/src/mds/MetricsHandler.cc
index 9ad10b9d6e6..9fc4c6122a4 100644
--- a/src/mds/MetricsHandler.cc
+++ b/src/mds/MetricsHandler.cc
@@ -51,6 +51,7 @@ void MetricsHandler::init() {
dout(10) << dendl;
updater = std::thread([this]() {
+ ceph_pthread_setname("mds-metrics");
std::unique_lock locker(lock);
while (!stopping) {
double after = g_conf().get_val<std::chrono::seconds>("mds_metrics_update_interval").count();
diff --git a/src/mds/Migrator.cc b/src/mds/Migrator.cc
index 722b6bd7422..6b12f710db4 100644
--- a/src/mds/Migrator.cc
+++ b/src/mds/Migrator.cc
@@ -1957,10 +1957,10 @@ void Migrator::handle_export_ack(const cref_t<MExportDirAck> &m)
// this keeps authority().first in sync with subtree auth state in the journal.
mdcache->adjust_subtree_auth(dir, it->second.peer, mds->get_nodeid());
+ ceph_assert(g_conf()->mds_kill_export_at != 10);
// log export completion, then finish (unfreeze, trigger finish context, etc.)
mds->mdlog->submit_entry(le, new C_MDS_ExportFinishLogged(this, dir));
mds->mdlog->flush();
- ceph_assert(g_conf()->mds_kill_export_at != 10);
}
void Migrator::export_notify_abort(CDir *dir, export_state_t& stat, set<CDir*>& bounds)
@@ -2844,7 +2844,6 @@ void Migrator::import_reverse(CDir *dir)
dout(7) << *dir << dendl;
import_state_t& stat = import_state[dir->dirfrag()];
- stat.state = IMPORT_ABORTING;
set<CDir*> bounds;
mdcache->get_subtree_bounds(dir, bounds);
@@ -2950,10 +2949,14 @@ void Migrator::import_reverse(CDir *dir)
}
in->put(CInode::PIN_IMPORTINGCAPS);
}
+ }
+
+ if (stat.state == IMPORT_LOGGINGSTART || stat.state == IMPORT_ACKING) {
for (auto& p : stat.session_map) {
Session *session = p.second.first;
session->dec_importing();
}
+ mds->server->close_forced_opened_sessions(stat.session_map);
}
// log our failure
@@ -2962,6 +2965,7 @@ void Migrator::import_reverse(CDir *dir)
mdcache->trim(num_dentries); // try trimming dentries
// notify bystanders; wait in aborting state
+ stat.state = IMPORT_ABORTING;
import_notify_abort(dir, bounds);
}
@@ -3054,10 +3058,9 @@ void Migrator::import_logged_start(dirfrag_t df, CDir *dir, mds_rank_t from,
dout(7) << *dir << dendl;
map<dirfrag_t, import_state_t>::iterator it = import_state.find(dir->dirfrag());
- if (it == import_state.end() ||
- it->second.state != IMPORT_LOGGINGSTART) {
+ ceph_assert(it != import_state.end());
+ if (it->second.state != IMPORT_LOGGINGSTART) {
dout(7) << "import " << df << " must have aborted" << dendl;
- mds->server->finish_force_open_sessions(imported_session_map);
return;
}
diff --git a/src/mds/PurgeQueue.cc b/src/mds/PurgeQueue.cc
index 925bff16542..4426d3ca6fe 100644
--- a/src/mds/PurgeQueue.cc
+++ b/src/mds/PurgeQueue.cc
@@ -122,7 +122,7 @@ PurgeQueue::PurgeQueue(
cct(cct_),
rank(rank_),
metadata_pool(metadata_pool_),
- finisher(cct, "PurgeQueue", "PQ_Finisher"),
+ finisher(cct, "PurgeQueue", "mds-pq-fin"),
timer(cct, lock),
filer(objecter_, &finisher),
objecter(objecter_),
diff --git a/src/mds/QuiesceAgent.h b/src/mds/QuiesceAgent.h
index 5c07d6d8074..85900e8e71b 100644
--- a/src/mds/QuiesceAgent.h
+++ b/src/mds/QuiesceAgent.h
@@ -30,7 +30,7 @@ class QuiesceAgent {
: quiesce_control(quiesce_control)
, stop_agent_thread(false)
, agent_thread(this) {
- agent_thread.create("quiesce.agt");
+ agent_thread.create("mds-q-agt");
};
virtual ~QuiesceAgent() {
diff --git a/src/mds/QuiesceDbEncoding.h b/src/mds/QuiesceDbEncoding.h
index c76ed2d0c52..27c7e3ca2d0 100644
--- a/src/mds/QuiesceDbEncoding.h
+++ b/src/mds/QuiesceDbEncoding.h
@@ -15,7 +15,7 @@
#include "include/encoding.h"
#include <stdint.h>
-void encode(QuiesceDbVersion const& v, bufferlist& bl, uint64_t features = 0)
+inline void encode(QuiesceDbVersion const& v, bufferlist& bl, uint64_t features = 0)
{
ENCODE_START(1, 1, bl);
encode(v.epoch, bl, features);
@@ -23,7 +23,7 @@ void encode(QuiesceDbVersion const& v, bufferlist& bl, uint64_t features = 0)
ENCODE_FINISH(bl);
}
-void decode(QuiesceDbVersion& v, bufferlist::const_iterator& p)
+inline void decode(QuiesceDbVersion& v, bufferlist::const_iterator& p)
{
DECODE_START(1, p);
decode(v.epoch, p);
@@ -31,33 +31,33 @@ void decode(QuiesceDbVersion& v, bufferlist::const_iterator& p)
DECODE_FINISH(p);
}
-void encode(QuiesceState const & state, bufferlist& bl, uint64_t features=0)
+inline void encode(QuiesceState const & state, bufferlist& bl, uint64_t features=0)
{
static_assert(QuiesceState::QS__MAX <= UINT8_MAX);
uint8_t v = (uint8_t)state;
encode(v, bl, features);
}
-void decode(QuiesceState & state, bufferlist::const_iterator& p)
+inline void decode(QuiesceState & state, bufferlist::const_iterator& p)
{
uint8_t v = 0;
decode(v, p);
state = (QuiesceState)v;
}
-void encode(QuiesceTimeInterval const & interval, bufferlist& bl, uint64_t features=0)
+inline void encode(QuiesceTimeInterval const & interval, bufferlist& bl, uint64_t features=0)
{
encode(interval.count(), bl, features);
}
-void decode(QuiesceTimeInterval & interval, bufferlist::const_iterator& p)
+inline void decode(QuiesceTimeInterval & interval, bufferlist::const_iterator& p)
{
QuiesceClock::rep count;
decode(count, p);
interval = QuiesceTimeInterval { count };
}
-void encode(RecordedQuiesceState const& rstate, bufferlist& bl, uint64_t features = 0)
+inline void encode(RecordedQuiesceState const& rstate, bufferlist& bl, uint64_t features = 0)
{
ENCODE_START(1, 1, bl);
encode(rstate.state, bl, features);
@@ -65,7 +65,7 @@ void encode(RecordedQuiesceState const& rstate, bufferlist& bl, uint64_t feature
ENCODE_FINISH(bl);
}
-void decode(RecordedQuiesceState& rstate, bufferlist::const_iterator& p)
+inline void decode(RecordedQuiesceState& rstate, bufferlist::const_iterator& p)
{
DECODE_START(1, p);
decode(rstate.state, p);
@@ -73,7 +73,7 @@ void decode(RecordedQuiesceState& rstate, bufferlist::const_iterator& p)
DECODE_FINISH(p);
}
-void encode(QuiesceSet::MemberInfo const& member, bufferlist& bl, uint64_t features = 0)
+inline void encode(QuiesceSet::MemberInfo const& member, bufferlist& bl, uint64_t features = 0)
{
ENCODE_START(1, 1, bl);
encode(member.rstate, bl, features);
@@ -81,7 +81,7 @@ void encode(QuiesceSet::MemberInfo const& member, bufferlist& bl, uint64_t featu
ENCODE_FINISH(bl);
}
-void decode(QuiesceSet::MemberInfo& member, bufferlist::const_iterator& p)
+inline void decode(QuiesceSet::MemberInfo& member, bufferlist::const_iterator& p)
{
DECODE_START(1, p);
decode(member.rstate, p);
@@ -89,7 +89,7 @@ void decode(QuiesceSet::MemberInfo& member, bufferlist::const_iterator& p)
DECODE_FINISH(p);
}
-void encode(QuiesceSet const& set, bufferlist& bl, uint64_t features = 0)
+inline void encode(QuiesceSet const& set, bufferlist& bl, uint64_t features = 0)
{
ENCODE_START(1, 1, bl);
encode(set.version, bl, features);
@@ -100,7 +100,7 @@ void encode(QuiesceSet const& set, bufferlist& bl, uint64_t features = 0)
ENCODE_FINISH(bl);
}
-void decode(QuiesceSet& set, bufferlist::const_iterator& p)
+inline void decode(QuiesceSet& set, bufferlist::const_iterator& p)
{
DECODE_START(1, p);
decode(set.version, p);
@@ -111,7 +111,7 @@ void decode(QuiesceSet& set, bufferlist::const_iterator& p)
DECODE_FINISH(p);
}
-void encode(QuiesceDbRequest const& req, bufferlist& bl, uint64_t features = 0)
+inline void encode(QuiesceDbRequest const& req, bufferlist& bl, uint64_t features = 0)
{
ENCODE_START(1, 1, bl);
encode(req.control.raw, bl, features);
@@ -124,7 +124,7 @@ void encode(QuiesceDbRequest const& req, bufferlist& bl, uint64_t features = 0)
ENCODE_FINISH(bl);
}
-void decode(QuiesceDbRequest& req, bufferlist::const_iterator& p)
+inline void decode(QuiesceDbRequest& req, bufferlist::const_iterator& p)
{
DECODE_START(1, p);
decode(req.control.raw, p);
@@ -137,7 +137,7 @@ void decode(QuiesceDbRequest& req, bufferlist::const_iterator& p)
DECODE_FINISH(p);
}
-void encode(QuiesceDbListing const& listing, bufferlist& bl, uint64_t features = 0)
+inline void encode(QuiesceDbListing const& listing, bufferlist& bl, uint64_t features = 0)
{
ENCODE_START(1, 1, bl);
encode(listing.db_version, bl, features);
@@ -146,7 +146,7 @@ void encode(QuiesceDbListing const& listing, bufferlist& bl, uint64_t features =
ENCODE_FINISH(bl);
}
-void decode(QuiesceDbListing& listing, bufferlist::const_iterator& p)
+inline void decode(QuiesceDbListing& listing, bufferlist::const_iterator& p)
{
DECODE_START(1, p);
decode(listing.db_version, p);
@@ -155,7 +155,7 @@ void decode(QuiesceDbListing& listing, bufferlist::const_iterator& p)
DECODE_FINISH(p);
}
-void encode(QuiesceDbPeerListing const& listing, bufferlist& bl, uint64_t features = 0)
+inline void encode(QuiesceDbPeerListing const& listing, bufferlist& bl, uint64_t features = 0)
{
ENCODE_START(1, 1, bl);
encode(listing.origin, bl, features);
@@ -163,7 +163,7 @@ void encode(QuiesceDbPeerListing const& listing, bufferlist& bl, uint64_t featur
ENCODE_FINISH(bl);
}
-void decode(QuiesceDbPeerListing& listing, bufferlist::const_iterator& p)
+inline void decode(QuiesceDbPeerListing& listing, bufferlist::const_iterator& p)
{
DECODE_START(1, p);
decode(listing.origin, p);
@@ -171,7 +171,7 @@ void decode(QuiesceDbPeerListing& listing, bufferlist::const_iterator& p)
DECODE_FINISH(p);
}
-void encode(QuiesceMap::RootInfo const& root, bufferlist& bl, uint64_t features = 0)
+inline void encode(QuiesceMap::RootInfo const& root, bufferlist& bl, uint64_t features = 0)
{
ENCODE_START(1, 1, bl);
encode(root.state, bl, features);
@@ -179,7 +179,7 @@ void encode(QuiesceMap::RootInfo const& root, bufferlist& bl, uint64_t features
ENCODE_FINISH(bl);
}
-void decode(QuiesceMap::RootInfo& root, bufferlist::const_iterator& p)
+inline void decode(QuiesceMap::RootInfo& root, bufferlist::const_iterator& p)
{
DECODE_START(1, p);
decode(root.state, p);
@@ -187,7 +187,7 @@ void decode(QuiesceMap::RootInfo& root, bufferlist::const_iterator& p)
DECODE_FINISH(p);
}
-void encode(QuiesceMap const& map, bufferlist& bl, uint64_t features = 0)
+inline void encode(QuiesceMap const& map, bufferlist& bl, uint64_t features = 0)
{
ENCODE_START(1, 1, bl);
encode(map.db_version, bl, features);
@@ -195,7 +195,7 @@ void encode(QuiesceMap const& map, bufferlist& bl, uint64_t features = 0)
ENCODE_FINISH(bl);
}
-void decode(QuiesceMap& map, bufferlist::const_iterator& p)
+inline void decode(QuiesceMap& map, bufferlist::const_iterator& p)
{
DECODE_START(1, p);
decode(map.db_version, p);
@@ -203,7 +203,7 @@ void decode(QuiesceMap& map, bufferlist::const_iterator& p)
DECODE_FINISH(p);
}
-void encode(QuiesceDbPeerAck const& ack, bufferlist& bl, uint64_t features = 0)
+inline void encode(QuiesceDbPeerAck const& ack, bufferlist& bl, uint64_t features = 0)
{
ENCODE_START(1, 1, bl);
encode(ack.origin, bl, features);
@@ -211,7 +211,7 @@ void encode(QuiesceDbPeerAck const& ack, bufferlist& bl, uint64_t features = 0)
ENCODE_FINISH(bl);
}
-void decode(QuiesceDbPeerAck& ack, bufferlist::const_iterator& p)
+inline void decode(QuiesceDbPeerAck& ack, bufferlist::const_iterator& p)
{
DECODE_START(1, p);
decode(ack.origin, p);
diff --git a/src/mds/QuiesceDbManager.cc b/src/mds/QuiesceDbManager.cc
index 12c83634e54..bb3ae93e378 100644
--- a/src/mds/QuiesceDbManager.cc
+++ b/src/mds/QuiesceDbManager.cc
@@ -200,7 +200,7 @@ void QuiesceDbManager::update_membership(const QuiesceClusterMembership& new_mem
// start the thread
dout(5) << "starting the db mgr thread at epoch: " << new_membership.epoch << dendl;
db_thread_should_exit = false;
- quiesce_db_thread.create("quiesce_db_mgr");
+ quiesce_db_thread.create("mds-q-db");
} else if (quiesce_db_thread.is_started()) {
submit_condition.notify_all();
}
diff --git a/src/mds/Server.cc b/src/mds/Server.cc
index cf286b46d46..5874a3dce56 100644
--- a/src/mds/Server.cc
+++ b/src/mds/Server.cc
@@ -615,6 +615,9 @@ void Server::handle_client_session(const cref_t<MClientSession> &m)
mds->send_message(reply, m->get_connection());
return;
}
+ if (!session->client_opened) {
+ session->client_opened = true;
+ }
if (session->is_opening() ||
session->is_open() ||
session->is_stale() ||
@@ -1054,7 +1057,7 @@ version_t Server::prepare_force_open_sessions(map<client_t,entity_inst_t>& cm,
return pv;
}
-void Server::finish_force_open_sessions(const map<client_t,pair<Session*,uint64_t> >& smap,
+void Server::finish_force_open_sessions(map<client_t,pair<Session*,uint64_t> >& smap,
bool dec_import)
{
/*
@@ -1073,7 +1076,7 @@ void Server::finish_force_open_sessions(const map<client_t,pair<Session*,uint64_
dout(10) << "force_open_sessions skipping changed " << session->info.inst << dendl;
} else {
dout(10) << "force_open_sessions opened " << session->info.inst << dendl;
- mds->sessionmap.set_state(session, Session::STATE_OPEN);
+ it.second.second = mds->sessionmap.set_state(session, Session::STATE_OPEN);
mds->sessionmap.touch_session(session);
metrics_handler->add_session(session);
@@ -1103,6 +1106,29 @@ void Server::finish_force_open_sessions(const map<client_t,pair<Session*,uint64_
dout(10) << __func__ << ": final v " << mds->sessionmap.get_version() << dendl;
}
+void Server::close_forced_opened_sessions(const map<client_t,pair<Session*,uint64_t> >& smap)
+{
+ dout(10) << __func__ << " on " << smap.size() << " clients" << dendl;
+
+ for (auto &it : smap) {
+ Session *session = it.second.first;
+ uint64_t sseq = it.second.second;
+ if (sseq == 0)
+ continue;
+ if (session->get_state_seq() != sseq) {
+ dout(10) << "skipping changed session (" << session->get_state_name() << ") "
+ << session->info.inst << dendl;
+ continue;
+ }
+ if (session->client_opened)
+ continue;
+ dout(10) << "closing forced opened session (" << session->get_state_name() << ") "
+ << session->info.inst << dendl;
+ ceph_assert(!session->is_importing());
+ journal_close_session(session, Session::STATE_CLOSING, NULL);
+ }
+}
+
class C_MDS_TerminatedSessions : public ServerContext {
void finish(int r) override {
server->terminating_sessions = false;
diff --git a/src/mds/Server.h b/src/mds/Server.h
index 68842ea01cb..5f9a763e550 100644
--- a/src/mds/Server.h
+++ b/src/mds/Server.h
@@ -129,8 +129,9 @@ public:
version_t prepare_force_open_sessions(std::map<client_t,entity_inst_t> &cm,
std::map<client_t,client_metadata_t>& cmm,
std::map<client_t,std::pair<Session*,uint64_t> >& smap);
- void finish_force_open_sessions(const std::map<client_t,std::pair<Session*,uint64_t> >& smap,
+ void finish_force_open_sessions(std::map<client_t,std::pair<Session*,uint64_t> >& smap,
bool dec_import=true);
+ void close_forced_opened_sessions(const std::map<client_t,std::pair<Session*,uint64_t> >& smap);
void flush_client_sessions(std::set<client_t>& client_set, MDSGatherBuilder& gather);
void finish_flush_session(Session *session, version_t seq);
void terminate_sessions();
diff --git a/src/mds/SessionMap.cc b/src/mds/SessionMap.cc
index ba0b0817738..0f6038eb82b 100644
--- a/src/mds/SessionMap.cc
+++ b/src/mds/SessionMap.cc
@@ -615,6 +615,7 @@ void Session::dump(Formatter *f, bool cap_dump) const
f->dump_unsigned("num_completed_requests", get_num_completed_requests());
f->dump_unsigned("num_completed_flushes", get_num_completed_flushes());
f->dump_bool("reconnecting", reconnecting);
+ f->dump_int("importing_count", importing_count);
f->dump_object("recall_caps", recall_caps);
f->dump_object("release_caps", release_caps);
f->dump_object("recall_caps_throttle", recall_caps_throttle);
diff --git a/src/mds/SessionMap.h b/src/mds/SessionMap.h
index 9e82f00a9bf..bfe7dcd4895 100644
--- a/src/mds/SessionMap.h
+++ b/src/mds/SessionMap.h
@@ -417,6 +417,10 @@ public:
session_info_t info; ///< durable bits
MDSAuthCaps auth_caps;
+ // True if the session is opened by the client.
+ // False if the session is forced to open, until it is opened again by the client.
+ bool client_opened = false;
+
xlist<Session*>::item item_session_list;
std::list<ceph::ref_t<Message>> preopen_out_queue; ///< messages for client, queued before they connect
diff --git a/src/messages/MClientCaps.h b/src/messages/MClientCaps.h
index 96b2cb7d8b8..b001032225e 100644
--- a/src/messages/MClientCaps.h
+++ b/src/messages/MClientCaps.h
@@ -117,9 +117,9 @@ private:
void set_ctime(const utime_t &t) { ctime = t; }
void set_atime(const utime_t &t) { atime = t; }
- void set_cap_peer(uint64_t id, ceph_seq_t seq, ceph_seq_t mseq, int mds, int flags) {
+ void set_cap_peer(uint64_t id, ceph_seq_t issue_seq, ceph_seq_t mseq, int mds, int flags) {
peer.cap_id = id;
- peer.seq = seq;
+ peer.issue_seq = issue_seq;
peer.mseq = mseq;
peer.mds = mds;
peer.flags = flags;
@@ -137,11 +137,12 @@ protected:
inodeno_t ino,
inodeno_t realm,
uint64_t id,
- long seq,
+ ceph_seq_t seq,
int caps,
int wanted,
int dirty,
- int mseq,
+ ceph_seq_t mseq,
+ ceph_seq_t issue_seq,
epoch_t oeb)
: SafeMessage{CEPH_MSG_CLIENT_CAPS, HEAD_VERSION, COMPAT_VERSION},
osd_epoch_barrier(oeb) {
@@ -155,11 +156,12 @@ protected:
head.wanted = wanted;
head.dirty = dirty;
head.migrate_seq = mseq;
+ head.issue_seq = issue_seq;
memset(&peer, 0, sizeof(peer));
}
MClientCaps(int op,
inodeno_t ino, inodeno_t realm,
- uint64_t id, int mseq, epoch_t oeb)
+ uint64_t id, ceph_seq_t mseq, epoch_t oeb)
: SafeMessage{CEPH_MSG_CLIENT_CAPS, HEAD_VERSION, COMPAT_VERSION},
osd_epoch_barrier(oeb) {
memset(&head, 0, sizeof(head));
@@ -181,7 +183,8 @@ public:
out << "client_caps(" << ceph_cap_op_name(head.op)
<< " ino " << inodeno_t(head.ino)
<< " " << head.cap_id
- << " seq " << head.seq;
+ << " seq " << head.seq
+ << " issue_seq " << head.issue_seq;
if (get_tid())
out << " tid " << get_tid();
out << " caps=" << ccap_string(head.caps)
diff --git a/src/mon/MDSMonitor.cc b/src/mon/MDSMonitor.cc
index d8cca4ceb61..f742303c6e9 100644
--- a/src/mon/MDSMonitor.cc
+++ b/src/mon/MDSMonitor.cc
@@ -758,6 +758,14 @@ bool MDSMonitor::prepare_beacon(MonOpRequestRef op)
if (state == MDSMap::STATE_DNE) {
dout(1) << __func__ << ": DNE from " << info << dendl;
+
+ /* send a beacon reply so MDSDaemon::suicide() finishes the
+ Beacon::send_and_wait() call */
+ auto beacon = make_message<MMDSBeacon>(mon.monmap->fsid,
+ m->get_global_id(), m->get_name(), get_fsmap().get_epoch(),
+ m->get_state(), m->get_seq(), CEPH_FEATURES_SUPPORTED_DEFAULT);
+ mon.send_reply(op, beacon.detach());
+
goto evict;
}
diff --git a/src/mon/MonMap.cc b/src/mon/MonMap.cc
index 5598bbe8709..8d0540d71f2 100644
--- a/src/mon/MonMap.cc
+++ b/src/mon/MonMap.cc
@@ -196,7 +196,12 @@ void MonMap::encode(ceph::buffer::list& blist, uint64_t con_features) const
if (!HAVE_FEATURE(con_features, MONENC) ||
!HAVE_FEATURE(con_features, SERVER_NAUTILUS)) {
for (auto& [name, info] : mon_info) {
- legacy_mon_addr[name] = info.public_addrs.legacy_addr();
+ // see note in mon_info_t::encode()
+ auto addr = info.public_addrs.legacy_addr();
+ if (addr == entity_addr_t()) {
+ addr = info.public_addrs.as_legacy_addr();
+ }
+ legacy_mon_addr[name] = addr;
}
}
diff --git a/src/mon/NVMeofGwMap.cc b/src/mon/NVMeofGwMap.cc
index 9fda03b4905..719403925ad 100755
--- a/src/mon/NVMeofGwMap.cc
+++ b/src/mon/NVMeofGwMap.cc
@@ -16,7 +16,9 @@
#include "NVMeofGwMon.h"
#include "NVMeofGwMap.h"
#include "OSDMonitor.h"
+#include "mon/health_check.h"
+using std::list;
using std::map;
using std::make_pair;
using std::ostream;
@@ -893,6 +895,47 @@ struct CMonRequestProposal : public Context {
}
};
+void NVMeofGwMap::get_health_checks(health_check_map_t *checks) const
+{
+ list<string> singleGatewayDetail;
+ list<string> gatewayDownDetail;
+ for (const auto& created_map_pair: created_gws) {
+ const auto& group_key = created_map_pair.first;
+ auto& group = group_key.second;
+ const NvmeGwMonStates& gw_created_map = created_map_pair.second;
+ if ( gw_created_map.size() == 1) {
+ ostringstream ss;
+ ss << "NVMeoF Gateway Group '" << group << "' has 1 gateway." ;
+ singleGatewayDetail.push_back(ss.str());
+ }
+ for (const auto& gw_created_pair: gw_created_map) {
+ const auto& gw_id = gw_created_pair.first;
+ const auto& gw_created = gw_created_pair.second;
+ if (gw_created.availability == gw_availability_t::GW_UNAVAILABLE) {
+ ostringstream ss;
+ ss << "NVMeoF Gateway '" << gw_id << "' is unavailable." ;
+ gatewayDownDetail.push_back(ss.str());
+ }
+ }
+ }
+ if (!singleGatewayDetail.empty()) {
+ ostringstream ss;
+ ss << singleGatewayDetail.size() << " group(s) have only 1 nvmeof gateway"
+ << "; HA is not possible with single gateway.";
+ auto& d = checks->add("NVMEOF_SINGLE_GATEWAY", HEALTH_WARN,
+ ss.str(), singleGatewayDetail.size());
+ d.detail.swap(singleGatewayDetail);
+ }
+ if (!gatewayDownDetail.empty()) {
+ ostringstream ss;
+ ss << gatewayDownDetail.size() << " gateway(s) are in unavailable state"
+ << "; gateway might be down, try to redeploy.";
+ auto& d = checks->add("NVMEOF_GATEWAY_DOWN", HEALTH_WARN,
+ ss.str(), gatewayDownDetail.size());
+ d.detail.swap(gatewayDownDetail);
+ }
+}
+
int NVMeofGwMap::blocklist_gw(
const NvmeGwId &gw_id, const NvmeGroupKey& group_key,
NvmeAnaGrpId grpid, epoch_t &epoch, bool failover)
diff --git a/src/mon/NVMeofGwMap.h b/src/mon/NVMeofGwMap.h
index 267d85b10f9..5f657733012 100755
--- a/src/mon/NVMeofGwMap.h
+++ b/src/mon/NVMeofGwMap.h
@@ -27,6 +27,9 @@
#include "NVMeofGwTypes.h"
using ceph::coarse_mono_clock;
+
+class health_check_map_t;
+
class Monitor;
/*-------------------*/
class NVMeofGwMap
@@ -140,6 +143,8 @@ public:
decode(fsm_timers, bl);
DECODE_FINISH(bl);
}
+
+ void get_health_checks(health_check_map_t *checks) const;
};
#include "NVMeofGwSerialize.h"
diff --git a/src/mon/NVMeofGwMon.cc b/src/mon/NVMeofGwMon.cc
index 9d80c23a4c1..0fe5c3e655f 100644
--- a/src/mon/NVMeofGwMon.cc
+++ b/src/mon/NVMeofGwMon.cc
@@ -181,6 +181,11 @@ void NVMeofGwMon::encode_pending(MonitorDBStore::TransactionRef t)
<< HAVE_FEATURE(mon.get_quorum_con_features(), NVMEOFHA) << dendl;
put_version(t, pending_map.epoch, bl);
put_last_committed(t, pending_map.epoch);
+
+ //health
+ health_check_map_t checks;
+ pending_map.get_health_checks(&checks);
+ encode_health(checks, t);
}
void NVMeofGwMon::update_from_paxos(bool *need_bootstrap)
@@ -193,6 +198,7 @@ void NVMeofGwMon::update_from_paxos(bool *need_bootstrap)
bufferlist bl;
int err = get_version(version, bl);
ceph_assert(err == 0);
+ load_health();
auto p = bl.cbegin();
map.decode(p);
diff --git a/src/msg/async/Stack.h b/src/msg/async/Stack.h
index 6739968f4e2..5f8bbc172df 100644
--- a/src/msg/async/Stack.h
+++ b/src/msg/async/Stack.h
@@ -352,7 +352,7 @@ class NetworkStack {
static constexpr int TASK_COMM_LEN = 16;
char tp_name[TASK_COMM_LEN];
sprintf(tp_name, "msgr-worker-%u", id);
- ceph_pthread_setname(pthread_self(), tp_name);
+ ceph_pthread_setname(tp_name);
}
protected:
diff --git a/src/msg/async/rdma/RDMAStack.cc b/src/msg/async/rdma/RDMAStack.cc
index 12db599d684..789a624cf90 100644
--- a/src/msg/async/rdma/RDMAStack.cc
+++ b/src/msg/async/rdma/RDMAStack.cc
@@ -92,7 +92,6 @@ void RDMADispatcher::polling_start()
ceph_assert(rx_cq);
t = std::thread(&RDMADispatcher::polling, this);
- ceph_pthread_setname(t.native_handle(), "rdma-polling");
}
void RDMADispatcher::polling_stop()
@@ -263,6 +262,7 @@ int RDMADispatcher::post_chunks_to_rq(int num, QueuePair *qp)
void RDMADispatcher::polling()
{
+ ceph_pthread_setname("rdma-polling");
static int MAX_COMPLETIONS = 32;
ibv_wc wc[MAX_COMPLETIONS];
diff --git a/src/osd/PrimaryLogPG.cc b/src/osd/PrimaryLogPG.cc
index 14d2f85f40f..44f8e85b5ef 100644
--- a/src/osd/PrimaryLogPG.cc
+++ b/src/osd/PrimaryLogPG.cc
@@ -6006,7 +6006,7 @@ int PrimaryLogPG::do_osd_ops(OpContext *ctx, vector<OSDOp>& ops)
object_info_t& oi = obs.oi;
const hobject_t& soid = oi.soid;
const bool skip_data_digest = osd->store->has_builtin_csum() &&
- osd->osd_skip_data_digest;
+ *osd->osd_skip_data_digest;
PGTransaction* t = ctx->op_t.get();
@@ -6069,9 +6069,9 @@ int PrimaryLogPG::do_osd_ops(OpContext *ctx, vector<OSDOp>& ops)
// munge ZERO -> TRUNCATE? (don't munge to DELETE or we risk hosing attributes)
if (op.op == CEPH_OSD_OP_ZERO &&
obs.exists &&
- op.extent.offset < static_cast<Option::size_t>(osd->osd_max_object_size) &&
+ op.extent.offset < *osd->osd_max_object_size &&
op.extent.length >= 1 &&
- op.extent.length <= static_cast<Option::size_t>(osd->osd_max_object_size) &&
+ op.extent.length <= *osd->osd_max_object_size &&
op.extent.offset + op.extent.length >= oi.size) {
if (op.extent.offset >= oi.size) {
// no-op
@@ -6781,7 +6781,7 @@ int PrimaryLogPG::do_osd_ops(OpContext *ctx, vector<OSDOp>& ops)
}
result = check_offset_and_length(
op.extent.offset, op.extent.length,
- static_cast<Option::size_t>(osd->osd_max_object_size), get_dpp());
+ *osd->osd_max_object_size, get_dpp());
if (result < 0)
break;
@@ -6838,7 +6838,7 @@ int PrimaryLogPG::do_osd_ops(OpContext *ctx, vector<OSDOp>& ops)
}
result = check_offset_and_length(
0, op.extent.length,
- static_cast<Option::size_t>(osd->osd_max_object_size), get_dpp());
+ *osd->osd_max_object_size, get_dpp());
if (result < 0)
break;
@@ -6888,7 +6888,7 @@ int PrimaryLogPG::do_osd_ops(OpContext *ctx, vector<OSDOp>& ops)
{ // zero
result = check_offset_and_length(
op.extent.offset, op.extent.length,
- static_cast<Option::size_t>(osd->osd_max_object_size), get_dpp());
+ *osd->osd_max_object_size, get_dpp());
if (result < 0)
break;
@@ -6953,7 +6953,7 @@ int PrimaryLogPG::do_osd_ops(OpContext *ctx, vector<OSDOp>& ops)
result = check_offset_and_length(
op.extent.offset, op.extent.length,
- static_cast<Option::size_t>(osd->osd_max_object_size), get_dpp());
+ *osd->osd_max_object_size, get_dpp());
if (result < 0)
break;
diff --git a/src/osd/scrubber/osd_scrub.cc b/src/osd/scrubber/osd_scrub.cc
index c8cf27d2116..110c2c7d266 100644
--- a/src/osd/scrubber/osd_scrub.cc
+++ b/src/osd/scrubber/osd_scrub.cc
@@ -65,7 +65,7 @@ void OsdScrub::dump_scrubs(ceph::Formatter* f) const
void OsdScrub::dump_scrub_reservations(ceph::Formatter* f) const
{
m_resource_bookkeeper.dump_scrub_reservations(f);
- f->open_array_section("remote_scrub_reservations");
+ f->open_object_section("remote_scrub_reservations");
m_osd_svc.get_scrub_reserver().dump(f);
f->close_section();
}
diff --git a/src/osd/scrubber/osd_scrub_sched.cc b/src/osd/scrubber/osd_scrub_sched.cc
index cd80625aaec..c116bcbb4c2 100644
--- a/src/osd/scrubber/osd_scrub_sched.cc
+++ b/src/osd/scrubber/osd_scrub_sched.cc
@@ -140,9 +140,10 @@ bool ScrubQueue::remove_entry_unlocked(spg_t pgid, scrub_level_t s_or_d)
void ScrubQueue::dump_scrubs(ceph::Formatter* f) const
{
ceph_assert(f != nullptr);
+ const auto query_time = ceph_clock_now();
f->open_array_section("scrubs");
for_each_job(
- [&f](const Scrub::SchedEntry& e) {
+ [&f, query_time](const Scrub::SchedEntry& e) {
f->open_object_section("scrub");
f->dump_stream("pgid") << e.pgid;
f->dump_stream("sched_time") << e.schedule.not_before;
@@ -151,6 +152,15 @@ void ScrubQueue::dump_scrubs(ceph::Formatter* f) const
f->dump_bool(
"forced",
e.schedule.scheduled_at == PgScrubber::scrub_must_stamp());
+
+ f->dump_stream("level") << (e.level == scrub_level_t::shallow
+ ? "shallow"
+ : "deep");
+ f->dump_stream("urgency") << fmt::format("{}", e.urgency);
+ f->dump_bool("eligible", e.schedule.not_before <= query_time);
+ f->dump_bool("overdue", e.schedule.deadline < query_time);
+ f->dump_stream("last_issue") << fmt::format("{}", e.last_issue);
+
f->close_section();
},
std::numeric_limits<int>::max());
diff --git a/src/osd/scrubber/pg_scrubber.cc b/src/osd/scrubber/pg_scrubber.cc
index c37f31d28dc..aa53df5ae8a 100644
--- a/src/osd/scrubber/pg_scrubber.cc
+++ b/src/osd/scrubber/pg_scrubber.cc
@@ -3,13 +3,13 @@
#include "./pg_scrubber.h" // '.' notation used to affect clang-format order
+#include <fmt/ranges.h>
+
#include <cmath>
#include <iostream>
#include <span>
#include <vector>
-#include <fmt/ranges.h>
-
#include "debug.h"
#include "common/ceph_time.h"
@@ -766,8 +766,13 @@ void PgScrubber::on_operator_periodic_cmd(
asok_response_section(f, true, scrub_level, stamp);
if (scrub_level == scrub_level_t::deep) {
+ const auto saved_shallow_stamp = m_pg->info.history.last_scrub_stamp;
// this call sets both stamps
m_pg->set_last_deep_scrub_stamp(stamp);
+ // restore the shallow stamp, as otherwise it will be scheduled before
+ // the deep, failing whatever test code called us (this is a test-only
+ // interface).
+ m_pg->set_last_scrub_stamp(saved_shallow_stamp);
} else {
m_pg->set_last_scrub_stamp(stamp);
}
@@ -819,21 +824,21 @@ namespace {
* an aux function to be used in select_range() below, to
* select the correct chunk size based on the type of scrub
*/
-int size_from_conf(
+int64_t size_from_conf(
bool is_deep,
const ceph::common::ConfigProxy& conf,
- std::string_view deep_opt,
- std::string_view shallow_opt)
+ const md_config_cacher_t<int64_t>& deep_opt,
+ const md_config_cacher_t<int64_t>& shallow_opt)
{
if (!is_deep) {
- auto sz = conf.get_val<int64_t>(shallow_opt);
+ auto sz = *shallow_opt;
if (sz != 0) {
// assuming '0' means that no distinction was yet configured between
// deep and shallow scrubbing
- return static_cast<int>(sz);
+ return sz;
}
}
- return static_cast<int>(conf.get_val<int64_t>(deep_opt));
+ return *deep_opt;
}
} // anonymous namespace
@@ -912,16 +917,16 @@ std::optional<uint64_t> PgScrubber::select_range()
dout(20) << fmt::format(
"{} {} mins: {}d {}s, max: {}d {}s", __func__,
(m_is_deep ? "D" : "S"),
- conf.get_val<int64_t>("osd_scrub_chunk_min"),
- conf.get_val<int64_t>("osd_shallow_scrub_chunk_min"),
- conf.get_val<int64_t>("osd_scrub_chunk_max"),
- conf.get_val<int64_t>("osd_shallow_scrub_chunk_max"))
+ *osd_scrub_chunk_min,
+ *osd_shallow_scrub_chunk_min,
+ *osd_scrub_chunk_max,
+ *osd_shallow_scrub_chunk_max)
<< dendl;
- const int min_from_conf = size_from_conf(
- m_is_deep, conf, "osd_scrub_chunk_min", "osd_shallow_scrub_chunk_min");
- const int max_from_conf = size_from_conf(
- m_is_deep, conf, "osd_scrub_chunk_max", "osd_shallow_scrub_chunk_max");
+ const int min_from_conf = static_cast<int>(size_from_conf(
+ m_is_deep, conf, osd_scrub_chunk_min, osd_shallow_scrub_chunk_min));
+ const int max_from_conf = static_cast<int>(size_from_conf(
+ m_is_deep, conf, osd_scrub_chunk_max, osd_shallow_scrub_chunk_max));
const int divisor = static_cast<int>(preemption_data.chunk_divisor());
const int min_chunk_sz = std::max(3, min_from_conf / divisor);
@@ -1635,7 +1640,7 @@ void PgScrubber::replica_scrub_op(OpRequestRef op)
advance_token();
const auto& conf = m_pg->get_cct()->_conf;
const int max_from_conf = size_from_conf(
- m_is_deep, conf, "osd_scrub_chunk_max", "osd_shallow_scrub_chunk_max");
+ m_is_deep, conf, osd_scrub_chunk_max, osd_shallow_scrub_chunk_max);
auto cost = get_scrub_cost(max_from_conf);
m_osds->queue_for_rep_scrub(m_pg,
m_replica_request_priority,
@@ -2541,6 +2546,16 @@ PgScrubber::PgScrubber(PG* pg)
, m_pg_id{pg->pg_id}
, m_osds{m_pg->osd}
, m_pg_whoami{pg->pg_whoami}
+ , osd_scrub_chunk_max{m_osds->cct->_conf, "osd_scrub_chunk_max"}
+ , osd_shallow_scrub_chunk_max{m_osds->cct->_conf,
+ "osd_shallow_scrub_chunk_max"}
+ , osd_scrub_chunk_min{m_osds->cct->_conf, "osd_scrub_chunk_min"}
+ , osd_shallow_scrub_chunk_min{m_osds->cct->_conf,
+ "osd_shallow_scrub_chunk_min"}
+ , osd_stats_update_period_scrubbing{
+ m_osds->cct->_conf, "osd_stats_update_period_scrubbing"}
+ , osd_stats_update_period_not_scrubbing{
+ m_osds->cct->_conf, "osd_stats_update_period_not_scrubbing"}
, preemption_data{pg}
{
m_fsm = std::make_unique<ScrubMachine>(m_pg, this);
@@ -2669,7 +2684,8 @@ const OSDMapRef& PgScrubber::get_osdmap() const
LoggerSinkSet& PgScrubber::get_logger() const { return *m_osds->clog.get(); }
-ostream &operator<<(ostream &out, const PgScrubber &scrubber) {
+ostream& operator<<(ostream& out, const PgScrubber& scrubber)
+{
return out << scrubber.m_flags;
}
@@ -2783,16 +2799,14 @@ void PgScrubber::update_scrub_stats(ceph::coarse_real_clock::time_point now_is)
using clock = ceph::coarse_real_clock;
using namespace std::chrono;
- const seconds period_active = seconds(m_pg->get_cct()->_conf.get_val<int64_t>(
- "osd_stats_update_period_scrubbing"));
+ const seconds period_active = seconds(*osd_stats_update_period_scrubbing);
if (!period_active.count()) {
// a way for the operator to disable these stats updates
return;
}
- const seconds period_inactive =
- seconds(m_pg->get_cct()->_conf.get_val<int64_t>(
- "osd_stats_update_period_not_scrubbing") +
- m_pg_id.pgid.m_seed % 30);
+ const seconds period_inactive = seconds(
+ *osd_stats_update_period_not_scrubbing +
+ m_pg_id.pgid.m_seed % 30);
// determine the required update period, based on our current state
auto period{period_inactive};
@@ -2826,10 +2840,10 @@ void PgScrubber::update_scrub_stats(ceph::coarse_real_clock::time_point now_is)
// ///////////////////// preemption_data_t //////////////////////////////////
-PgScrubber::preemption_data_t::preemption_data_t(PG* pg) : m_pg{pg}
+PgScrubber::preemption_data_t::preemption_data_t(PG* pg) : m_pg{pg},
+ osd_scrub_max_preemptions{pg->cct->_conf, "osd_scrub_max_preemptions"}
{
- m_left = static_cast<int>(
- m_pg->get_cct()->_conf.get_val<uint64_t>("osd_scrub_max_preemptions"));
+ m_left = *osd_scrub_max_preemptions;
}
void PgScrubber::preemption_data_t::reset()
@@ -2838,8 +2852,7 @@ void PgScrubber::preemption_data_t::reset()
m_preemptable = false;
m_preempted = false;
- m_left = static_cast<int>(
- m_pg->cct->_conf.get_val<uint64_t>("osd_scrub_max_preemptions"));
+ m_left = *osd_scrub_max_preemptions;
m_size_divisor = 1;
}
diff --git a/src/osd/scrubber/pg_scrubber.h b/src/osd/scrubber/pg_scrubber.h
index 3d7e16cd359..0d9e8c1e9f6 100644
--- a/src/osd/scrubber/pg_scrubber.h
+++ b/src/osd/scrubber/pg_scrubber.h
@@ -75,6 +75,8 @@ Main Scrubber interfaces:
#include <string_view>
#include <vector>
+#include "common/config_proxy.h"
+#include "common/config_cacher.h"
#include "osd/PG.h"
#include "osd/scrubber_common.h"
@@ -895,6 +897,24 @@ class PgScrubber : public ScrubPgIF,
// scrub state.
ceph::coarse_real_clock::time_point m_last_stat_upd{};
+ // ------------------ cached (frequently used) configuration values
+
+ /// initial (& max) number of objects to scrub in one pass - deep scrub
+ md_config_cacher_t<int64_t> osd_scrub_chunk_max;
+ /// initial (& max) number of objects to scrub in one pass - shallow
+ md_config_cacher_t<int64_t> osd_shallow_scrub_chunk_max;
+
+ /// chunk size won't be reduced (when preempted) below this
+ /// value (deep scrub)
+ md_config_cacher_t<int64_t> osd_scrub_chunk_min;
+ /// chunk size won't be reduced below this value (shallow scrub)
+ md_config_cacher_t<int64_t> osd_shallow_scrub_chunk_min;
+
+ /// stats update (publish_stats_to_osd()) interval while scrubbing
+ md_config_cacher_t<int64_t> osd_stats_update_period_scrubbing;
+ /// stats update interval while not scrubbing
+ md_config_cacher_t<int64_t> osd_stats_update_period_not_scrubbing;
+
// ------------ members used if we are a replica
epoch_t m_replica_min_epoch; ///< the min epoch needed to handle this message
@@ -991,6 +1011,9 @@ class PgScrubber : public ScrubPgIF,
mutable ceph::mutex m_preemption_lock = ceph::make_mutex("preemption_lock");
bool m_preemptable{false};
bool m_preempted{false};
+
+ /// the number of preemptions allowed before we start blocking
+ md_config_cacher_t<uint64_t> osd_scrub_max_preemptions;
int m_left;
size_t m_size_divisor{1};
bool are_preemptions_left() const { return m_left > 0; }
diff --git a/src/pybind/mgr/balancer/module.py b/src/pybind/mgr/balancer/module.py
index c98ce9aec41..476304275c1 100644
--- a/src/pybind/mgr/balancer/module.py
+++ b/src/pybind/mgr/balancer/module.py
@@ -325,6 +325,11 @@ class Module(MgrModule):
type='str',
default='',
desc='pools which the automatic balancing will be limited to',
+ runtime=True),
+ Option(name='update_pg_upmap_activity',
+ type='bool',
+ default=False,
+ desc='Updates pg_upmap activity stats to be used in `balancer status detail`',
runtime=True)
]
@@ -339,12 +344,10 @@ class Module(MgrModule):
no_optimization_needed = False
success_string = 'Optimization plan created successfully'
in_progress_string = 'in progress'
- last_pg_upmap: List[Dict[str, Any]] = []
pg_upmap_items_added: List[Dict[str, Any]] = []
pg_upmap_items_removed: List[Dict[str, Any]] = []
- last_pg_upmap_primaries: List[Dict[str, Any]] = []
pg_upmap_primaries_added: List[Dict[str, Any]] = []
- pg_upmap_activity_initalized = False
+ pg_upmap_primaries_removed: List[Dict[str, Any]] = []
def __init__(self, *args: Any, **kwargs: Any) -> None:
super(Module, self).__init__(*args, **kwargs)
@@ -371,6 +374,11 @@ class Module(MgrModule):
"""
Show balancer status (detailed)
"""
+ pg_upmap_activity = cast(bool, self.get_module_option('update_pg_upmap_activity'))
+ if not pg_upmap_activity:
+ msg = 'This command is disabled.\n' \
+ 'To enable, run `ceph config set mgr mgr/balancer/update_pg_upmap_activity True`.\n'
+ return 0, msg, ''
s = {
'plans': list(self.plans.keys()),
'active': self.active,
@@ -665,7 +673,9 @@ class Module(MgrModule):
if not plan_:
return (-errno.ENOENT, '', f'plan {plan} not found')
r, detail = self.execute(plan_)
- self.update_pg_upmap_activity() # update pg activity in `balancer status detail`
+ pg_upmap_activity = cast(bool, self.get_module_option('update_pg_upmap_activity'))
+ if pg_upmap_activity:
+ self.update_pg_upmap_activity(plan_) # update pg activity in `balancer status detail`
self.plan_rm(plan)
return (r, '', detail)
@@ -757,7 +767,9 @@ class Module(MgrModule):
self.execute(plan)
else:
self.optimize_result = detail
- self.update_pg_upmap_activity() # update pg activity in `balancer status detail`
+ pg_upmap_activity = cast(bool, self.get_module_option('update_pg_upmap_activity'))
+ if pg_upmap_activity:
+ self.update_pg_upmap_activity(plan) # update pg activity in `balancer status detail`
self.optimizing = False
self.log.debug('Sleeping for %d', sleep_interval)
self.event.wait(sleep_interval)
@@ -1582,22 +1594,16 @@ class Module(MgrModule):
'mode': self.mode,
}
- def update_pg_upmap_activity(self) -> None:
- osdmap = self.get_osdmap()
- if not self.pg_upmap_activity_initalized:
- self.last_pg_upmap = osdmap.dump().get('pg_upmap_items', '')
- self.last_pg_upmap_primaries = osdmap.dump().get('pg_upmap_primaries', '')
- self.pg_upmap_activity_initalized = True
+ def update_pg_upmap_activity(self, plan: Plan) -> None:
+ incdump = plan.inc.dump()
# update pg_upmap_items
- self.pg_upmap_items_added = [pg for pg in osdmap.dump().get('pg_upmap_items', '') if pg not in self.last_pg_upmap]
- self.pg_upmap_items_removed = [pg for pg in self.last_pg_upmap if pg not in osdmap.dump().get('pg_upmap_items', '')]
- self.last_pg_upmap = osdmap.dump().get('pg_upmap_items', '')
+ self.pg_upmap_items_added = incdump.get('new_pg_upmap_items', [])
+ self.pg_upmap_items_removed = incdump.get('old_pg_upmap_items', [])
# update pg_upmap_primaries
- self.pg_upmap_primaries_added = [pg for pg in osdmap.dump().get('pg_upmap_primaries', '') if pg not in self.last_pg_upmap_primaries]
- self.pg_upmap_primaries_removed = [pg for pg in self.last_pg_upmap_primaries if pg not in osdmap.dump().get('pg_upmap_primaries', '')]
- self.last_pg_upmap_primaries = osdmap.dump().get('pg_upmap_primaries', '')
+ self.pg_upmap_primaries_added = incdump.get('new_pg_upmap_primaries', [])
+ self.pg_upmap_primaries_removed = incdump.get('old_pg_upmap_primaries', [])
def self_test(self) -> None:
# turn balancer on
diff --git a/src/pybind/mgr/cephadm/cert_mgr.py b/src/pybind/mgr/cephadm/cert_mgr.py
index 9b68e85ca44..0c56c704788 100644
--- a/src/pybind/mgr/cephadm/cert_mgr.py
+++ b/src/pybind/mgr/cephadm/cert_mgr.py
@@ -1,6 +1,6 @@
from cephadm.ssl_cert_utils import SSLCerts, SSLConfigException
-from typing import TYPE_CHECKING, Tuple, Union, List
+from typing import TYPE_CHECKING, Tuple, Union, List, Optional
if TYPE_CHECKING:
from cephadm.module import CephadmOrchestrator
@@ -28,5 +28,10 @@ class CertMgr:
def get_root_ca(self) -> str:
return self.ssl_certs.get_root_cert()
- def generate_cert(self, host_fqdn: Union[str, List[str]], node_ip: Union[str, List[str]]) -> Tuple[str, str]:
- return self.ssl_certs.generate_cert(host_fqdn, node_ip)
+ def generate_cert(
+ self,
+ host_fqdn: Union[str, List[str]],
+ node_ip: Union[str, List[str]],
+ custom_san_list: Optional[List[str]] = None,
+ ) -> Tuple[str, str]:
+ return self.ssl_certs.generate_cert(host_fqdn, node_ip, custom_san_list=custom_san_list)
diff --git a/src/pybind/mgr/cephadm/inventory.py b/src/pybind/mgr/cephadm/inventory.py
index 00410c1bc38..d3681cbe60a 100644
--- a/src/pybind/mgr/cephadm/inventory.py
+++ b/src/pybind/mgr/cephadm/inventory.py
@@ -638,6 +638,9 @@ class TunedProfileStore():
logger.error(
f'Attempted to set setting "{setting}" for nonexistent os tuning profile "{profile}"')
+ def add_settings(self, profile: str, settings: dict) -> None:
+ self.process_settings(profile, settings, action='add')
+
def rm_setting(self, profile: str, setting: str) -> None:
if profile in self.profiles:
if setting in self.profiles[profile].settings:
@@ -651,6 +654,39 @@ class TunedProfileStore():
logger.error(
f'Attempted to remove setting "{setting}" from nonexistent os tuning profile "{profile}"')
+ def rm_settings(self, profile: str, settings: List[str]) -> None:
+ self.process_settings(profile, settings, action='remove')
+
+ def process_settings(self, profile: str, settings: Union[dict, list], action: str) -> None:
+ """
+ Process settings by either adding or removing them based on the action specified.
+ """
+ if profile not in self.profiles:
+ logger.error(f'Attempted to {action} settings for nonexistent os tuning profile "{profile}"')
+ return
+ profile_settings = self.profiles[profile].settings
+ if action == 'remove' and isinstance(settings, list):
+ invalid_settings = [s for s in settings if '=' in s or s not in profile_settings]
+ if invalid_settings:
+ raise OrchestratorError(
+ f"Invalid settings: {', '.join(invalid_settings)}. "
+ "Ensure settings are specified without '=' and exist in the profile. Correct format: key1,key2"
+ )
+ if action == 'add' and isinstance(settings, dict):
+ for setting, value in settings.items():
+ self.profiles[profile].settings[setting] = value
+ elif action == 'remove' and isinstance(settings, list):
+ for setting in settings:
+ self.profiles[profile].settings.pop(setting, '')
+ else:
+ logger.error(
+ f'Invalid action "{action}" for settings modification for tuned profile '
+ f'"{profile}". Valid actions are "add" and "remove"'
+ )
+ return
+ self.profiles[profile]._last_updated = datetime_to_str(datetime_now())
+ self.save()
+
def add_profile(self, spec: TunedProfileSpec) -> None:
spec._last_updated = datetime_to_str(datetime_now())
self.profiles[spec.profile_name] = spec
diff --git a/src/pybind/mgr/cephadm/module.py b/src/pybind/mgr/cephadm/module.py
index e93a286cec6..e851f1ee3fc 100644
--- a/src/pybind/mgr/cephadm/module.py
+++ b/src/pybind/mgr/cephadm/module.py
@@ -3499,6 +3499,33 @@ Then run the following:
return f'Added setting {setting} with value {value} to tuned profile {profile_name}'
@handle_orch_error
+ def tuned_profile_add_settings(self, profile_name: str, settings: dict) -> str:
+ if profile_name not in self.tuned_profiles:
+ raise OrchestratorError(
+                f"Tuned profile {profile_name} does not exist. Cannot add settings."
+ )
+ self.tuned_profiles.add_settings(profile_name, settings)
+ results = [
+ f"Added setting {key} with value {value} to tuned profile {profile_name}"
+ for key, value in settings.items()
+ ]
+ self._kick_serve_loop()
+ return "\n".join(results)
+
+ @handle_orch_error
+ def tuned_profile_rm_settings(self, profile_name: str, settings: List[str]) -> str:
+ if profile_name not in self.tuned_profiles:
+ raise OrchestratorError(
+                f"Tuned profile {profile_name} does not exist. Cannot remove settings."
+ )
+ self.tuned_profiles.rm_settings(profile_name, settings)
+ results = [
+            f'Removed setting {s} from tuned profile {profile_name}' for s in settings
+ ]
+ self._kick_serve_loop()
+ return "\n".join(results)
+
+ @handle_orch_error
def tuned_profile_rm_setting(self, profile_name: str, setting: str) -> str:
if profile_name not in self.tuned_profiles:
raise OrchestratorError(
diff --git a/src/pybind/mgr/cephadm/serve.py b/src/pybind/mgr/cephadm/serve.py
index 4a7959ae045..8e9cd00fa81 100644
--- a/src/pybind/mgr/cephadm/serve.py
+++ b/src/pybind/mgr/cephadm/serve.py
@@ -1436,8 +1436,24 @@ class CephadmServe:
config_blobs=daemon_spec.final_config,
).dump_json_str(),
use_current_daemon_image=reconfig,
+ error_ok=True
)
+ # return number corresponding to DAEMON_FAILED_ERROR
+ # in src/cephadm/cephadmlib/constants.
+ # TODO: link these together so one cannot be changed without the other
+ if code == 17:
+ # daemon failed on systemctl start command, meaning while
+ # deployment failed the daemon is present and we should handle
+ # this as if the deploy command "succeeded" and mark the daemon
+ # as failed later when we fetch its status
+ self.mgr.log.error(f'Deployment of {daemon_spec.name()} failed during "systemctl start" command')
+ elif code:
+ # some other failure earlier in the deploy process. Just raise an exception
+ # the same as we would in _run_cephadm on a nonzero rc
+ raise OrchestratorError(
+ f'cephadm exited with an error code: {code}, stderr: {err}')
+
if daemon_spec.daemon_type == 'agent':
self.mgr.agent_cache.agent_timestamp[daemon_spec.host] = datetime_now()
self.mgr.agent_cache.agent_counter[daemon_spec.host] = 1
diff --git a/src/pybind/mgr/cephadm/services/cephadmservice.py b/src/pybind/mgr/cephadm/services/cephadmservice.py
index 9043577bc5a..04f5af28a9b 100644
--- a/src/pybind/mgr/cephadm/services/cephadmservice.py
+++ b/src/pybind/mgr/cephadm/services/cephadmservice.py
@@ -1015,12 +1015,6 @@ class RgwService(CephService):
# set rgw_realm rgw_zonegroup and rgw_zone, if present
self.set_realm_zg_zone(spec)
- if spec.generate_cert and not spec.rgw_frontend_ssl_certificate:
- # generate a self-signed cert for the rgw service
- cert, key = self.mgr.cert_mgr.ssl_certs.generate_root_cert(custom_san_list=spec.zonegroup_hostnames)
- spec.rgw_frontend_ssl_certificate = ''.join([key, cert])
- self.mgr.spec_store.save(spec)
-
if spec.rgw_frontend_ssl_certificate:
if isinstance(spec.rgw_frontend_ssl_certificate, list):
cert_data = '\n'.join(spec.rgw_frontend_ssl_certificate)
@@ -1068,6 +1062,19 @@ class RgwService(CephService):
# and it matches the spec.
port = spec.get_port()
+ if spec.generate_cert:
+ cert, key = self.mgr.cert_mgr.generate_cert(
+ daemon_spec.host,
+ self.mgr.inventory.get_addr(daemon_spec.host),
+ custom_san_list=spec.zonegroup_hostnames
+ )
+ pem = ''.join([key, cert])
+            self.mgr.check_mon_command({
+ 'prefix': 'config-key set',
+ 'key': f'rgw/cert/{daemon_spec.name()}',
+ 'val': pem,
+ })
+
# configure frontend
args = []
ftype = spec.rgw_frontend_type or "beast"
@@ -1078,7 +1085,10 @@ class RgwService(CephService):
f"ssl_endpoint={build_url(host=daemon_spec.ip, port=port).lstrip('/')}")
else:
args.append(f"ssl_port={port}")
- args.append(f"ssl_certificate=config://rgw/cert/{spec.service_name()}")
+ if spec.generate_cert:
+ args.append(f"ssl_certificate=config://rgw/cert/{daemon_spec.name()}")
+ else:
+ args.append(f"ssl_certificate=config://rgw/cert/{spec.service_name()}")
else:
if daemon_spec.ip:
args.append(f"endpoint={build_url(host=daemon_spec.ip, port=port).lstrip('/')}")
@@ -1091,7 +1101,10 @@ class RgwService(CephService):
args.append(f"port={build_url(host=daemon_spec.ip, port=port).lstrip('/')}s")
else:
args.append(f"port={port}s") # note the 's' suffix on port
- args.append(f"ssl_certificate=config://rgw/cert/{spec.service_name()}")
+ if spec.generate_cert:
+ args.append(f"ssl_certificate=config://rgw/cert/{daemon_spec.name()}")
+ else:
+ args.append(f"ssl_certificate=config://rgw/cert/{spec.service_name()}")
else:
if daemon_spec.ip:
args.append(f"port={build_url(host=daemon_spec.ip, port=port).lstrip('/')}")
@@ -1180,6 +1193,10 @@ class RgwService(CephService):
'who': utils.name_to_config_section(daemon.name()),
'name': 'rgw_frontends',
})
+ self.mgr.check_mon_command({
+ 'prefix': 'config-key rm',
+ 'key': f'rgw/cert/{daemon.name()}',
+ })
def ok_to_stop(
self,
diff --git a/src/pybind/mgr/cephadm/services/nvmeof.py b/src/pybind/mgr/cephadm/services/nvmeof.py
index 13f75881453..418be93b6af 100644
--- a/src/pybind/mgr/cephadm/services/nvmeof.py
+++ b/src/pybind/mgr/cephadm/services/nvmeof.py
@@ -66,6 +66,10 @@ class NvmeofService(CephService):
daemon_spec.keyring = keyring
daemon_spec.extra_files = {'ceph-nvmeof.conf': gw_conf}
+ # Indicate to the daemon whether to utilize huge pages
+ if spec.spdk_mem_size:
+ daemon_spec.extra_files['spdk_mem_size'] = str(spec.spdk_mem_size)
+
if spec.enable_auth:
if (
not spec.client_cert
diff --git a/src/pybind/mgr/cephadm/ssl_cert_utils.py b/src/pybind/mgr/cephadm/ssl_cert_utils.py
index 930b276c8de..467b32a4df0 100644
--- a/src/pybind/mgr/cephadm/ssl_cert_utils.py
+++ b/src/pybind/mgr/cephadm/ssl_cert_utils.py
@@ -70,7 +70,12 @@ class SSLCerts:
return (cert_str, key_str)
- def generate_cert(self, _hosts: Union[str, List[str]], _addrs: Union[str, List[str]]) -> Tuple[str, str]:
+ def generate_cert(
+ self,
+ _hosts: Union[str, List[str]],
+ _addrs: Union[str, List[str]],
+ custom_san_list: Optional[List[str]] = None,
+ ) -> Tuple[str, str]:
addrs = [_addrs] if isinstance(_addrs, str) else _addrs
hosts = [_hosts] if isinstance(_hosts, str) else _hosts
@@ -97,6 +102,8 @@ class SSLCerts:
san_list: List[x509.GeneralName] = [x509.DNSName(host) for host in hosts]
if valid_ips:
san_list.extend(ips)
+ if custom_san_list:
+ san_list.extend([x509.DNSName(n) for n in custom_san_list])
builder = builder.add_extension(
x509.SubjectAlternativeName(
diff --git a/src/pybind/mgr/cephadm/templates/services/nvmeof/ceph-nvmeof.conf.j2 b/src/pybind/mgr/cephadm/templates/services/nvmeof/ceph-nvmeof.conf.j2
index 760bc97e515..73f36952ee7 100644
--- a/src/pybind/mgr/cephadm/templates/services/nvmeof/ceph-nvmeof.conf.j2
+++ b/src/pybind/mgr/cephadm/templates/services/nvmeof/ceph-nvmeof.conf.j2
@@ -8,6 +8,7 @@ enable_auth = {{ spec.enable_auth }}
state_update_notify = {{ spec.state_update_notify }}
state_update_interval_sec = {{ spec.state_update_interval_sec }}
enable_spdk_discovery_controller = {{ spec.enable_spdk_discovery_controller }}
+enable_key_encryption = {{ spec.enable_key_encryption }}
enable_prometheus_exporter = {{ spec.enable_prometheus_exporter }}
prometheus_exporter_ssl = False
prometheus_port = 10008
@@ -22,6 +23,10 @@ ping_spdk_under_lock = {{ spec.ping_spdk_under_lock }}
enable_monitor_client = {{ spec.enable_monitor_client }}
max_hosts_per_namespace = {{ spec.max_hosts_per_namespace }}
max_namespaces_with_netmask = {{ spec.max_namespaces_with_netmask }}
+max_subsystems = {{ spec.max_subsystems }}
+max_namespaces = {{ spec.max_namespaces }}
+max_namespaces_per_subsystem = {{ spec.max_namespaces_per_subsystem }}
+max_hosts_per_subsystem = {{ spec.max_hosts_per_subsystem }}
[gateway-logs]
log_level = {{ spec.log_level }}
@@ -65,6 +70,9 @@ protocol_log_level = {{ spec.spdk_protocol_log_level }}
log_file_dir = {{ spec.spdk_log_file_dir }}
{% endif %}
conn_retries = {{ spec.conn_retries }}
+{% if spec.spdk_mem_size %}
+mem_size = {{ spec.spdk_mem_size }}
+{% endif %}
transports = {{ spec.transports }}
{% if transport_tcp_options %}
transport_tcp_options = {{ transport_tcp_options }}
diff --git a/src/pybind/mgr/cephadm/tests/test_cephadm.py b/src/pybind/mgr/cephadm/tests/test_cephadm.py
index 6d0c00d408c..3c647476e44 100644
--- a/src/pybind/mgr/cephadm/tests/test_cephadm.py
+++ b/src/pybind/mgr/cephadm/tests/test_cephadm.py
@@ -136,7 +136,7 @@ def with_osd_daemon(cephadm_module: CephadmOrchestrator, _run_cephadm, host: str
mock.call(host, 'osd', 'ceph-volume',
['--', 'lvm', 'list', '--format', 'json'],
no_fsid=False, error_ok=False, image='', log_output=True, use_current_daemon_image=False),
- mock.call(host, f'osd.{osd_id}', ['_orch', 'deploy'], [], stdin=mock.ANY, use_current_daemon_image=False),
+ mock.call(host, f'osd.{osd_id}', ['_orch', 'deploy'], [], stdin=mock.ANY, error_ok=True, use_current_daemon_image=False),
mock.call(host, 'osd', 'ceph-volume',
['--', 'raw', 'list', '--format', 'json'],
no_fsid=False, error_ok=False, image='', log_output=True, use_current_daemon_image=False),
@@ -499,7 +499,7 @@ class TestCephadm(object):
CephadmServe(cephadm_module)._check_daemons()
- assert _save_host.called_with('test')
+ _save_host.assert_called_with('test')
assert cephadm_module.cache.get_scheduled_daemon_action('test', daemon_name) is None
@mock.patch("cephadm.serve.CephadmServe._run_cephadm")
@@ -563,6 +563,7 @@ class TestCephadm(object):
},
},
}),
+ error_ok=True,
use_current_daemon_image=True,
)
@@ -618,6 +619,7 @@ class TestCephadm(object):
"crush_location": "datacenter=a",
},
}),
+ error_ok=True,
use_current_daemon_image=False,
)
@@ -660,6 +662,7 @@ class TestCephadm(object):
"keyring": "[client.crash.test]\nkey = None\n",
},
}),
+ error_ok=True,
use_current_daemon_image=False,
)
@@ -702,6 +705,7 @@ class TestCephadm(object):
},
"config_blobs": {},
}),
+ error_ok=True,
use_current_daemon_image=False,
)
@@ -752,6 +756,7 @@ class TestCephadm(object):
},
"config_blobs": {},
}),
+ error_ok=True,
use_current_daemon_image=False,
)
@@ -806,6 +811,7 @@ class TestCephadm(object):
},
"config_blobs": {},
}),
+ error_ok=True,
use_current_daemon_image=False,
)
@@ -2849,15 +2855,15 @@ Traceback (most recent call last):
# pass force=true in these tests to bypass _admin label check
with with_host(cephadm_module, 'test', refresh_hosts=False, rm_with_force=True):
cephadm_module.drain_host('test', force=True, zap_osd_devices=False)
- assert _rm_osds.called_with([], zap=False)
+ _rm_osds.assert_called_with([], zap=False)
with with_host(cephadm_module, 'test', refresh_hosts=False, rm_with_force=True):
cephadm_module.drain_host('test', force=True, zap_osd_devices=True)
- assert _rm_osds.called_with([], zap=True)
+ _rm_osds.assert_called_with([], zap=True)
with pytest.raises(OrchestratorError, match=r"Cannot find host 'host1' in the inventory."):
cephadm_module.drain_host('host1', force=True, zap_osd_devices=True)
- assert _rm_osds.called_with([], zap=True)
+ _rm_osds.assert_called_with([], zap=True)
def test_process_ls_output(self, cephadm_module):
sample_ls_output = """[
diff --git a/src/pybind/mgr/cephadm/tests/test_services.py b/src/pybind/mgr/cephadm/tests/test_services.py
index 824e37cf4d4..75c7c3c5bf7 100644
--- a/src/pybind/mgr/cephadm/tests/test_services.py
+++ b/src/pybind/mgr/cephadm/tests/test_services.py
@@ -349,6 +349,7 @@ log_to_file = False"""
},
}
}),
+ error_ok=True,
use_current_daemon_image=False,
)
@@ -397,6 +398,7 @@ enable_auth = False
state_update_notify = True
state_update_interval_sec = 5
enable_spdk_discovery_controller = False
+enable_key_encryption = True
enable_prometheus_exporter = True
prometheus_exporter_ssl = False
prometheus_port = 10008
@@ -409,8 +411,12 @@ allowed_consecutive_spdk_ping_failures = 1
spdk_ping_interval_in_seconds = 2.0
ping_spdk_under_lock = False
enable_monitor_client = True
-max_hosts_per_namespace = 1
+max_hosts_per_namespace = 8
max_namespaces_with_netmask = 1000
+max_subsystems = 128
+max_namespaces = 1024
+max_namespaces_per_subsystem = 256
+max_hosts_per_subsystem = 32
[gateway-logs]
log_level = INFO
@@ -489,6 +495,7 @@ timeout = 1.0\n"""
}
}
}),
+ error_ok=True,
use_current_daemon_image=False,
)
@@ -601,6 +608,7 @@ class TestMonitoring:
"use_url_prefix": False,
}
}),
+ error_ok=True,
use_current_daemon_image=False,
)
@@ -696,6 +704,7 @@ class TestMonitoring:
"use_url_prefix": True,
}
}),
+ error_ok=True,
use_current_daemon_image=False,
)
@@ -788,6 +797,7 @@ class TestMonitoring:
"use_url_prefix": False,
}
}),
+ error_ok=True,
use_current_daemon_image=False,
)
@@ -833,6 +843,7 @@ class TestMonitoring:
"files": {
"ceph-exporter.crt": "mycert",
"ceph-exporter.key": "mykey"}}}),
+ error_ok=True,
use_current_daemon_image=False)
@patch("cephadm.serve.CephadmServe._run_cephadm")
@@ -876,6 +887,7 @@ class TestMonitoring:
},
"config_blobs": {}
}),
+ error_ok=True,
use_current_daemon_image=False,
)
@@ -936,6 +948,7 @@ class TestMonitoring:
'web_config': '/etc/node-exporter/web.yml',
}
}),
+ error_ok=True,
use_current_daemon_image=False,
)
@@ -1071,6 +1084,7 @@ class TestMonitoring:
"use_url_prefix": False
},
}),
+ error_ok=True,
use_current_daemon_image=False,
)
@@ -1303,6 +1317,7 @@ class TestMonitoring:
"use_url_prefix": False
},
}),
+ error_ok=True,
use_current_daemon_image=False,
)
@@ -1379,6 +1394,7 @@ class TestMonitoring:
},
},
}),
+ error_ok=True,
use_current_daemon_image=False,
)
@@ -1438,6 +1454,7 @@ class TestMonitoring:
},
},
}),
+ error_ok=True,
use_current_daemon_image=False,
)
@@ -1588,6 +1605,7 @@ class TestMonitoring:
"files": files,
},
}),
+ error_ok=True,
use_current_daemon_image=False,
)
@@ -1718,6 +1736,7 @@ class TestMonitoring:
"files": files,
},
}),
+ error_ok=True,
use_current_daemon_image=False,
)
@@ -1831,6 +1850,7 @@ class TestMonitoring:
"files": files,
},
}),
+ error_ok=True,
use_current_daemon_image=False,
)
@@ -2005,6 +2025,7 @@ spec:
},
"config_blobs": {},
}),
+ error_ok=True,
use_current_daemon_image=False,
)
@@ -2112,6 +2133,7 @@ class TestSNMPGateway:
},
"config_blobs": config,
}),
+ error_ok=True,
use_current_daemon_image=False,
)
@@ -2160,6 +2182,7 @@ class TestSNMPGateway:
},
"config_blobs": config,
}),
+ error_ok=True,
use_current_daemon_image=False,
)
@@ -2212,6 +2235,7 @@ class TestSNMPGateway:
},
"config_blobs": config,
}),
+ error_ok=True,
use_current_daemon_image=False,
)
@@ -2269,6 +2293,7 @@ class TestSNMPGateway:
},
"config_blobs": config,
}),
+ error_ok=True,
use_current_daemon_image=False,
)
@@ -3361,6 +3386,7 @@ class TestJaeger:
},
"config_blobs": config,
}),
+ error_ok=True,
use_current_daemon_image=False,
)
@@ -3401,6 +3427,7 @@ class TestJaeger:
},
"config_blobs": es_config,
}),
+ error_ok=True,
use_current_daemon_image=False,
)
with with_service(cephadm_module, collector_spec):
@@ -3429,6 +3456,7 @@ class TestJaeger:
},
"config_blobs": collector_config,
}),
+ error_ok=True,
use_current_daemon_image=False,
)
@@ -3469,6 +3497,7 @@ class TestJaeger:
},
"config_blobs": collector_config,
}),
+ error_ok=True,
use_current_daemon_image=False,
)
with with_service(cephadm_module, agent_spec):
@@ -3497,6 +3526,7 @@ class TestJaeger:
},
"config_blobs": agent_config,
}),
+ error_ok=True,
use_current_daemon_image=False,
)
@@ -3554,6 +3584,7 @@ class TestCustomContainer:
},
}
),
+ error_ok=True,
use_current_daemon_image=False,
)
@@ -3641,6 +3672,7 @@ class TestCustomContainer:
['_orch', 'deploy'],
[],
stdin=json.dumps(expected),
+ error_ok=True,
use_current_daemon_image=False,
)
@@ -3694,6 +3726,7 @@ class TestSMB:
['_orch', 'deploy'],
[],
stdin=json.dumps(expected),
+ error_ok=True,
use_current_daemon_image=False,
)
@@ -3766,6 +3799,7 @@ class TestSMB:
['_orch', 'deploy'],
[],
stdin=json.dumps(expected),
+ error_ok=True,
use_current_daemon_image=False,
)
@@ -4011,6 +4045,7 @@ class TestMgmtGateway:
['_orch', 'deploy'],
[],
stdin=json.dumps(expected),
+ error_ok=True,
use_current_daemon_image=False,
)
@@ -4352,6 +4387,7 @@ class TestMgmtGateway:
['_orch', 'deploy'],
[],
stdin=json.dumps(expected),
+ error_ok=True,
use_current_daemon_image=False,
)
@@ -4475,5 +4511,6 @@ class TestMgmtGateway:
['_orch', 'deploy'],
[],
stdin=json.dumps(expected),
+ error_ok=True,
use_current_daemon_image=False,
)
diff --git a/src/pybind/mgr/mgr_module.py b/src/pybind/mgr/mgr_module.py
index 29ddff2ffc2..76ad8d9d0ce 100644
--- a/src/pybind/mgr/mgr_module.py
+++ b/src/pybind/mgr/mgr_module.py
@@ -1285,7 +1285,7 @@ class MgrModule(ceph_module.BaseMgrModule, MgrModuleLoggingMixin):
if latest < version:
raise RuntimeError(f"main.db version is newer ({version}) than module ({latest})")
for i in range(version, latest):
- self.log.info(f"upgrading main.db for {self.module_name} from {i-1}:{i}")
+ self.log.info(f"upgrading main.db for {self.module_name} from {i - 1}:{i}")
for sql in self.SCHEMA_VERSIONED[i]:
db.execute(sql)
if version < latest:
diff --git a/src/pybind/mgr/orchestrator/_interface.py b/src/pybind/mgr/orchestrator/_interface.py
index efe0de55bd0..a505801eea5 100644
--- a/src/pybind/mgr/orchestrator/_interface.py
+++ b/src/pybind/mgr/orchestrator/_interface.py
@@ -901,10 +901,18 @@ class Orchestrator(object):
"""Change/Add a specific setting for a tuned profile"""
raise NotImplementedError()
+ def tuned_profile_add_settings(self, profile_name: str, setting: dict) -> OrchResult[str]:
+ """Change/Add multiple settings for a tuned profile"""
+ raise NotImplementedError()
+
def tuned_profile_rm_setting(self, profile_name: str, setting: str) -> OrchResult[str]:
"""Remove a specific setting for a tuned profile"""
raise NotImplementedError()
+ def tuned_profile_rm_settings(self, profile_name: str, settings: List[str]) -> OrchResult[str]:
+ """Remove multiple settings from a tuned profile"""
+        raise NotImplementedError()
+
def upgrade_check(self, image: Optional[str], version: Optional[str]) -> OrchResult[str]:
raise NotImplementedError()
diff --git a/src/pybind/mgr/orchestrator/module.py b/src/pybind/mgr/orchestrator/module.py
index fdb9280d7ed..332bc75d862 100644
--- a/src/pybind/mgr/orchestrator/module.py
+++ b/src/pybind/mgr/orchestrator/module.py
@@ -2250,6 +2250,39 @@ Usage:
res = raise_if_exception(completion)
return HandleCommandResult(stdout=res)
+ @_cli_write_command("orch tuned-profile add-settings")
+ def _tuned_profile_add_settings(self, profile_name: str, settings: str) -> HandleCommandResult:
+ try:
+ setting_pairs = settings.split(",")
+            # parse "key1=value1,key2=value2" into a dict; a malformed pair raises ValueError below
+ parsed_setting = {key.strip(): value.strip() for key, value in (s.split('=', 1) for s in setting_pairs)}
+ completion = self.tuned_profile_add_settings(profile_name, parsed_setting)
+ res = raise_if_exception(completion)
+ return HandleCommandResult(stdout=res)
+ except ValueError:
+ error_message = (
+ "Error: Invalid format detected. "
+ "The correct format is key=value pairs separated by commas,"
+ "e.g., 'vm.swappiness=11,vm.user_reserve_kbytes=116851'"
+ )
+ return HandleCommandResult(stderr=error_message)
+
+ @_cli_write_command("orch tuned-profile rm-settings")
+ def _tuned_profile_rm_settings(self, profile_name: str, settings: str) -> HandleCommandResult:
+ try:
+ setting = [s.strip() for s in settings.split(",") if s.strip()]
+ if not setting:
+ raise ValueError(
+ "Error: Invalid format."
+ "The correct format is key1,key2"
+ "e.g., vm.swappiness,vm.user_reserve_kbytes"
+ )
+ completion = self.tuned_profile_rm_settings(profile_name, setting)
+ res = raise_if_exception(completion)
+ return HandleCommandResult(stdout=res)
+ except ValueError as e:
+ return HandleCommandResult(stderr=str(e))
+
def self_test(self) -> None:
old_orch = self._select_orchestrator()
self._set_backend('')
diff --git a/src/python-common/CMakeLists.txt b/src/python-common/CMakeLists.txt
index e89bbe2feef..08660342a6a 100644
--- a/src/python-common/CMakeLists.txt
+++ b/src/python-common/CMakeLists.txt
@@ -3,5 +3,5 @@ distutils_install_module(ceph)
if(WITH_TESTS)
include(AddCephTest)
- add_tox_test(python-common TOX_ENVS py3 lint)
+ add_tox_test(python-common TOX_ENVS __tox_defaults__)
endif()
diff --git a/src/python-common/ceph/deployment/drive_group.py b/src/python-common/ceph/deployment/drive_group.py
index c68ee01a728..43175aa79fb 100644
--- a/src/python-common/ceph/deployment/drive_group.py
+++ b/src/python-common/ceph/deployment/drive_group.py
@@ -2,7 +2,7 @@ import enum
import yaml
from ceph.deployment.inventory import Device
-from ceph.deployment.service_spec import (
+from ceph.deployment.service_spec import ( # noqa: F401 (type comments)
CustomConfig,
GeneralArgList,
PlacementSpec,
@@ -11,7 +11,7 @@ from ceph.deployment.service_spec import (
from ceph.deployment.hostspec import SpecValidationError
try:
- from typing import Optional, List, Dict, Any, Union
+ from typing import Optional, List, Dict, Any, Union # noqa: F401
except ImportError:
pass
diff --git a/src/python-common/ceph/deployment/drive_selection/filter.py b/src/python-common/ceph/deployment/drive_selection/filter.py
index 0da1b5c3901..28f63ddc2f2 100644
--- a/src/python-common/ceph/deployment/drive_selection/filter.py
+++ b/src/python-common/ceph/deployment/drive_selection/filter.py
@@ -15,12 +15,10 @@ logger = logging.getLogger(__name__)
class FilterGenerator(object):
- def __init__(self, device_filter):
- # type: (DeviceSelection) -> None
+ def __init__(self, device_filter: DeviceSelection) -> None:
self.device_filter = device_filter
- def __iter__(self):
- # type: () -> Generator[Matcher, None, None]
+ def __iter__(self) -> Generator[Matcher, None, None]:
if self.device_filter.actuators:
yield EqualityMatcher('actuators', self.device_filter.actuators)
if self.device_filter.size:
diff --git a/src/python-common/ceph/deployment/drive_selection/matchers.py b/src/python-common/ceph/deployment/drive_selection/matchers.py
index df502410aeb..a6a2147ce9e 100644
--- a/src/python-common/ceph/deployment/drive_selection/matchers.py
+++ b/src/python-common/ceph/deployment/drive_selection/matchers.py
@@ -1,8 +1,9 @@
# -*- coding: utf-8 -*-
-from typing import Tuple, Optional, Any, Union, Iterator
+# TODO: remove noqa and update to python3/mypy style type annotations
+from typing import Tuple, Optional, Any, Union, Iterator # noqa: F401
-from ceph.deployment.inventory import Device
+from ceph.deployment.inventory import Device # noqa: F401
import re
import logging
diff --git a/src/python-common/ceph/deployment/drive_selection/selector.py b/src/python-common/ceph/deployment/drive_selection/selector.py
index 59ebbb6347e..85fc95cf394 100644
--- a/src/python-common/ceph/deployment/drive_selection/selector.py
+++ b/src/python-common/ceph/deployment/drive_selection/selector.py
@@ -3,7 +3,7 @@ import logging
from typing import List, Optional, Dict, Callable
from ..inventory import Device
-from ..drive_group import DriveGroupSpec, DeviceSelection, DriveGroupValidationError
+from ..drive_group import DriveGroupSpec, DeviceSelection, DriveGroupValidationError # noqa: F401
from .filter import FilterGenerator
from .matchers import _MatchInvalid
diff --git a/src/python-common/ceph/deployment/inventory.py b/src/python-common/ceph/deployment/inventory.py
index e2c1a5605f9..29475e94d82 100644
--- a/src/python-common/ceph/deployment/inventory.py
+++ b/src/python-common/ceph/deployment/inventory.py
@@ -1,5 +1,5 @@
try:
- from typing import List, Optional, Dict, Any, Union
+ from typing import List, Optional, Dict, Any, Union # noqa: F401
except ImportError:
pass # for type checking
diff --git a/src/python-common/ceph/deployment/service_spec.py b/src/python-common/ceph/deployment/service_spec.py
index 979c14f7d00..10e9f267940 100644
--- a/src/python-common/ceph/deployment/service_spec.py
+++ b/src/python-common/ceph/deployment/service_spec.py
@@ -527,8 +527,8 @@ pattern_type=PatternType.fnmatch))
labels = [x for x in strings if 'label:' in x]
if len(labels) > 1:
raise SpecValidationError('more than one label provided: {}'.format(labels))
- for l in labels:
- strings.remove(l)
+ for lbl in labels:
+ strings.remove(lbl)
label = labels[0][6:] if labels else None
host_patterns = strings
@@ -701,7 +701,7 @@ class ArgumentSpec:
if isinstance(data, str):
return cls(data, split=True, origin=cls.OriginalType.STRING)
if 'argument' not in data:
- raise SpecValidationError(f'ArgumentSpec must have an "argument" field')
+ raise SpecValidationError('ArgumentSpec must have an "argument" field')
for k in data.keys():
if k not in cls._fields:
raise SpecValidationError(f'ArgumentSpec got an unknown field {k!r}')
@@ -1313,6 +1313,10 @@ class RGWSpec(ServiceSpec):
raise SpecValidationError('"ssl" field must be set to true when "generate_cert" '
'is set to true')
+ if self.generate_cert and self.rgw_frontend_ssl_certificate:
+ raise SpecValidationError('"generate_cert" field and "rgw_frontend_ssl_certificate" '
+ 'field are mutually exclusive')
+
yaml.add_representer(RGWSpec, ServiceSpec.yaml_representer)
@@ -1330,6 +1334,7 @@ class NvmeofServiceSpec(ServiceSpec):
state_update_notify: Optional[bool] = True,
state_update_interval_sec: Optional[int] = 5,
enable_spdk_discovery_controller: Optional[bool] = False,
+ enable_key_encryption: Optional[bool] = True,
omap_file_lock_duration: Optional[int] = 20,
omap_file_lock_retries: Optional[int] = 30,
omap_file_lock_retry_sleep_interval: Optional[float] = 1.0,
@@ -1340,14 +1345,20 @@ class NvmeofServiceSpec(ServiceSpec):
allowed_consecutive_spdk_ping_failures: Optional[int] = 1,
spdk_ping_interval_in_seconds: Optional[float] = 2.0,
ping_spdk_under_lock: Optional[bool] = False,
- max_hosts_per_namespace: Optional[int] = 1,
+ max_hosts_per_namespace: Optional[int] = 8,
max_namespaces_with_netmask: Optional[int] = 1000,
+ max_subsystems: Optional[int] = 128,
+ max_namespaces: Optional[int] = 1024,
+ max_namespaces_per_subsystem: Optional[int] = 256,
+ max_hosts_per_subsystem: Optional[int] = 32,
server_key: Optional[str] = None,
server_cert: Optional[str] = None,
client_key: Optional[str] = None,
client_cert: Optional[str] = None,
root_ca_cert: Optional[str] = None,
+ # unused and duplicate of tgt_path below, consider removing
spdk_path: Optional[str] = None,
+ spdk_mem_size: Optional[int] = None,
tgt_path: Optional[str] = None,
spdk_timeout: Optional[float] = 60.0,
spdk_log_level: Optional[str] = '',
@@ -1409,6 +1420,8 @@ class NvmeofServiceSpec(ServiceSpec):
self.state_update_interval_sec = state_update_interval_sec
#: ``enable_spdk_discovery_controller`` SPDK or ceph-nvmeof discovery service
self.enable_spdk_discovery_controller = enable_spdk_discovery_controller
+ #: ``enable_key_encryption`` encrypt DHCHAP and PSK keys before saving in OMAP
+ self.enable_key_encryption = enable_key_encryption
#: ``enable_prometheus_exporter`` enables Prometheus exporter
self.enable_prometheus_exporter = enable_prometheus_exporter
#: ``verify_nqns`` enables verification of subsystem and host NQNs for validity
@@ -1425,6 +1438,14 @@ class NvmeofServiceSpec(ServiceSpec):
self.max_hosts_per_namespace = max_hosts_per_namespace
#: ``max_namespaces_with_netmask`` max number of namespaces which are not auto visible
self.max_namespaces_with_netmask = max_namespaces_with_netmask
+ #: ``max_subsystems`` max number of subsystems
+ self.max_subsystems = max_subsystems
+ #: ``max_namespaces`` max number of namespaces on all subsystems
+ self.max_namespaces = max_namespaces
+ #: ``max_namespaces_per_subsystem`` max number of namespaces per one subsystem
+ self.max_namespaces_per_subsystem = max_namespaces_per_subsystem
+ #: ``max_hosts_per_subsystem`` max number of hosts per subsystems
+ self.max_hosts_per_subsystem = max_hosts_per_subsystem
#: ``allowed_consecutive_spdk_ping_failures`` # of ping failures before aborting gateway
self.allowed_consecutive_spdk_ping_failures = allowed_consecutive_spdk_ping_failures
#: ``spdk_ping_interval_in_seconds`` sleep interval in seconds between SPDK pings
@@ -1443,8 +1464,10 @@ class NvmeofServiceSpec(ServiceSpec):
self.client_cert = client_cert
#: ``root_ca_cert`` CA cert for server/client certs
self.root_ca_cert = root_ca_cert
- #: ``spdk_path`` path to SPDK
+ #: ``spdk_path`` path is unused and duplicate of tgt_path below, consider removing
self.spdk_path = spdk_path or '/usr/local/bin/nvmf_tgt'
+ #: ``spdk_mem_size`` memory size in MB for DPDK
+ self.spdk_mem_size = spdk_mem_size
#: ``tgt_path`` nvmeof target path
self.tgt_path = tgt_path or '/usr/local/bin/nvmf_tgt'
#: ``spdk_timeout`` SPDK connectivity timeout
@@ -1613,6 +1636,36 @@ class NvmeofServiceSpec(ServiceSpec):
):
raise SpecValidationError("Log file directory backups can't be negative")
+ if (self.max_hosts_per_namespace and self.max_hosts_per_namespace < 0):
+ raise SpecValidationError("Max hosts per namespace can't be negative")
+
+ if (self.max_namespaces_with_netmask and self.max_namespaces_with_netmask < 0):
+ raise SpecValidationError("Max namespaces with netmask can't be negative")
+
+ if not isinstance(self.max_subsystems, int):
+ raise SpecValidationError("Max subsystems must be an integer")
+
+ if self.max_subsystems <= 0:
+ raise SpecValidationError("Max subsystems must be greater than zero")
+
+ if not isinstance(self.max_namespaces, int):
+ raise SpecValidationError("Max namespaces must be an integer")
+
+ if self.max_namespaces <= 0:
+ raise SpecValidationError("Max namespaces must be greater than zero")
+
+ if not isinstance(self.max_namespaces_per_subsystem, int):
+ raise SpecValidationError("Max namespaces per subsystem must be an integer")
+
+ if self.max_namespaces_per_subsystem <= 0:
+ raise SpecValidationError("Max namespaces per subsystem must be greater than zero")
+
+ if not isinstance(self.max_hosts_per_subsystem, int):
+ raise SpecValidationError("Max hosts per subsystem must be an integer")
+
+ if self.max_hosts_per_subsystem <= 0:
+ raise SpecValidationError("Max hosts per subsystem must be greater than zero")
+
if (
self.monitor_timeout
and self.monitor_timeout < 0.0
diff --git a/src/python-common/ceph/deployment/translate.py b/src/python-common/ceph/deployment/translate.py
index 49fb17da725..9dfe7cfcf81 100644
--- a/src/python-common/ceph/deployment/translate.py
+++ b/src/python-common/ceph/deployment/translate.py
@@ -5,7 +5,7 @@ try:
except ImportError:
pass
-from ceph.deployment.drive_selection.selector import DriveSelection
+from ceph.deployment.drive_selection.selector import DriveSelection # noqa: F401
logger = logging.getLogger(__name__)
diff --git a/src/python-common/ceph/fs/earmarking.py b/src/python-common/ceph/fs/earmarking.py
index c5d4a59a4d5..f4fd4ddf96c 100644
--- a/src/python-common/ceph/fs/earmarking.py
+++ b/src/python-common/ceph/fs/earmarking.py
@@ -19,13 +19,25 @@ supported top-level scopes.
import errno
import enum
import logging
-from typing import List, NamedTuple, Optional, Tuple
+from typing import List, NamedTuple, Optional, Tuple, Protocol
log = logging.getLogger(__name__)
XATTR_SUBVOLUME_EARMARK_NAME = 'user.ceph.subvolume.earmark'
+class FSOperations(Protocol):
+ """Protocol class representing the file system operations earmarking
+ classes will perform.
+ """
+
+ def setxattr(
+ self, path: str, key: str, value: bytes, flags: int
+ ) -> None: ...
+
+ def getxattr(self, path: str, key: str) -> bytes: ...
+
+
class EarmarkTopScope(enum.Enum):
NFS = "nfs"
SMB = "smb"
@@ -53,11 +65,11 @@ class EarmarkParseError(ValueError):
class CephFSVolumeEarmarking:
- def __init__(self, fs, path: str) -> None:
+ def __init__(self, fs: FSOperations, path: str) -> None:
self.fs = fs
self.path = path
- def _handle_cephfs_error(self, e: Exception, action: str) -> None:
+ def _handle_cephfs_error(self, e: Exception, action: str) -> Optional[str]:
if isinstance(e, ValueError):
raise EarmarkException(errno.EINVAL, f"Invalid earmark specified: {e}") from e
elif isinstance(e, OSError):
@@ -135,7 +147,7 @@ class CephFSVolumeEarmarking:
except Exception as e:
return self._handle_cephfs_error(e, "getting")
- def set_earmark(self, earmark: str):
+ def set_earmark(self, earmark: str) -> None:
# Validate the earmark before attempting to set it
if not self._validate_earmark(earmark):
raise EarmarkException(
diff --git a/src/python-common/ceph/tests/utils.py b/src/python-common/ceph/tests/utils.py
index 04b8a4e3895..20a39e4666b 100644
--- a/src/python-common/ceph/tests/utils.py
+++ b/src/python-common/ceph/tests/utils.py
@@ -35,8 +35,7 @@ def _mk_device(rotational=True,
)]
-def _mk_inventory(devices):
- # type: (Any) -> List[Device]
+def _mk_inventory(devices: Any) -> List[Device]:
devs = []
for dev_, name in zip(devices, map(chr, range(ord('a'), ord('z')))):
dev = Device.from_json(dev_.to_json())
diff --git a/src/python-common/requirements-lint.txt b/src/python-common/requirements-lint.txt
deleted file mode 100644
index 2a7142182c2..00000000000
--- a/src/python-common/requirements-lint.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-flake8==3.7.8
-rstcheck==3.3.1
diff --git a/src/python-common/tox.ini b/src/python-common/tox.ini
index 313a4334d51..e0b59c700ca 100644
--- a/src/python-common/tox.ini
+++ b/src/python-common/tox.ini
@@ -1,5 +1,5 @@
[tox]
-envlist = py3, mypy, lint
+envlist = lint, rstcheck, mypy, py3
skip_missing_interpreters = true
[testenv:py3]
@@ -26,9 +26,13 @@ exclude =
__pycache__
[testenv:lint]
-deps =
- -rrequirements-lint.txt
+deps =
+ flake8
commands =
flake8 {posargs:ceph}
- rstcheck --report info --debug README.rst
+[testenv:rstcheck]
+deps =
+ rstcheck
+commands =
+ rstcheck --report-level info README.rst
diff --git a/src/rgw/driver/dbstore/README.md b/src/rgw/driver/dbstore/README.md
index f7e5df331cc..2bf3391c8c4 100644
--- a/src/rgw/driver/dbstore/README.md
+++ b/src/rgw/driver/dbstore/README.md
@@ -15,23 +15,21 @@ Add below cmake option (enabled by default)
## Running Test cluster
-Edit ceph.conf to add below option
+Edit ceph.conf to add below options
[client]
rgw backend store = dbstore
rgw config store = dbstore
-Start vstart cluster
+To start the `vstart` cluster, run the following command:
- MON=1 RGW=1 ../src/vstart.sh -o rgw_backend_store=dbstore -o rgw_config_store=dbstore -n -d
+ MON=0 OSD=0 MDS=0 MGR=0 RGW=1 ../src/vstart.sh -n -d --rgw_store dbstore
-The above vstart command brings up RGW server on dbstore. It creates default zonegroup, zone and few default users (eg., testid) to be used for s3 operations.
+The above `vstart` command brings up the RGW server on DBStore without the need for MONs or OSDs. It creates a default zonegroup, zone, and a few default users (e.g., `testid`) to be used for S3 operations, and generates database files in the `dev` subdirectory, by default, to store them.
-`radosgw-admin` can be used to create and remove other users, zonegroups and zones.
-
-
-By default, dbstore creates .db file *'/var/lib/ceph/radosgw/dbstore-default_ns.db'* to store the data and *'/var/lib/ceph/radosgw/dbstore-config.db'* file to store the configuration. This can be configured using below options in ceph.conf
+The `radosgw-admin` command can be used to create and remove other users, zonegroups and zones.
+The location and prefix for the database files can be configured using the following options:
[client]
dbstore db dir = <path for the directory for storing the db backend store data>
dbstore db name prefix = <prefix to the file names created by db backend store>
diff --git a/src/rgw/driver/posix/README.md b/src/rgw/driver/posix/README.md
index 02dc8dfbe85..73971edc86f 100644
--- a/src/rgw/driver/posix/README.md
+++ b/src/rgw/driver/posix/README.md
@@ -23,15 +23,15 @@ Edit ceph.conf to add below option
rgw config store = dbstore
rgw filter = posix
-Start vstart cluster
+To start the `vstart` cluster, run the following command:
- MON=0 OSD=0 MDS=0 MGR=0 RGW=1 ../src/vstart.sh -o rgw_backend_store=dbstore -o rgw_config_store=dbstore -o rgw_filter=posix -n -d
+ MON=0 OSD=0 MDS=0 MGR=0 RGW=1 ../src/vstart.sh -n -d --rgw_store posix
-The above vstart command brings up RGW server on POSIXDriver. It creates default zonegroup, zone and few default users (eg., testid) to be used for s3 operations.
+The above `vstart` command brings up the RGW server on POSIXDriver. It creates a default zonegroup, zone, and a few default users (e.g., `testid`) to be used for S3 operations.
-`radosgw-admin` can be used to create and remove other users, zonegroups and zones.
+The `radosgw-admin` command can be used to create and remove other users, zonegroups and zones.
-By default, the directory exported is *'/tmp/rgw_posix_driver'*. This can be changed with the `rgw_posix_base_path` option, either in ceph.conf or on the vstart command line above.
+By default, the directory exported, *'rgw_posix_driver'*, is created in the `dev` subdirectory. This can be changed with the `rgw_posix_base_path` option.
-The POSIXDriver keeps a LMDB based cache of directories, so that it can provide ordered listings. This directory lives in `rgw_posix_database_root`, which by default is in *'/var/lib/ceph/radosgw'*
+The POSIXDriver keeps an LMDB-based cache of directories, so that it can provide ordered listings. This directory lives in `rgw_posix_database_root`, which by default is created in the `dev` subdirectory.
diff --git a/src/rgw/driver/rados/rgw_notify.cc b/src/rgw/driver/rados/rgw_notify.cc
index 7b31fd72bd4..5734284d1a3 100644
--- a/src/rgw/driver/rados/rgw_notify.cc
+++ b/src/rgw/driver/rados/rgw_notify.cc
@@ -21,6 +21,7 @@
#include "common/dout.h"
#include "rgw_url.h"
#include <chrono>
+#include <fmt/format.h>
#define dout_subsys ceph_subsys_rgw_notification
@@ -769,9 +770,10 @@ public:
});
// start the worker threads to do the actual queue processing
- const std::string WORKER_THREAD_NAME = "notif-worker";
for (auto worker_id = 0U; worker_id < worker_count; ++worker_id) {
- workers.emplace_back([this]() {
+ workers.emplace_back([this,worker_id]() {
+ const auto thread_name = fmt::format("notif-worker-{}", worker_id);
+ ceph_pthread_setname(thread_name.c_str());
try {
io_context.run();
} catch (const std::exception& err) {
@@ -779,11 +781,6 @@ public:
throw err;
}
});
- const auto thread_name = WORKER_THREAD_NAME+std::to_string(worker_id);
- if (const auto rc = ceph_pthread_setname(workers.back().native_handle(), thread_name.c_str()); rc != 0) {
- ldpp_dout(this, 1) << "ERROR: failed to set notification manager thread name to: " << thread_name
- << ". error: " << rc << dendl;
- }
}
ldpp_dout(this, 10) << "INfO: started notification manager with: " << worker_count << " workers" << dendl;
}
diff --git a/src/rgw/rgw_amqp.cc b/src/rgw/rgw_amqp.cc
index 7504d47c6c9..5bc5d173c73 100644
--- a/src/rgw/rgw_amqp.cc
+++ b/src/rgw/rgw_amqp.cc
@@ -650,6 +650,9 @@ private:
// (4) TODO reconnect on connection errors
// (5) TODO cleanup timedout callbacks
void run() noexcept {
+ // give the runner thread a name for easier debugging
+ ceph_pthread_setname("amqp_manager");
+
amqp_frame_t frame;
while (!stopped) {
@@ -838,12 +841,6 @@ public:
// This is to prevent rehashing so that iterators are not invalidated
// when a new connection is added.
connections.max_load_factor(10.0);
- // give the runner thread a name for easier debugging
- const char* thread_name = "amqp_manager";
- if (const auto rc = ceph_pthread_setname(runner.native_handle(), thread_name); rc != 0) {
- ldout(cct, 1) << "ERROR: failed to set amqp manager thread name to: " << thread_name
- << ". error: " << rc << dendl;
- }
}
// non copyable
diff --git a/src/rgw/rgw_common.cc b/src/rgw/rgw_common.cc
index 68fb9a29278..1a59ba02999 100644
--- a/src/rgw/rgw_common.cc
+++ b/src/rgw/rgw_common.cc
@@ -2994,7 +2994,9 @@ void RGWAccessKey::decode_json(JSONObj *obj) {
subuser = user.substr(pos + 1);
}
}
- JSONDecoder::decode_json("active", active, obj);
+ if (bool tmp = false; JSONDecoder::decode_json("active", tmp, obj)) {
+ active = tmp; // update only if "active" is present
+ }
JSONDecoder::decode_json("create_date", create_date, obj);
}
diff --git a/src/rgw/rgw_kafka.cc b/src/rgw/rgw_kafka.cc
index c0ec3dc2c55..0807993338d 100644
--- a/src/rgw/rgw_kafka.cc
+++ b/src/rgw/rgw_kafka.cc
@@ -503,6 +503,7 @@ private:
}
void run() noexcept {
+ ceph_pthread_setname("kafka_manager");
while (!stopped) {
// publish all messages in the queue
@@ -575,12 +576,6 @@ public:
// This is to prevent rehashing so that iterators are not invalidated
// when a new connection is added.
connections.max_load_factor(10.0);
- // give the runner thread a name for easier debugging
- const char* thread_name = "kafka_manager";
- if (const auto rc = ceph_pthread_setname(runner.native_handle(), thread_name); rc != 0) {
- ldout(cct, 1) << "ERROR: failed to set kafka manager thread name to: " << thread_name
- << ". error: " << rc << dendl;
- }
}
// non copyable
diff --git a/src/rgw/rgw_lua_background.cc b/src/rgw/rgw_lua_background.cc
index ef97a5d6f65..c5b815f93f5 100644
--- a/src/rgw/rgw_lua_background.cc
+++ b/src/rgw/rgw_lua_background.cc
@@ -83,11 +83,6 @@ void Background::start() {
}
started = true;
runner = std::thread(&Background::run, this);
- const char* thread_name = "lua_background";
- if (const auto rc = ceph_pthread_setname(runner.native_handle(), thread_name); rc != 0) {
- ldout(cct, 1) << "ERROR: failed to set lua background thread name to: " << thread_name
- << ". error: " << rc << dendl;
- }
}
void Background::pause() {
@@ -127,6 +122,7 @@ const BackgroundMapValue& Background::get_table_value(const std::string& key) co
//(2) Executes the script
//(3) Sleep (configurable)
void Background::run() {
+ ceph_pthread_setname("lua_background");
const DoutPrefixProvider* const dpp = &dp;
lua_state_guard lguard(cct->_conf->rgw_lua_max_memory_per_state, dpp);
auto L = lguard.get();
diff --git a/src/rgw/rgw_op.cc b/src/rgw/rgw_op.cc
index 67829e6320a..0dcf1e0f7d5 100644
--- a/src/rgw/rgw_op.cc
+++ b/src/rgw/rgw_op.cc
@@ -6811,6 +6811,8 @@ void RGWCompleteMultipart::execute(optional_yield y)
if (upload->cksum_type != rgw::cksum::Type::none) {
op_ret = try_sum_part_cksums(this, s->cct, upload.get(), parts, cksum, y);
if (op_ret < 0) {
+ ldpp_dout(this, 16) << "ERROR: try_sum_part_cksums failed, obj="
+ << meta_obj << " ret=" << op_ret << dendl;
return;
}
}
@@ -6835,13 +6837,23 @@ void RGWCompleteMultipart::execute(optional_yield y)
rgw::putobj::find_hdr_cksum(*(s->info.env));
ldpp_dout_fmt(this, 10,
- "INFO: client supplied checksum {}: {}",
+ "INFO: client supplied checksum {}: {} ",
hdr_cksum.header_name(), supplied_cksum);
if (! (supplied_cksum.empty()) &&
(supplied_cksum != armored_cksum)) {
- op_ret = -ERR_INVALID_REQUEST;
- return;
+ /* some minio SDK clients assert a checksum that is cryptographically
+ * valid but omits the part count */
+ auto parts_suffix = fmt::format("-{}", parts->parts.size());
+ auto suffix_len = armored_cksum->size() - parts_suffix.size();
+ if (armored_cksum->compare(0, suffix_len, supplied_cksum) != 0) {
+ ldpp_dout_fmt(this, 4,
+ "{} content checksum mismatch"
+ "\n\tcalculated={} != \n\texpected={}",
+ hdr_cksum.header_name(), armored_cksum, supplied_cksum);
+ op_ret = -ERR_INVALID_REQUEST;
+ return;
+ }
}
buffer::list cksum_bl;
diff --git a/src/rgw/rgw_ratelimit.h b/src/rgw/rgw_ratelimit.h
index 0db1813f050..beb0eb3b1d2 100644
--- a/src/rgw/rgw_ratelimit.h
+++ b/src/rgw/rgw_ratelimit.h
@@ -239,6 +239,7 @@ class ActiveRateLimiter : public DoutPrefix {
std::atomic_uint8_t current_active = 0;
std::shared_ptr<RateLimiter> ratelimit[2];
void replace_active() {
+ ceph_pthread_setname("ratelimit_gc");
using namespace std::chrono_literals;
std::unique_lock<std::mutex> lk(cv_m);
while (!stopped) {
@@ -286,8 +287,5 @@ class ActiveRateLimiter : public DoutPrefix {
void start() {
ldpp_dout(this, 20) << "starting ratelimit_gc thread" << dendl;
runner = std::thread(&ActiveRateLimiter::replace_active, this);
- if (const auto rc = ceph_pthread_setname(runner.native_handle(), "ratelimit_gc"); rc != 0) {
- ldpp_dout(this, 1) << "ERROR: failed to set ratelimit_gc thread name. error: " << rc << dendl;
- }
}
};
diff --git a/src/rgw/rgw_rest_pubsub.cc b/src/rgw/rgw_rest_pubsub.cc
index c0345a4f88a..adfc86d87cb 100644
--- a/src/rgw/rgw_rest_pubsub.cc
+++ b/src/rgw/rgw_rest_pubsub.cc
@@ -494,11 +494,11 @@ void RGWPSListTopicsOp::execute(optional_yield y) {
const RGWPubSub ps(driver, get_account_or_tenant(s->owner.id), *s->penv.site);
if (rgw::all_zonegroups_support(*s->penv.site, rgw::zone_features::notification_v2) &&
- driver->stat_topics_v1(s->bucket->get_tenant(), null_yield, this) == -ENOENT) {
- op_ret = ps.get_topics_v1(this, result, y);
- } else {
+ driver->stat_topics_v1(get_account_or_tenant(s->owner.id), null_yield, this) == -ENOENT) {
constexpr int max_items = 100;
op_ret = ps.get_topics_v2(this, start_token, max_items, result, next_token, y);
+ } else {
+ op_ret = ps.get_topics_v1(this, result, y);
}
// if there are no topics it is not considered an error
op_ret = op_ret == -ENOENT ? 0 : op_ret;
diff --git a/src/rgw/rgw_rest_s3.cc b/src/rgw/rgw_rest_s3.cc
index a245fca9945..68223405cf4 100644
--- a/src/rgw/rgw_rest_s3.cc
+++ b/src/rgw/rgw_rest_s3.cc
@@ -2399,11 +2399,17 @@ static void dump_bucket_metadata(req_state *s, rgw::sal::Bucket* bucket,
if (s->auth.identity->is_owner_of(bucket->get_owner())) {
const auto& user_info = s->user->get_info();
const auto& bucket_quota = s->bucket->get_info().quota; // bucket quota
- dump_header(s, "X-RGW-Quota-User-Size", static_cast<long long>(user_info.quota.user_quota.max_size));
- dump_header(s, "X-RGW-Quota-User-Objects", static_cast<long long>(user_info.quota.user_quota.max_objects));
dump_header(s, "X-RGW-Quota-Max-Buckets", static_cast<long long>(user_info.max_buckets));
- dump_header(s, "X-RGW-Quota-Bucket-Size", static_cast<long long>(bucket_quota.max_size));
- dump_header(s, "X-RGW-Quota-Bucket-Objects", static_cast<long long>(bucket_quota.max_objects));
+
+ if (user_info.quota.user_quota.enabled){
+ dump_header(s, "X-RGW-Quota-User-Size", static_cast<long long>(user_info.quota.user_quota.max_size));
+ dump_header(s, "X-RGW-Quota-User-Objects", static_cast<long long>(user_info.quota.user_quota.max_objects));
+ }
+
+ if (bucket_quota.enabled){
+ dump_header(s, "X-RGW-Quota-Bucket-Size", static_cast<long long>(bucket_quota.max_size));
+ dump_header(s, "X-RGW-Quota-Bucket-Objects", static_cast<long long>(bucket_quota.max_objects));
+ }
}
}
diff --git a/src/test/admin_socket.cc b/src/test/admin_socket.cc
index 69a3cbefd0e..dea29f96f11 100644
--- a/src/test/admin_socket.cc
+++ b/src/test/admin_socket.cc
@@ -27,6 +27,8 @@
#include <sys/un.h>
#include <signal.h>
+#include <iostream> // for std::cout
+
using namespace std;
class AdminSocketTest
diff --git a/src/test/admin_socket_output.h b/src/test/admin_socket_output.h
index 1df12e4a9a5..5d22e8757ee 100644
--- a/src/test/admin_socket_output.h
+++ b/src/test/admin_socket_output.h
@@ -16,6 +16,7 @@
#define CEPH_ADMIN_SOCKET_OUTPUT_H
#include <filesystem>
+#include <iostream> // for std::cout
#include <string>
#include <map>
#include <set>
diff --git a/src/test/bench_log.cc b/src/test/bench_log.cc
index 60fda462e87..9e7c02afc41 100644
--- a/src/test/bench_log.cc
+++ b/src/test/bench_log.cc
@@ -1,6 +1,8 @@
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
// vim: ts=8 sw=2 smarttab
+#include <iostream> // for std::cout
+
#include "include/types.h"
#include "common/Thread.h"
#include "common/debug.h"
diff --git a/src/test/bufferlist.cc b/src/test/bufferlist.cc
index 013335d8177..4b3ca95ca6c 100644
--- a/src/test/bufferlist.cc
+++ b/src/test/bufferlist.cc
@@ -23,6 +23,8 @@
#include <errno.h>
#include <sys/uio.h>
+#include <iostream> // for std::cout
+
#include "include/buffer.h"
#include "include/buffer_raw.h"
#include "include/compat.h"
diff --git a/src/test/ceph_argparse.cc b/src/test/ceph_argparse.cc
index 738879c5ba8..436ddc86363 100644
--- a/src/test/ceph_argparse.cc
+++ b/src/test/ceph_argparse.cc
@@ -15,6 +15,7 @@
#include "common/ceph_argparse.h"
#include "gtest/gtest.h"
+#include <iostream> // for std::cout
#include <vector>
#include "include/stringify.h"
diff --git a/src/test/common/Throttle.cc b/src/test/common/Throttle.cc
index b36d0a901de..6ca05f6dae3 100644
--- a/src/test/common/Throttle.cc
+++ b/src/test/common/Throttle.cc
@@ -23,6 +23,7 @@
#include <signal.h>
#include <chrono>
+#include <iostream> // for std::cout
#include <list>
#include <mutex>
#include <random>
diff --git a/src/test/common/test_cdc.cc b/src/test/common/test_cdc.cc
index 620ecf4679f..61a5aa3708c 100644
--- a/src/test/common/test_cdc.cc
+++ b/src/test/common/test_cdc.cc
@@ -3,6 +3,7 @@
#include <vector>
#include <cstring>
+#include <iostream> // for std::cout
#include <random>
#include "include/types.h"
diff --git a/src/test/common/test_config.cc b/src/test/common/test_config.cc
index a70d567a434..4805c14a32e 100644
--- a/src/test/common/test_config.cc
+++ b/src/test/common/test_config.cc
@@ -19,6 +19,9 @@
*
*
*/
+
+#include <iostream> // for std::cout
+
#include "common/config_proxy.h"
#include "common/errno.h"
#include "gtest/gtest.h"
diff --git a/src/test/common/test_context.cc b/src/test/common/test_context.cc
index 889d000da85..8afded98951 100644
--- a/src/test/common/test_context.cc
+++ b/src/test/common/test_context.cc
@@ -19,6 +19,9 @@
*
*
*/
+
+#include <iostream> // for std::cout
+
#include "gtest/gtest.h"
#include "include/types.h"
#include "include/msgr.h"
diff --git a/src/test/common/test_shared_cache.cc b/src/test/common/test_shared_cache.cc
index 91120c7e59f..b7a392426d5 100644
--- a/src/test/common/test_shared_cache.cc
+++ b/src/test/common/test_shared_cache.cc
@@ -22,6 +22,9 @@
#include <stdio.h>
#include <signal.h>
+
+#include <iostream> // for std::cout
+
#include "gtest/gtest.h"
#include "common/Thread.h"
#include "common/shared_cache.hpp"
diff --git a/src/test/common/test_url_escape.cc b/src/test/common/test_url_escape.cc
index 6c27b64da7a..52de8db8d9c 100644
--- a/src/test/common/test_url_escape.cc
+++ b/src/test/common/test_url_escape.cc
@@ -3,6 +3,8 @@
#include "common/url_escape.h"
+#include <iostream> // for std::cout
+
#include "gtest/gtest.h"
TEST(url_escape, escape) {
diff --git a/src/test/compressor/test_compression.cc b/src/test/compressor/test_compression.cc
index 98ef159dfb8..c5e4724cefc 100644
--- a/src/test/compressor/test_compression.cc
+++ b/src/test/compressor/test_compression.cc
@@ -17,6 +17,9 @@
#include <errno.h>
#include <signal.h>
#include <stdlib.h>
+
+#include <iostream> // for std::cout
+
#include "gtest/gtest.h"
#include "common/ceph_context.h"
#include "common/config.h"
diff --git a/src/test/crimson/seastore/test_object_data_handler.cc b/src/test/crimson/seastore/test_object_data_handler.cc
index e7aabf2c8af..5dbc3748e5b 100644
--- a/src/test/crimson/seastore/test_object_data_handler.cc
+++ b/src/test/crimson/seastore/test_object_data_handler.cc
@@ -218,14 +218,20 @@ struct object_data_handler_test_t:
objaddr_t offset,
extent_len_t length) {
auto ret = with_trans_intr(t, [&](auto &t) {
- return tm->get_pins(t, laddr_t::from_byte_offset(offset), length);
+ auto &layout = onode->get_layout();
+ auto odata = layout.object_data.get();
+ auto obase = odata.get_reserved_data_base();
+ return tm->get_pins(t, (obase + offset).checked_to_laddr(), length);
}).unsafe_get();
return ret;
}
std::list<LBAMappingRef> get_mappings(objaddr_t offset, extent_len_t length) {
auto t = create_mutate_transaction();
auto ret = with_trans_intr(*t, [&](auto &t) {
- return tm->get_pins(t, laddr_t::from_byte_offset(offset), length);
+ auto &layout = onode->get_layout();
+ auto odata = layout.object_data.get();
+ auto obase = odata.get_reserved_data_base();
+ return tm->get_pins(t, (obase + offset).checked_to_laddr(), length);
}).unsafe_get();
return ret;
}
@@ -253,12 +259,16 @@ struct object_data_handler_test_t:
ObjectDataBlockRef get_extent(
Transaction &t,
- laddr_t addr,
+ loffset_t addr,
extent_len_t len) {
+ auto &layout = onode->get_layout();
+ auto odata = layout.object_data.get();
+ auto obase = odata.get_reserved_data_base();
auto ext = with_trans_intr(t, [&](auto& trans) {
- return tm->read_extent<ObjectDataBlock>(trans, addr, len);
- }).unsafe_get();
- EXPECT_EQ(addr, ext->get_laddr());
+ return tm->read_extent<ObjectDataBlock>(
+ trans, (obase + addr).checked_to_laddr(), len);
+ }).unsafe_get();
+ EXPECT_EQ((obase + addr).checked_to_laddr(), ext->get_laddr());
return ext;
}
@@ -798,7 +808,7 @@ TEST_P(object_data_handler_test_t, overwrite_then_read_within_transaction) {
auto pins = get_mappings(*t, base, len);
assert(pins.size() == 1);
auto pin1 = remap_pin(*t, std::move(pins.front()), 4096, 8192);
- auto ext = get_extent(*t, laddr_t::from_byte_offset(base + 4096), 4096 * 2);
+ auto ext = get_extent(*t, base + 4096, 4096 * 2);
ASSERT_TRUE(ext->is_exist_clean());
write(*t, base + 4096, 4096, 'y');
ASSERT_TRUE(ext->is_exist_mutation_pending());
diff --git a/src/test/crimson/seastore/test_transaction_manager.cc b/src/test/crimson/seastore/test_transaction_manager.cc
index 6ad111dca5b..2d20c5fff94 100644
--- a/src/test/crimson/seastore/test_transaction_manager.cc
+++ b/src/test/crimson/seastore/test_transaction_manager.cc
@@ -26,6 +26,10 @@ namespace {
}
}
+laddr_t get_laddr_hint(uint64_t offset) {
+ return laddr_t::from_byte_offset(RootMetaBlock::SIZE + offset);
+}
+
struct test_extent_record_t {
test_extent_desc_t desc;
unsigned refcount = 0;
@@ -67,8 +71,9 @@ struct transaction_manager_test_t :
}
laddr_t get_random_laddr(size_t block_size, size_t limit) {
- return laddr_t::from_byte_offset(block_size *
- std::uniform_int_distribution<>(0, (limit / block_size) - 1)(gen));
+ auto offset = block_size *
+ std::uniform_int_distribution<>(0, (limit / block_size) - 1)(gen);
+ return get_laddr_hint(offset);
}
char get_random_contents() {
@@ -719,7 +724,7 @@ struct transaction_manager_test_t :
[this, &overlay](auto &t) {
return lba_manager->scan_mappings(
t,
- L_ADDR_MIN,
+ get_laddr_hint(0),
L_ADDR_MAX,
[iter=overlay.begin(), &overlay](auto l, auto p, auto len) mutable {
EXPECT_NE(iter, overlay.end());
@@ -830,9 +835,9 @@ struct transaction_manager_test_t :
auto t = create_transaction();
auto extent = alloc_extent(
t,
- laddr_t::from_byte_offset(i * BSIZE),
+ get_laddr_hint(i * BSIZE),
BSIZE);
- ASSERT_EQ(laddr_t::from_byte_offset(i * BSIZE), extent->get_laddr());
+ ASSERT_EQ(get_laddr_hint(i * BSIZE), extent->get_laddr());
submit_transaction(std::move(t));
}
@@ -844,7 +849,7 @@ struct transaction_manager_test_t :
boost::make_counting_iterator(0lu),
boost::make_counting_iterator(BLOCKS),
[this, &t](auto i) {
- return tm->read_extent<TestBlock>(t, laddr_t::from_byte_offset(i * BSIZE), BSIZE
+ return tm->read_extent<TestBlock>(t, get_laddr_hint(i * BSIZE), BSIZE
).si_then([](auto) {
return seastar::now();
});
@@ -870,9 +875,9 @@ struct transaction_manager_test_t :
auto t = create_transaction();
auto extent = alloc_extent(
t,
- laddr_t::from_byte_offset(i * BSIZE),
+ get_laddr_hint(i * BSIZE),
BSIZE);
- ASSERT_EQ(laddr_t::from_byte_offset(i * BSIZE), extent->get_laddr());
+ ASSERT_EQ(get_laddr_hint(i * BSIZE), extent->get_laddr());
if (try_submit_transaction(std::move(t)))
break;
}
@@ -973,6 +978,7 @@ struct transaction_manager_test_t :
extent_types_t::ROOT,
extent_types_t::LADDR_INTERNAL,
extent_types_t::LADDR_LEAF,
+ extent_types_t::ROOT_META,
extent_types_t::OMAP_INNER,
extent_types_t::OMAP_LEAF,
extent_types_t::ONODE_BLOCK_STAGED,
@@ -1346,9 +1352,9 @@ struct transaction_manager_test_t :
void test_remap_pin() {
run_async([this] {
disable_max_extent_size();
- laddr_t l_offset = laddr_t::from_byte_offset(32 << 10);
+ laddr_t l_offset = get_laddr_hint(32 << 10);
size_t l_len = 32 << 10;
- laddr_t r_offset = laddr_t::from_byte_offset(64 << 10);
+ laddr_t r_offset = get_laddr_hint(64 << 10);
size_t r_len = 32 << 10;
{
auto t = create_transaction();
@@ -1400,12 +1406,12 @@ struct transaction_manager_test_t :
void test_clone_and_remap_pin() {
run_async([this] {
disable_max_extent_size();
- laddr_t l_offset = laddr_t::from_byte_offset(32 << 10);
+ laddr_t l_offset = get_laddr_hint(32 << 10);
size_t l_len = 32 << 10;
- laddr_t r_offset = laddr_t::from_byte_offset(64 << 10);
+ laddr_t r_offset = get_laddr_hint(64 << 10);
size_t r_len = 32 << 10;
- laddr_t l_clone_offset = laddr_t::from_byte_offset(96 << 10);
- laddr_t r_clone_offset = laddr_t::from_byte_offset(128 << 10);
+ laddr_t l_clone_offset = get_laddr_hint(96 << 10);
+ laddr_t r_clone_offset = get_laddr_hint(128 << 10);
{
auto t = create_transaction();
auto lext = alloc_extent(t, l_offset, l_len);
@@ -1455,11 +1461,11 @@ struct transaction_manager_test_t :
void test_overwrite_pin() {
run_async([this] {
disable_max_extent_size();
- laddr_t m_offset = laddr_t::from_byte_offset(8 << 10);
+ laddr_t m_offset = get_laddr_hint(8 << 10);
size_t m_len = 56 << 10;
- laddr_t l_offset = laddr_t::from_byte_offset(64 << 10);
+ laddr_t l_offset = get_laddr_hint(64 << 10);
size_t l_len = 64 << 10;
- laddr_t r_offset = laddr_t::from_byte_offset(128 << 10);
+ laddr_t r_offset = get_laddr_hint(128 << 10);
size_t r_len = 64 << 10;
{
auto t = create_transaction();
@@ -1538,7 +1544,7 @@ struct transaction_manager_test_t :
run_async([this] {
disable_max_extent_size();
constexpr unsigned REMAP_NUM = 32;
- constexpr laddr_t offset = L_ADDR_MIN;
+ laddr_t offset = get_laddr_hint(0);
constexpr size_t length = 256 << 10;
{
auto t = create_transaction();
@@ -1575,7 +1581,7 @@ struct transaction_manager_test_t :
if (off == 0 || off >= 255) {
continue;
}
- auto new_off = laddr_t::from_byte_offset(off << 10)
+ auto new_off = get_laddr_hint(off << 10)
.get_byte_distance<extent_len_t>(last_pin->get_key());
auto new_len = last_pin->get_length() - new_off;
//always remap right extent at new split_point
@@ -1621,7 +1627,7 @@ struct transaction_manager_test_t :
run_async([this] {
disable_max_extent_size();
constexpr unsigned REMAP_NUM = 32;
- constexpr laddr_t offset = L_ADDR_MIN;
+ laddr_t offset = get_laddr_hint(0);
constexpr size_t length = 256 << 10;
{
auto t = create_transaction();
@@ -1661,12 +1667,12 @@ struct transaction_manager_test_t :
ASSERT_TRUE(!split_points.empty());
while(!split_points.empty()) {
// new overwrite area: start_off ~ end_off
- auto start_off = split_points.front();
+ auto start_off = split_points.front() + 4 /*RootMetaBlock*/;
split_points.pop_front();
- auto end_off = split_points.front();
+ auto end_off = split_points.front() + 4 /*RootMetaBlock*/;
split_points.pop_front();
ASSERT_TRUE(start_off <= end_off);
- if ((laddr_t::from_byte_offset(end_off << 10) == pin0->get_key() + pin0->get_length())
+ if ((get_laddr_hint(end_off << 10) == pin0->get_key() + pin0->get_length())
|| (start_off == end_off)) {
if (split_points.empty() && empty_transaction) {
early_exit++;
@@ -1675,7 +1681,7 @@ struct transaction_manager_test_t :
continue;
}
empty_transaction = false;
- auto new_off = laddr_t::from_byte_offset(start_off << 10)
+ auto new_off = get_laddr_hint(start_off << 10)
.get_byte_distance<extent_len_t>(last_rpin->get_key());
auto new_len = (end_off - start_off) << 10;
bufferlist bl;
@@ -1768,7 +1774,7 @@ struct tm_random_block_device_test_t :
TEST_P(tm_random_block_device_test_t, scatter_allocation)
{
run_async([this] {
- laddr_t ADDR = laddr_t::from_byte_offset(0xFF * 4096);
+ laddr_t ADDR = get_laddr_hint(0xFF * 4096);
epm->prefill_fragmented_devices();
auto t = create_transaction();
for (int i = 0; i < 1991; i++) {
@@ -1786,7 +1792,7 @@ TEST_P(tm_single_device_test_t, basic)
{
constexpr size_t SIZE = 4096;
run_async([this] {
- laddr_t ADDR = laddr_t::from_byte_offset(0xFF * SIZE);
+ laddr_t ADDR = get_laddr_hint(0xFF * SIZE);
{
auto t = create_transaction();
auto extent = alloc_extent(
@@ -1807,7 +1813,7 @@ TEST_P(tm_single_device_test_t, mutate)
{
constexpr size_t SIZE = 4096;
run_async([this] {
- laddr_t ADDR = laddr_t::from_byte_offset(0xFF * SIZE);
+ laddr_t ADDR = get_laddr_hint(0xFF * SIZE);
{
auto t = create_transaction();
auto extent = alloc_extent(
@@ -1845,8 +1851,8 @@ TEST_P(tm_single_device_test_t, allocate_lba_conflict)
{
constexpr size_t SIZE = 4096;
run_async([this] {
- laddr_t ADDR = laddr_t::from_byte_offset(0xFF * SIZE);
- laddr_t ADDR2 = laddr_t::from_byte_offset(0xFE * SIZE);
+ laddr_t ADDR = get_laddr_hint(0xFF * SIZE);
+ laddr_t ADDR2 = get_laddr_hint(0xFE * SIZE);
auto t = create_transaction();
auto t2 = create_transaction();
@@ -1883,7 +1889,7 @@ TEST_P(tm_single_device_test_t, mutate_lba_conflict)
for (unsigned i = 0; i < 300; ++i) {
auto extent = alloc_extent(
t,
- laddr_t::from_byte_offset(i * SIZE),
+ get_laddr_hint(i * SIZE),
SIZE);
}
check_mappings(t);
@@ -1891,7 +1897,7 @@ TEST_P(tm_single_device_test_t, mutate_lba_conflict)
check();
}
- laddr_t ADDR = laddr_t::from_byte_offset(150 * SIZE);
+ laddr_t ADDR = get_laddr_hint(150 * SIZE);
{
auto t = create_transaction();
auto t2 = create_transaction();
@@ -1917,15 +1923,15 @@ TEST_P(tm_single_device_test_t, concurrent_mutate_lba_no_conflict)
{
constexpr size_t SIZE = 4096;
constexpr size_t NUM = 500;
- laddr_t addr = L_ADDR_MIN;
- laddr_t addr2 = laddr_t::from_byte_offset(SIZE * (NUM - 1));
+ laddr_t addr = get_laddr_hint(0);
+ laddr_t addr2 = get_laddr_hint(SIZE * (NUM - 1));
run_async([this, addr, addr2] {
{
auto t = create_transaction();
for (unsigned i = 0; i < NUM; ++i) {
auto extent = alloc_extent(
t,
- laddr_t::from_byte_offset(i * SIZE),
+ get_laddr_hint(i * SIZE),
SIZE);
}
submit_transaction(std::move(t));
@@ -1949,7 +1955,7 @@ TEST_P(tm_single_device_test_t, create_remove_same_transaction)
{
constexpr size_t SIZE = 4096;
run_async([this] {
- laddr_t ADDR = laddr_t::from_byte_offset(0xFF * SIZE);
+ laddr_t ADDR = get_laddr_hint(0xFF * SIZE);
{
auto t = create_transaction();
auto extent = alloc_extent(
@@ -1985,7 +1991,7 @@ TEST_P(tm_single_device_test_t, split_merge_read_same_transaction)
for (unsigned i = 0; i < 300; ++i) {
auto extent = alloc_extent(
t,
- laddr_t::from_byte_offset(i * SIZE),
+ get_laddr_hint(i * SIZE),
SIZE);
}
check_mappings(t);
@@ -1997,7 +2003,7 @@ TEST_P(tm_single_device_test_t, split_merge_read_same_transaction)
for (unsigned i = 0; i < 240; ++i) {
dec_ref(
t,
- laddr_t::from_byte_offset(i * SIZE));
+ get_laddr_hint(i * SIZE));
}
check_mappings(t);
submit_transaction(std::move(t));
@@ -2010,7 +2016,7 @@ TEST_P(tm_single_device_test_t, inc_dec_ref)
{
constexpr size_t SIZE = 4096;
run_async([this] {
- laddr_t ADDR = laddr_t::from_byte_offset(0xFF * SIZE);
+ laddr_t ADDR = get_laddr_hint(0xFF * SIZE);
{
auto t = create_transaction();
auto extent = alloc_extent(
@@ -2061,10 +2067,10 @@ TEST_P(tm_single_device_test_t, cause_lba_split)
auto t = create_transaction();
auto extent = alloc_extent(
t,
- laddr_t::from_byte_offset(i * SIZE),
+ get_laddr_hint(i * SIZE),
SIZE,
(char)(i & 0xFF));
- ASSERT_EQ(laddr_t::from_byte_offset(i * SIZE), extent->get_laddr());
+ ASSERT_EQ(get_laddr_hint(i * SIZE), extent->get_laddr());
submit_transaction(std::move(t));
}
check();
@@ -2082,9 +2088,9 @@ TEST_P(tm_single_device_test_t, random_writes)
auto t = create_transaction();
auto extent = alloc_extent(
t,
- laddr_t::from_byte_offset(i * BSIZE),
+ get_laddr_hint(i * BSIZE),
BSIZE);
- ASSERT_EQ(laddr_t::from_byte_offset(i * BSIZE), extent->get_laddr());
+ ASSERT_EQ(get_laddr_hint(i * BSIZE), extent->get_laddr());
submit_transaction(std::move(t));
}
@@ -2100,7 +2106,7 @@ TEST_P(tm_single_device_test_t, random_writes)
// pad out transaction
auto paddings = alloc_extents(
t,
- laddr_t::from_byte_offset(TOTAL + (k * PADDING_SIZE)),
+ get_laddr_hint(TOTAL + (k * PADDING_SIZE)),
PADDING_SIZE);
for (auto &padding : paddings) {
dec_ref(t, padding->get_laddr());
@@ -2133,7 +2139,7 @@ TEST_P(tm_single_device_test_t, find_hole_assert_trigger)
TEST_P(tm_single_device_intergrity_check_test_t, remap_lazy_read)
{
- constexpr laddr_t offset = L_ADDR_MIN;
+ laddr_t offset = get_laddr_hint(0);
constexpr size_t length = 256 << 10;
run_async([this, offset] {
disable_max_extent_size();
@@ -2186,7 +2192,7 @@ TEST_P(tm_single_device_test_t, invalid_lba_mapping_detect)
for (int i = 0; i < LEAF_NODE_CAPACITY; i++) {
auto extent = alloc_extent(
t,
- laddr_t::from_byte_offset(i * 4096),
+ get_laddr_hint(i * 4096),
4096,
'a');
}
@@ -2195,12 +2201,12 @@ TEST_P(tm_single_device_test_t, invalid_lba_mapping_detect)
{
auto t = create_transaction();
- auto pin = get_pin(t, laddr_t::from_byte_offset((LEAF_NODE_CAPACITY - 1) * 4096));
+ auto pin = get_pin(t, get_laddr_hint((LEAF_NODE_CAPACITY - 1) * 4096));
assert(pin->is_parent_viewable());
- auto extent = alloc_extent(t, laddr_t::from_byte_offset(LEAF_NODE_CAPACITY * 4096), 4096, 'a');
+ auto extent = alloc_extent(t, get_laddr_hint(LEAF_NODE_CAPACITY * 4096), 4096, 'a');
assert(!pin->is_parent_viewable());
- pin = get_pin(t, laddr_t::from_byte_offset(LEAF_NODE_CAPACITY * 4096));
- std::ignore = alloc_extent(t, laddr_t::from_byte_offset((LEAF_NODE_CAPACITY + 1) * 4096), 4096, 'a');
+ pin = get_pin(t, get_laddr_hint(LEAF_NODE_CAPACITY * 4096));
+ std::ignore = alloc_extent(t, get_laddr_hint((LEAF_NODE_CAPACITY + 1) * 4096), 4096, 'a');
assert(pin->is_parent_viewable());
assert(pin->parent_modified());
pin->maybe_fix_pos();
diff --git a/src/test/crypto.cc b/src/test/crypto.cc
index 819d41c7218..67fb440eeb9 100644
--- a/src/test/crypto.cc
+++ b/src/test/crypto.cc
@@ -1,6 +1,8 @@
#include <errno.h>
#include <time.h>
+#include <iostream> // for std::cout
+
#include <boost/container/small_vector.hpp>
#include "gtest/gtest.h"
diff --git a/src/test/daemon_config.cc b/src/test/daemon_config.cc
index cdea3b05932..4c7abd70b20 100644
--- a/src/test/daemon_config.cc
+++ b/src/test/daemon_config.cc
@@ -21,6 +21,8 @@
#include "include/rados/librados.h"
#include <errno.h>
+
+#include <iostream> // for std::cout
#include <sstream>
#include <string>
#include <string.h>
diff --git a/src/test/encoding.cc b/src/test/encoding.cc
index 3c83716b048..3d508909d6d 100644
--- a/src/test/encoding.cc
+++ b/src/test/encoding.cc
@@ -4,6 +4,8 @@
#include <fmt/format.h>
#include "gtest/gtest.h"
+#include <iostream> // for std::cout
+
using namespace std;
template < typename T >
diff --git a/src/test/osd/types.cc b/src/test/osd/types.cc
index 2dc870411bb..062980d8655 100644
--- a/src/test/osd/types.cc
+++ b/src/test/osd/types.cc
@@ -23,6 +23,8 @@
#include "common/Thread.h"
#include "include/stringify.h"
#include "osd/ReplicatedBackend.h"
+
+#include <iostream> // for std::cout
#include <sstream>
using namespace std;
diff --git a/src/test/osdc/object_cacher_stress.cc b/src/test/osdc/object_cacher_stress.cc
index 0bfdd48eb98..b32c5660112 100644
--- a/src/test/osdc/object_cacher_stress.cc
+++ b/src/test/osdc/object_cacher_stress.cc
@@ -23,6 +23,7 @@
#include "MemWriteback.h"
#include <atomic>
+#include <iostream> // for std::cout
using namespace std;
diff --git a/src/test/perf_counters.cc b/src/test/perf_counters.cc
index 7ab9561bc19..b75e6a50825 100644
--- a/src/test/perf_counters.cc
+++ b/src/test/perf_counters.cc
@@ -15,7 +15,7 @@
#include "include/types.h" // FIXME: ordering shouldn't be important, but right
// now, this include has to come before the others.
-
+#include "include/utime.h"
#include "common/perf_counters_key.h"
#include "common/perf_counters_collection.h"
#include "common/admin_socket_client.h"
diff --git a/src/test/test_addrs.cc b/src/test/test_addrs.cc
index 4062d0431c3..e70d234d743 100644
--- a/src/test/test_addrs.cc
+++ b/src/test/test_addrs.cc
@@ -17,6 +17,7 @@
#include "msg/msg_types.h"
#include "gtest/gtest.h"
+#include <iostream> // for std::cout
#include <sstream>
using namespace std;
diff --git a/src/test/test_denc.cc b/src/test/test_denc.cc
index 02dd1454ef8..c9144bd05bf 100644
--- a/src/test/test_denc.cc
+++ b/src/test/test_denc.cc
@@ -15,6 +15,8 @@
*/
#include <stdio.h>
+
+#include <iostream> // for std::cout
#include <numeric>
#include "global/global_init.h"
diff --git a/src/test/test_features.cc b/src/test/test_features.cc
index 1ae758bfb34..bdd8838224b 100644
--- a/src/test/test_features.cc
+++ b/src/test/test_features.cc
@@ -2,6 +2,8 @@
// vim: ts=8 sw=2 smarttab
#include <stdio.h>
+#include <iostream> // for std::cout
+
#include "global/global_init.h"
#include "common/ceph_argparse.h"
#include "common/ceph_releases.h"
diff --git a/src/test/test_mempool.cc b/src/test/test_mempool.cc
index bb46b19aa4e..b806282d039 100644
--- a/src/test/test_mempool.cc
+++ b/src/test/test_mempool.cc
@@ -16,6 +16,8 @@
#include <stdio.h>
+#include <iostream> // for std::cout
+
#include "global/global_init.h"
#include "common/ceph_argparse.h"
#include "global/global_context.h"
diff --git a/src/test/test_perf_counters_cache.cc b/src/test/test_perf_counters_cache.cc
index 1fa147ee273..fa2d541b7f7 100644
--- a/src/test/test_perf_counters_cache.cc
+++ b/src/test/test_perf_counters_cache.cc
@@ -4,6 +4,7 @@
#include "global/global_context.h"
#include "global/global_init.h"
#include "include/msgr.h" // for CEPH_ENTITY_TYPE_CLIENT
+#include "include/utime.h"
#include "gtest/gtest.h"
using namespace ceph::perf_counters;
diff --git a/src/test/test_rewrite_latency.cc b/src/test/test_rewrite_latency.cc
index 348c8dde5c6..48a95cf183b 100644
--- a/src/test/test_rewrite_latency.cc
+++ b/src/test/test_rewrite_latency.cc
@@ -1,5 +1,6 @@
#include <unistd.h>
+#include <iostream> // for std::cout
#include <map>
#include <errno.h>
diff --git a/src/test/test_snap_mapper.cc b/src/test/test_snap_mapper.cc
index a47d2538c3a..7a9ac62defe 100644
--- a/src/test/test_snap_mapper.cc
+++ b/src/test/test_snap_mapper.cc
@@ -1,4 +1,5 @@
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+#include <iostream> // for std::cout
#include <iterator>
#include <map>
#include <set>
diff --git a/src/test/test_striper.cc b/src/test/test_striper.cc
index ee70304ebc8..1e5f93a49be 100644
--- a/src/test/test_striper.cc
+++ b/src/test/test_striper.cc
@@ -3,6 +3,8 @@
#include "osdc/Striper.h"
+#include <iostream> // for std::cout
+
using namespace std;
TEST(Striper, Stripe1)
diff --git a/src/test/test_utime.cc b/src/test/test_utime.cc
index b1cee0e805c..de1d6f46878 100644
--- a/src/test/test_utime.cc
+++ b/src/test/test_utime.cc
@@ -1,4 +1,7 @@
#include "include/utime.h"
+
+#include <iostream> // for std::cout
+
#include "gtest/gtest.h"
#include "include/stringify.h"
#include "common/ceph_context.h"
diff --git a/src/test/test_workqueue.cc b/src/test/test_workqueue.cc
index 771b9d65952..5c2fc459da2 100644
--- a/src/test/test_workqueue.cc
+++ b/src/test/test_workqueue.cc
@@ -1,6 +1,9 @@
#include "gtest/gtest.h"
#include "common/WorkQueue.h"
+
+#include <iostream> // for std::cout
+
#include "common/ceph_argparse.h"
using namespace std;
diff --git a/src/test/testcrypto.cc b/src/test/testcrypto.cc
index 2efb9b219b9..8e3337babea 100644
--- a/src/test/testcrypto.cc
+++ b/src/test/testcrypto.cc
@@ -1,6 +1,8 @@
#include "auth/Crypto.h"
-#include "common/Clock.h"
+#include <iostream> // for std::cout
+
+#include "common/Clock.h"
#include "common/config.h"
#include "common/debug.h"
diff --git a/src/test/testkeys.cc b/src/test/testkeys.cc
index 85d0b56676f..dacddb08786 100644
--- a/src/test/testkeys.cc
+++ b/src/test/testkeys.cc
@@ -1,4 +1,7 @@
#include "auth/cephx/CephxKeyServer.h"
+
+#include <iostream> // for std::cout
+
#include "common/ceph_argparse.h"
#include "global/global_init.h"
#include "common/config.h"
diff --git a/src/tools/radosacl.cc b/src/tools/radosacl.cc
index 3bfef8fb157..a6c9b9f8dc4 100644
--- a/src/tools/radosacl.cc
+++ b/src/tools/radosacl.cc
@@ -16,6 +16,8 @@
#include <time.h>
#include <errno.h>
+#include <iostream> // for std::cerr
+
#include "include/types.h"
#include "include/rados/librados.hpp"