summaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/ceph-volume/ceph_volume/devices/lvm/zap.py3
-rw-r--r--src/ceph-volume/ceph_volume/tests/devices/lvm/data_zap.py2
-rw-r--r--src/ceph-volume/ceph_volume/util/disk.py19
-rw-r--r--src/ceph-volume/ceph_volume/util/prepare.py3
-rwxr-xr-xsrc/cephadm/cephadm.py42
-rw-r--r--src/cephadm/cephadmlib/daemons/monitoring.py28
-rw-r--r--src/cephadm/cephadmlib/file_utils.py24
-rw-r--r--src/cephadm/cephadmlib/net_utils.py14
-rw-r--r--src/cephadm/tests/test_deploy.py2
-rw-r--r--src/common/CMakeLists.txt1
-rw-r--r--src/common/bit_vector.hpp4
-rw-r--r--src/common/ceph_time.h17
-rw-r--r--src/common/io_exerciser/CMakeLists.txt4
-rw-r--r--src/common/io_exerciser/DataGenerator.cc794
-rw-r--r--src/common/io_exerciser/DataGenerator.h268
-rw-r--r--src/common/io_exerciser/EcIoSequence.cc267
-rw-r--r--src/common/io_exerciser/EcIoSequence.h65
-rw-r--r--src/common/io_exerciser/IoOp.cc424
-rw-r--r--src/common/io_exerciser/IoOp.h312
-rw-r--r--src/common/io_exerciser/IoSequence.cc327
-rw-r--r--src/common/io_exerciser/IoSequence.h399
-rw-r--r--src/common/io_exerciser/Model.cc24
-rw-r--r--src/common/io_exerciser/Model.h62
-rw-r--r--src/common/io_exerciser/ObjectModel.cc242
-rw-r--r--src/common/io_exerciser/ObjectModel.h75
-rw-r--r--src/common/io_exerciser/OpType.h91
-rw-r--r--src/common/io_exerciser/RadosIo.cc577
-rw-r--r--src/common/io_exerciser/RadosIo.h112
-rw-r--r--src/common/json/BalancerStructures.cc38
-rw-r--r--src/common/json/BalancerStructures.h35
-rw-r--r--src/common/json/CMakeLists.txt4
-rw-r--r--src/common/json/ConfigStructures.cc20
-rw-r--r--src/common/json/ConfigStructures.h24
-rw-r--r--src/common/json/OSDStructures.cc150
-rw-r--r--src/common/json/OSDStructures.h189
-rw-r--r--src/common/options/crimson.yaml.in2
-rw-r--r--src/common/options/mds.yaml.in2
-rw-r--r--src/common/options/mon.yaml.in7
-rw-r--r--src/common/pick_address.cc29
-rw-r--r--src/common/pick_address.h2
-rw-r--r--src/crimson/common/shared_lru.h23
-rw-r--r--src/crimson/os/alienstore/alien_store.cc44
-rw-r--r--src/crimson/os/alienstore/alien_store.h35
-rw-r--r--src/crimson/os/cyanstore/cyan_store.cc27
-rw-r--r--src/crimson/os/cyanstore/cyan_store.h27
-rw-r--r--src/crimson/os/futurized_store.h27
-rw-r--r--src/crimson/os/seastore/async_cleaner.cc6
-rw-r--r--src/crimson/os/seastore/async_cleaner.h14
-rw-r--r--src/crimson/os/seastore/cache.cc8
-rw-r--r--src/crimson/os/seastore/cache.h23
-rw-r--r--src/crimson/os/seastore/seastore.cc49
-rw-r--r--src/crimson/os/seastore/seastore.h36
-rw-r--r--src/crimson/os/seastore/seastore_types.h33
-rw-r--r--src/crimson/os/seastore/transaction.h15
-rw-r--r--src/crimson/os/seastore/transaction_manager.cc2
-rw-r--r--src/crimson/os/seastore/transaction_manager.h3
-rw-r--r--src/crimson/osd/backfill_facades.h3
-rw-r--r--src/crimson/osd/backfill_state.cc266
-rw-r--r--src/crimson/osd/backfill_state.h78
-rw-r--r--src/crimson/osd/osd_operation.h15
-rw-r--r--src/crimson/osd/osd_operations/client_request.h4
-rw-r--r--src/crimson/osd/osd_operations/logmissing_request.h3
-rw-r--r--src/crimson/osd/osd_operations/logmissing_request_reply.h3
-rw-r--r--src/crimson/osd/osd_operations/peering_event.h8
-rw-r--r--src/crimson/osd/osd_operations/pg_advance_map.h4
-rw-r--r--src/crimson/osd/osd_operations/recovery_subrequest.h3
-rw-r--r--src/crimson/osd/osd_operations/replicated_request.h3
-rw-r--r--src/crimson/osd/osd_operations/scrub_events.h12
-rw-r--r--src/crimson/osd/pg.cc11
-rw-r--r--src/crimson/osd/pg.h5
-rw-r--r--src/crimson/osd/pg_backend.cc15
-rw-r--r--src/crimson/osd/pg_backend.h9
-rw-r--r--src/crimson/osd/pg_recovery.cc19
-rw-r--r--src/crimson/osd/pg_recovery.h2
-rw-r--r--src/crimson/osd/pg_shard_manager.h46
-rw-r--r--src/crimson/osd/replicated_backend.cc32
-rw-r--r--src/crimson/osd/replicated_recovery_backend.cc41
-rw-r--r--src/crimson/osd/shard_services.cc5
-rw-r--r--src/crimson/osd/shard_services.h3
-rw-r--r--src/crimson/tools/store_nbd/tm_driver.cc2
-rw-r--r--src/exporter/ceph_exporter.cc12
-rw-r--r--src/include/rados/librados.hpp34
-rw-r--r--src/include/random.h4
-rw-r--r--src/kv/KeyValueDB.h22
-rw-r--r--src/kv/RocksDBStore.cc92
-rw-r--r--src/kv/RocksDBStore.h4
-rw-r--r--src/librados/librados_asio.h72
-rw-r--r--src/librados/librados_cxx.cc8
-rw-r--r--src/mgr/PyModule.h4
-rwxr-xr-xsrc/mon/NVMeofGwMap.cc43
-rwxr-xr-xsrc/mon/NVMeofGwMap.h2
-rw-r--r--src/mon/NVMeofGwMon.h2
-rw-r--r--src/os/DBObjectMap.cc5
-rw-r--r--src/os/DBObjectMap.h2
-rw-r--r--src/os/ObjectStore.h52
-rw-r--r--src/os/bluestore/BlueStore.cc157
-rw-r--r--src/os/bluestore/BlueStore.h18
-rw-r--r--src/os/kstore/KStore.cc72
-rw-r--r--src/os/kstore/KStore.h8
-rw-r--r--src/os/memstore/MemStore.cc70
-rw-r--r--src/os/memstore/MemStore.h15
-rw-r--r--src/osd/ECBackend.cc26
-rw-r--r--src/osd/ECCommon.cc315
-rw-r--r--src/osd/ECCommon.h13
-rw-r--r--src/osd/OSD.cc96
-rw-r--r--src/osd/OSDMap.cc6
-rw-r--r--src/osd/PGBackend.h4
-rw-r--r--src/osd/PrimaryLogPG.cc76
-rw-r--r--src/osd/PrimaryLogPG.h8
-rw-r--r--src/pybind/mgr/cephadm/inventory.py4
-rw-r--r--src/pybind/mgr/cephadm/module.py46
-rw-r--r--src/pybind/mgr/cephadm/schedule.py2
-rw-r--r--src/pybind/mgr/cephadm/services/cephadmservice.py8
-rw-r--r--src/pybind/mgr/cephadm/services/monitoring.py16
-rw-r--r--src/pybind/mgr/cephadm/templates/services/alertmanager/alertmanager.yml.j22
-rw-r--r--src/pybind/mgr/cephadm/templates/services/mgmt-gateway/nginx.conf.j21
-rw-r--r--src/pybind/mgr/cephadm/templates/services/prometheus/prometheus.yml.j216
-rw-r--r--src/pybind/mgr/cephadm/tests/test_cephadm.py12
-rw-r--r--src/pybind/mgr/cephadm/tests/test_services.py70
-rw-r--r--src/pybind/mgr/orchestrator/_interface.py4
-rw-r--r--src/pybind/mgr/orchestrator/module.py24
-rw-r--r--src/python-common/ceph/deployment/service_spec.py7
-rw-r--r--src/rgw/CMakeLists.txt6
-rw-r--r--src/rgw/driver/daos/rgw_sal_daos.cc2
-rw-r--r--src/rgw/driver/daos/rgw_sal_daos.h1
-rw-r--r--src/rgw/driver/motr/rgw_sal_motr.cc5
-rw-r--r--src/rgw/driver/motr/rgw_sal_motr.h1
-rw-r--r--src/rgw/driver/posix/rgw_sal_posix.cc8
-rw-r--r--src/rgw/driver/posix/rgw_sal_posix.h7
-rw-r--r--src/rgw/driver/rados/rgw_period.cc14
-rw-r--r--src/rgw/driver/rados/rgw_rados.cc15
-rw-r--r--src/rgw/driver/rados/rgw_rados.h6
-rw-r--r--src/rgw/driver/rados/rgw_sal_rados.cc108
-rw-r--r--src/rgw/driver/rados/rgw_sal_rados.h9
-rw-r--r--src/rgw/driver/rados/rgw_tools.cc32
-rw-r--r--src/rgw/driver/rados/rgw_user.h8
-rw-r--r--src/rgw/driver/rados/rgw_zone.h1
-rw-r--r--src/rgw/radosgw-admin/orphan.cc (renamed from src/rgw/rgw_orphan.cc)7
-rw-r--r--src/rgw/radosgw-admin/orphan.h (renamed from src/rgw/rgw_orphan.h)0
-rw-r--r--src/rgw/radosgw-admin/radosgw-admin.cc (renamed from src/rgw/rgw_admin.cc)39
-rw-r--r--src/rgw/radosgw-admin/sync_checkpoint.cc (renamed from src/rgw/rgw_sync_checkpoint.cc)6
-rw-r--r--src/rgw/radosgw-admin/sync_checkpoint.h (renamed from src/rgw/rgw_sync_checkpoint.h)0
-rw-r--r--src/rgw/rgw_cksum_pipe.cc11
-rw-r--r--src/rgw/rgw_cksum_pipe.h36
-rw-r--r--src/rgw/rgw_common.cc1
-rw-r--r--src/rgw/rgw_common.h1
-rw-r--r--src/rgw/rgw_iam_policy.cc9
-rw-r--r--src/rgw/rgw_iam_policy.h4
-rw-r--r--src/rgw/rgw_op.cc189
-rw-r--r--src/rgw/rgw_op.h47
-rw-r--r--src/rgw/rgw_op_type.h1
-rw-r--r--src/rgw/rgw_rest.h11
-rw-r--r--src/rgw/rgw_rest_s3.cc199
-rw-r--r--src/rgw/rgw_rest_s3.h18
-rw-r--r--src/rgw/rgw_sal.h31
-rw-r--r--src/rgw/rgw_sal_dbstore.cc16
-rw-r--r--src/rgw/rgw_sal_dbstore.h9
-rw-r--r--src/rgw/rgw_sal_filter.cc11
-rw-r--r--src/rgw/rgw_sal_filter.h9
-rw-r--r--src/rgw/services/svc_zone.cc12
-rw-r--r--src/rgw/services/svc_zone.h1
-rwxr-xr-xsrc/script/run-make.sh1
-rw-r--r--src/test/ObjectMap/KeyValueDBMemory.cc21
-rw-r--r--src/test/crimson/seastore/test_btree_lba_manager.cc25
-rw-r--r--src/test/crimson/seastore/test_seastore_cache.cc5
-rw-r--r--src/test/crimson/test_backfill.cc142
-rw-r--r--src/test/librados/aio.cc56
-rw-r--r--src/test/librados/aio_cxx.cc89
-rw-r--r--src/test/librados/asio.cc137
-rw-r--r--src/test/objectstore/ObjectStoreImitator.h10
-rw-r--r--src/test/objectstore/allocsim/ops_replayer.cc35
-rw-r--r--src/test/osd/CMakeLists.txt2
-rw-r--r--src/test/osd/ceph_test_rados_io_sequence.cc1041
-rw-r--r--src/test/osd/ceph_test_rados_io_sequence.h580
-rw-r--r--src/test/pybind/pytest.ini1
-rw-r--r--src/test/pybind/test_rados.py2
-rw-r--r--src/test/rgw/rgw_multi/tests.py22
-rw-r--r--src/test/rgw/test_rgw_iam_policy.cc8
-rw-r--r--src/test/test_ipaddr.cc155
-rwxr-xr-xsrc/vstart.sh20
180 files changed, 7329 insertions, 3036 deletions
diff --git a/src/ceph-volume/ceph_volume/devices/lvm/zap.py b/src/ceph-volume/ceph_volume/devices/lvm/zap.py
index c278de43eb0..a6d82c7f0fa 100644
--- a/src/ceph-volume/ceph_volume/devices/lvm/zap.py
+++ b/src/ceph-volume/ceph_volume/devices/lvm/zap.py
@@ -119,13 +119,12 @@ class Zap:
osd_uuid = details.get('osd_uuid')
break
- for osd_uuid, details in raw_report.items():
+ for _, details in raw_report.items():
device: str = details.get('device')
if details.get('osd_uuid') == osd_uuid:
raw_devices.add(device)
return list(raw_devices)
-
def find_associated_devices(self) -> List[api.Volume]:
"""From an ``osd_id`` and/or an ``osd_fsid``, filter out all the Logical Volumes (LVs) in the
diff --git a/src/ceph-volume/ceph_volume/tests/devices/lvm/data_zap.py b/src/ceph-volume/ceph_volume/tests/devices/lvm/data_zap.py
index cca64e83ab0..c971b7776ef 100644
--- a/src/ceph-volume/ceph_volume/tests/devices/lvm/data_zap.py
+++ b/src/ceph-volume/ceph_volume/tests/devices/lvm/data_zap.py
@@ -22,7 +22,7 @@ ceph_bluestore_tool_output = '''
"whoami": "0"
},
"/dev/vdx": {
- "osd_uuid": "d5a496bc-dcb9-4ad0-a12c-393d3200d2b6",
+ "osd_uuid": "d5a496bc-dcb9-4ad0-a12c-393d3200d2b7",
"size": 214748364800,
"btime": "2024-10-16T10:51:05.955279+0000",
"description": "main",
diff --git a/src/ceph-volume/ceph_volume/util/disk.py b/src/ceph-volume/ceph_volume/util/disk.py
index 77b55314f66..921e61a4534 100644
--- a/src/ceph-volume/ceph_volume/util/disk.py
+++ b/src/ceph-volume/ceph_volume/util/disk.py
@@ -347,12 +347,21 @@ def lsblk_all(device: str = '',
return result
-def is_device(dev):
+def is_device(dev: str) -> bool:
"""
- Boolean to determine if a given device is a block device (**not**
- a partition!)
+ Determines whether the given path corresponds to a block device (not a partition).
- For example: /dev/sda would return True, but not /dev/sdc1
+ This function checks whether the provided device path represents a valid block device,
+ such as a physical disk (/dev/sda) or an allowed loop device, but excludes partitions
+ (/dev/sdc1). It performs several validation steps, including file existence, path format,
+ device type, and additional checks for loop devices if allowed.
+
+ Args:
+ dev (str): The path to the device (e.g., "/dev/sda").
+
+ Returns:
+ bool: True if the path corresponds to a valid block device (not a partition),
+ otherwise False.
"""
if not os.path.exists(dev):
return False
@@ -364,7 +373,7 @@ def is_device(dev):
TYPE = lsblk(dev).get('TYPE')
if TYPE:
- return TYPE in ['disk', 'mpath']
+ return TYPE in ['disk', 'mpath', 'loop']
# fallback to stat
return _stat_is_device(os.lstat(dev).st_mode) and not is_partition(dev)
diff --git a/src/ceph-volume/ceph_volume/util/prepare.py b/src/ceph-volume/ceph_volume/util/prepare.py
index 9c863b83d93..ff7fc023fc4 100644
--- a/src/ceph-volume/ceph_volume/util/prepare.py
+++ b/src/ceph-volume/ceph_volume/util/prepare.py
@@ -9,6 +9,7 @@ import logging
import json
from ceph_volume import process, conf, terminal
from ceph_volume.util import system, constants, str_to_int, disk
+from typing import Optional
logger = logging.getLogger(__name__)
mlogger = terminal.MultiLogger(__name__)
@@ -121,7 +122,7 @@ def get_block_wal_size(lv_format=True):
return wal_size
-def create_id(fsid, json_secrets, osd_id=None):
+def create_id(fsid: str, json_secrets: str, osd_id: Optional[str]=None) -> str:
"""
:param fsid: The osd fsid to create, always required
:param json_secrets: a json-ready object with whatever secrets are wanted
diff --git a/src/cephadm/cephadm.py b/src/cephadm/cephadm.py
index d2ddf564116..a8616980e4d 100755
--- a/src/cephadm/cephadm.py
+++ b/src/cephadm/cephadm.py
@@ -111,6 +111,7 @@ from cephadmlib.file_utils import (
unlink_file,
write_new,
write_tmp,
+ update_meta_file,
)
from cephadmlib.net_utils import (
build_addrv_params,
@@ -3453,6 +3454,7 @@ def list_daemons(
detail: bool = True,
legacy_dir: Optional[str] = None,
daemon_name: Optional[str] = None,
+ type_of_daemon: Optional[str] = None,
) -> List[Dict[str, str]]:
host_version: Optional[str] = None
ls = []
@@ -3489,6 +3491,8 @@ def list_daemons(
if os.path.exists(data_dir):
for i in os.listdir(data_dir):
if i in ['mon', 'osd', 'mds', 'mgr', 'rgw']:
+ if type_of_daemon and type_of_daemon != i:
+ continue
daemon_type = i
for j in os.listdir(os.path.join(data_dir, i)):
if '-' not in j:
@@ -3525,6 +3529,8 @@ def list_daemons(
if daemon_name and name != daemon_name:
continue
(daemon_type, daemon_id) = j.split('.', 1)
+ if type_of_daemon and type_of_daemon != daemon_type:
+ continue
unit_name = get_unit_name(fsid,
daemon_type,
daemon_id)
@@ -4705,6 +4711,34 @@ def command_list_images(ctx: CephadmContext) -> None:
# print default images
cp_obj.write(sys.stdout)
+
+def update_service_for_daemon(ctx: CephadmContext,
+ available_daemons: list,
+ update_daemons: list) -> None:
+ """ Update the unit.meta file of daemon with required service name for valid daemons"""
+
+ data = {'service_name': ctx.service_name}
+ # check if all the daemon names are valid
+ if not set(update_daemons).issubset(set(available_daemons)):
+ raise Error(f'Error EINVAL: one or more daemons of {update_daemons} does not exist on this host')
+ for name in update_daemons:
+ path = os.path.join(ctx.data_dir, ctx.fsid, name, 'unit.meta')
+ update_meta_file(path, data)
+ print(f'Successfully updated daemon {name} with service {ctx.service_name}')
+
+
+@infer_fsid
+def command_update_osd_service(ctx: CephadmContext) -> int:
+ """update service for provided daemon"""
+ update_daemons = [f'osd.{osd_id}' for osd_id in ctx.osd_ids.split(',')]
+ daemons = list_daemons(ctx, detail=False, type_of_daemon='osd')
+ if not daemons:
+ raise Error(f'Daemon {ctx.osd_ids} does not exists on this host')
+ available_daemons = [d['name'] for d in daemons]
+ update_service_for_daemon(ctx, available_daemons, update_daemons)
+ return 0
+
+
##################################
@@ -5571,6 +5605,14 @@ def _get_parser():
parser_list_images = subparsers.add_parser(
'list-images', help='list all the default images')
parser_list_images.set_defaults(func=command_list_images)
+
+ parser_update_service = subparsers.add_parser(
+ 'update-osd-service', help='update service for provided daemon')
+ parser_update_service.set_defaults(func=command_update_osd_service)
+ parser_update_service.add_argument('--fsid', help='cluster FSID')
+ parser_update_service.add_argument('--osd-ids', required=True, help='Comma-separated OSD IDs')
+ parser_update_service.add_argument('--service-name', required=True, help='OSD service name')
+
return parser
diff --git a/src/cephadm/cephadmlib/daemons/monitoring.py b/src/cephadm/cephadmlib/daemons/monitoring.py
index 9a9402632b0..4ba00daaefb 100644
--- a/src/cephadm/cephadmlib/daemons/monitoring.py
+++ b/src/cephadm/cephadmlib/daemons/monitoring.py
@@ -16,7 +16,13 @@ from ..daemon_form import register as register_daemon_form
from ..daemon_identity import DaemonIdentity
from ..deployment_utils import to_deployment_container
from ..exceptions import Error
-from ..net_utils import get_fqdn, get_hostname, get_ip_addresses, wrap_ipv6
+from ..net_utils import (
+ get_fqdn,
+ get_hostname,
+ get_ip_addresses,
+ wrap_ipv6,
+ EndPoint,
+)
@register_daemon_form
@@ -89,11 +95,6 @@ class Monitoring(ContainerDaemonForm):
'image': DefaultImages.ALERTMANAGER.image_ref,
'cpus': '2',
'memory': '2GB',
- 'args': [
- '--cluster.listen-address=:{}'.format(
- port_map['alertmanager'][1]
- ),
- ],
'config-json-files': [
'alertmanager.yml',
],
@@ -248,11 +249,14 @@ class Monitoring(ContainerDaemonForm):
ip = meta['ip']
if 'ports' in meta and meta['ports']:
port = meta['ports'][0]
- if daemon_type == 'prometheus':
- config = fetch_configs(ctx)
+ config = fetch_configs(ctx)
+ if daemon_type in ['prometheus', 'alertmanager']:
ip_to_bind_to = config.get('ip_to_bind_to', '')
if ip_to_bind_to:
ip = ip_to_bind_to
+ web_listen_addr = str(EndPoint(ip, port))
+ r += [f'--web.listen-address={web_listen_addr}']
+ if daemon_type == 'prometheus':
retention_time = config.get('retention_time', '15d')
retention_size = config.get(
'retention_size', '0'
@@ -276,9 +280,11 @@ class Monitoring(ContainerDaemonForm):
r += ['--web.route-prefix=/prometheus/']
else:
r += [f'--web.external-url={scheme}://{host}:{port}']
- r += [f'--web.listen-address={ip}:{port}']
if daemon_type == 'alertmanager':
- config = fetch_configs(ctx)
+ clus_listen_addr = str(
+ EndPoint(ip, self.port_map[daemon_type][1])
+ )
+ r += [f'--cluster.listen-address={clus_listen_addr}']
use_url_prefix = config.get('use_url_prefix', False)
peers = config.get('peers', list()) # type: ignore
for peer in peers:
@@ -294,13 +300,11 @@ class Monitoring(ContainerDaemonForm):
if daemon_type == 'promtail':
r += ['--config.expand-env']
if daemon_type == 'prometheus':
- config = fetch_configs(ctx)
try:
r += [f'--web.config.file={config["web_config"]}']
except KeyError:
pass
if daemon_type == 'node-exporter':
- config = fetch_configs(ctx)
try:
r += [f'--web.config.file={config["web_config"]}']
except KeyError:
diff --git a/src/cephadm/cephadmlib/file_utils.py b/src/cephadm/cephadmlib/file_utils.py
index 27e70e31756..4dd88cc3671 100644
--- a/src/cephadm/cephadmlib/file_utils.py
+++ b/src/cephadm/cephadmlib/file_utils.py
@@ -5,6 +5,7 @@ import datetime
import logging
import os
import tempfile
+import json
from contextlib import contextmanager
from pathlib import Path
@@ -157,3 +158,26 @@ def unlink_file(
except Exception:
if not ignore_errors:
raise
+
+
+def update_meta_file(file_path: str, update_key_val: dict) -> None:
+ """Update key in the file with provided value"""
+ try:
+ with open(file_path, 'r') as fh:
+ data = json.load(fh)
+ file_stat = os.stat(file_path)
+ except FileNotFoundError:
+ raise
+ except Exception:
+ logger.exception(f'Failed to update {file_path}')
+ raise
+ data.update(
+ {key: value for key, value in update_key_val.items() if key in data}
+ )
+
+ with write_new(
+ file_path,
+ owner=(file_stat.st_uid, file_stat.st_gid),
+ perms=(file_stat.st_mode & 0o777),
+ ) as fh:
+ fh.write(json.dumps(data, indent=4) + '\n')
diff --git a/src/cephadm/cephadmlib/net_utils.py b/src/cephadm/cephadmlib/net_utils.py
index 9a7f138b1c6..bfa61d933ef 100644
--- a/src/cephadm/cephadmlib/net_utils.py
+++ b/src/cephadm/cephadmlib/net_utils.py
@@ -24,12 +24,22 @@ class EndPoint:
def __init__(self, ip: str, port: int) -> None:
self.ip = ip
self.port = port
+ self.is_ipv4 = True
+ try:
+ if ip and ipaddress.ip_network(ip).version == 6:
+ self.is_ipv4 = False
+ except Exception:
+ logger.exception('Failed to check ip address version')
def __str__(self) -> str:
- return f'{self.ip}:{self.port}'
+ if self.is_ipv4:
+ return f'{self.ip}:{self.port}'
+ return f'[{self.ip}]:{self.port}'
def __repr__(self) -> str:
- return f'{self.ip}:{self.port}'
+ if self.is_ipv4:
+ return f'{self.ip}:{self.port}'
+ return f'[{self.ip}]:{self.port}'
def attempt_bind(ctx, s, address, port):
diff --git a/src/cephadm/tests/test_deploy.py b/src/cephadm/tests/test_deploy.py
index c5094db335f..1736639ed55 100644
--- a/src/cephadm/tests/test_deploy.py
+++ b/src/cephadm/tests/test_deploy.py
@@ -316,7 +316,7 @@ def test_deploy_a_monitoring_container(cephadm_fs, funkypatch):
runfile_lines = f.read().splitlines()
assert 'podman' in runfile_lines[-1]
assert runfile_lines[-1].endswith(
- 'quay.io/titans/prometheus:latest --config.file=/etc/prometheus/prometheus.yml --storage.tsdb.path=/prometheus --storage.tsdb.retention.time=15d --storage.tsdb.retention.size=0 --web.external-url=http://10.10.10.10:9095 --web.listen-address=1.2.3.4:9095'
+ 'quay.io/titans/prometheus:latest --config.file=/etc/prometheus/prometheus.yml --storage.tsdb.path=/prometheus --web.listen-address=1.2.3.4:9095 --storage.tsdb.retention.time=15d --storage.tsdb.retention.size=0 --web.external-url=http://10.10.10.10:9095'
)
assert '--user 8765' in runfile_lines[-1]
assert f'-v /var/lib/ceph/{fsid}/prometheus.fire/etc/prometheus:/etc/prometheus:Z' in runfile_lines[-1]
diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt
index ea3cce16609..c607839a8d2 100644
--- a/src/common/CMakeLists.txt
+++ b/src/common/CMakeLists.txt
@@ -13,6 +13,7 @@ if(WIN32)
endif()
add_subdirectory(io_exerciser)
+add_subdirectory(json)
add_subdirectory(options)
set(common_srcs
diff --git a/src/common/bit_vector.hpp b/src/common/bit_vector.hpp
index 961d9a0192e..c5fd491ed29 100644
--- a/src/common/bit_vector.hpp
+++ b/src/common/bit_vector.hpp
@@ -29,8 +29,8 @@ private:
static const uint8_t MASK = static_cast<uint8_t>((1 << _bit_count) - 1);
// must be power of 2
- BOOST_STATIC_ASSERT((_bit_count != 0) && !(_bit_count & (_bit_count - 1)));
- BOOST_STATIC_ASSERT(_bit_count <= BITS_PER_BYTE);
+ static_assert((_bit_count != 0) && !(_bit_count & (_bit_count - 1)));
+ static_assert(_bit_count <= BITS_PER_BYTE);
template <typename DataIterator>
class ReferenceImpl {
diff --git a/src/common/ceph_time.h b/src/common/ceph_time.h
index 01feff4c063..0b05be5372e 100644
--- a/src/common/ceph_time.h
+++ b/src/common/ceph_time.h
@@ -342,6 +342,23 @@ public:
}
};
+// Please note time_guard is not thread safety -- multiple threads
+// updating same diff_accumulator can corrupt it.
+template <class ClockT = mono_clock>
+class time_guard {
+ const typename ClockT::time_point start;
+ timespan& diff_accumulator;
+
+public:
+ time_guard(timespan& diff_accumulator)
+ : start(ClockT::now()),
+ diff_accumulator(diff_accumulator) {
+ }
+ ~time_guard() {
+ diff_accumulator += ClockT::now() - start;
+ }
+};
+
namespace time_detail {
// So that our subtractions produce negative spans rather than
// arithmetic underflow.
diff --git a/src/common/io_exerciser/CMakeLists.txt b/src/common/io_exerciser/CMakeLists.txt
index 07091df86e1..ab2e64fc222 100644
--- a/src/common/io_exerciser/CMakeLists.txt
+++ b/src/common/io_exerciser/CMakeLists.txt
@@ -5,9 +5,11 @@ add_library(object_io_exerciser STATIC
Model.cc
ObjectModel.cc
RadosIo.cc
+ EcIoSequence.cc
)
target_link_libraries(object_io_exerciser
- librados
+ librados
global
+ json_structures
) \ No newline at end of file
diff --git a/src/common/io_exerciser/DataGenerator.cc b/src/common/io_exerciser/DataGenerator.cc
index 9aa77eeb6e9..701c32fa9ec 100644
--- a/src/common/io_exerciser/DataGenerator.cc
+++ b/src/common/io_exerciser/DataGenerator.cc
@@ -2,32 +2,28 @@
// vim: ts=8 sw=2 smarttab
#include "DataGenerator.h"
-#include "ObjectModel.h"
+#include <chrono>
+#include <iostream>
+#include <stdexcept>
+#include "ObjectModel.h"
#include "common/debug.h"
#include "common/dout.h"
-
#include "fmt/format.h"
#include "fmt/ranges.h"
-#include <chrono>
-#include <iostream>
-#include <stdexcept>
-
#define dout_subsys ceph_subsys_rados
#define dout_context g_ceph_context
using DataGenerator = ceph::io_exerciser::data_generation::DataGenerator;
-using SeededRandomGenerator = ceph::io_exerciser::data_generation
- ::SeededRandomGenerator;
-using HeaderedSeededRandomGenerator = ceph::io_exerciser::data_generation
- ::HeaderedSeededRandomGenerator;
+using SeededRandomGenerator =
+ ceph::io_exerciser::data_generation ::SeededRandomGenerator;
+using HeaderedSeededRandomGenerator =
+ ceph::io_exerciser::data_generation ::HeaderedSeededRandomGenerator;
std::unique_ptr<DataGenerator> DataGenerator::create_generator(
- GenerationType generationType, const ObjectModel& model)
-{
- switch(generationType)
- {
+ GenerationType generationType, const ObjectModel& model) {
+ switch (generationType) {
case GenerationType::SeededRandom:
return std::make_unique<SeededRandomGenerator>(model);
case GenerationType::HeaderedSeededRandom:
@@ -39,28 +35,25 @@ std::unique_ptr<DataGenerator> DataGenerator::create_generator(
return nullptr;
}
-bufferlist DataGenerator::generate_wrong_data(uint64_t offset, uint64_t length)
-{
+bufferlist DataGenerator::generate_wrong_data(uint64_t offset,
+ uint64_t length) {
bufferlist retlist;
uint64_t block_size = m_model.get_block_size();
char buffer[block_size];
- for (uint64_t block_offset = offset;
- block_offset < offset + length;
- block_offset++)
- {
+ for (uint64_t block_offset = offset; block_offset < offset + length;
+ block_offset++) {
std::memset(buffer, 0, block_size);
retlist.append(ceph::bufferptr(buffer, block_size));
}
return retlist;
}
-bool DataGenerator::validate(bufferlist& bufferlist, uint64_t offset, uint64_t length)
-{
+bool DataGenerator::validate(bufferlist& bufferlist, uint64_t offset,
+ uint64_t length) {
return bufferlist.contents_equal(generate_data(offset, length));
}
-ceph::bufferptr SeededRandomGenerator::generate_block(uint64_t block_offset)
-{
+ceph::bufferptr SeededRandomGenerator::generate_block(uint64_t block_offset) {
uint64_t block_size = m_model.get_block_size();
char buffer[block_size];
@@ -70,29 +63,26 @@ ceph::bufferptr SeededRandomGenerator::generate_block(uint64_t block_offset)
constexpr size_t generation_length = sizeof(uint64_t);
- for (uint64_t i = 0; i < block_size; i+=(2*generation_length), rand1++, rand2--)
- {
+ for (uint64_t i = 0; i < block_size;
+ i += (2 * generation_length), rand1++, rand2--) {
std::memcpy(buffer + i, &rand1, generation_length);
std::memcpy(buffer + i + generation_length, &rand2, generation_length);
}
size_t remainingBytes = block_size % (generation_length * 2);
- if (remainingBytes > generation_length)
- {
+ if (remainingBytes > generation_length) {
size_t remainingBytes2 = remainingBytes - generation_length;
std::memcpy(buffer + block_size - remainingBytes, &rand1, remainingBytes);
std::memcpy(buffer + block_size - remainingBytes2, &rand2, remainingBytes2);
- }
- else if (remainingBytes > 0)
- {
+ } else if (remainingBytes > 0) {
std::memcpy(buffer + block_size - remainingBytes, &rand1, remainingBytes);
}
return ceph::bufferptr(buffer, block_size);
}
-ceph::bufferptr SeededRandomGenerator::generate_wrong_block(uint64_t block_offset)
-{
+ceph::bufferptr SeededRandomGenerator::generate_wrong_block(
+ uint64_t block_offset) {
uint64_t block_size = m_model.get_block_size();
char buffer[block_size];
@@ -102,141 +92,134 @@ ceph::bufferptr SeededRandomGenerator::generate_wrong_block(uint64_t block_offse
constexpr size_t generation_length = sizeof(uint64_t);
- for (uint64_t i = 0; i < block_size; i+=(2*generation_length), rand1++, rand2--)
- {
+ for (uint64_t i = 0; i < block_size;
+ i += (2 * generation_length), rand1++, rand2--) {
std::memcpy(buffer + i, &rand1, generation_length);
std::memcpy(buffer + i + generation_length, &rand2, generation_length);
}
size_t remainingBytes = block_size % (generation_length * 2);
- if (remainingBytes > generation_length)
- {
+ if (remainingBytes > generation_length) {
size_t remainingBytes2 = remainingBytes - generation_length;
std::memcpy(buffer + block_size - remainingBytes, &rand1, remainingBytes);
std::memcpy(buffer + block_size - remainingBytes2, &rand2, remainingBytes2);
- }
- else if (remainingBytes > 0)
- {
+ } else if (remainingBytes > 0) {
std::memcpy(buffer + block_size - remainingBytes, &rand1, remainingBytes);
}
return ceph::bufferptr(buffer, block_size);
}
-bufferlist SeededRandomGenerator::generate_data(uint64_t offset, uint64_t length)
-{
+bufferlist SeededRandomGenerator::generate_data(uint64_t offset,
+ uint64_t length) {
bufferlist retlist;
- for (uint64_t block_offset = offset; block_offset < offset + length; block_offset++)
- {
+ for (uint64_t block_offset = offset; block_offset < offset + length;
+ block_offset++) {
retlist.append(generate_block(block_offset));
}
return retlist;
}
-bufferlist SeededRandomGenerator::generate_wrong_data(uint64_t offset, uint64_t length)
-{
+bufferlist SeededRandomGenerator::generate_wrong_data(uint64_t offset,
+ uint64_t length) {
bufferlist retlist;
- for (uint64_t block_offset = offset; block_offset < offset + length; block_offset++)
- {
+ for (uint64_t block_offset = offset; block_offset < offset + length;
+ block_offset++) {
retlist.append(generate_wrong_block(block_offset));
}
return retlist;
}
-HeaderedSeededRandomGenerator
- ::HeaderedSeededRandomGenerator(const ObjectModel& model,
- std::optional<uint64_t> unique_run_id) :
- SeededRandomGenerator(model),
- unique_run_id(unique_run_id.value_or(generate_unique_run_id()))
-{
-
-}
+HeaderedSeededRandomGenerator ::HeaderedSeededRandomGenerator(
+ const ObjectModel& model, std::optional<uint64_t> unique_run_id)
+ : SeededRandomGenerator(model),
+ unique_run_id(unique_run_id.value_or(generate_unique_run_id())) {}
-uint64_t HeaderedSeededRandomGenerator::generate_unique_run_id()
-{
+uint64_t HeaderedSeededRandomGenerator::generate_unique_run_id() {
std::mt19937_64 random_generator =
- std::mt19937_64(duration_cast<std::chrono::milliseconds>(
- std::chrono::system_clock::now().time_since_epoch()).count());
+ std::mt19937_64(duration_cast<std::chrono::milliseconds>(
+ std::chrono::system_clock::now().time_since_epoch())
+ .count());
- return random_generator();
+ return random_generator();
}
-ceph::bufferptr HeaderedSeededRandomGenerator::generate_block(uint64_t block_offset)
-{
+ceph::bufferptr HeaderedSeededRandomGenerator::generate_block(
+ uint64_t block_offset) {
SeedBytes seed = m_model.get_seed(block_offset);
- TimeBytes current_time = duration_cast<std::chrono::milliseconds>(
- std::chrono::system_clock::now().time_since_epoch()).count();
+ TimeBytes current_time =
+ duration_cast<std::chrono::milliseconds>(
+ std::chrono::system_clock::now().time_since_epoch())
+ .count();
- ceph::bufferptr bufferptr = SeededRandomGenerator::generate_block(block_offset);
+ ceph::bufferptr bufferptr =
+ SeededRandomGenerator::generate_block(block_offset);
- std::memcpy(bufferptr.c_str() + uniqueIdStart(), &unique_run_id, uniqueIdLength());
+ std::memcpy(bufferptr.c_str() + uniqueIdStart(), &unique_run_id,
+ uniqueIdLength());
std::memcpy(bufferptr.c_str() + seedStart(), &seed, seedLength());
std::memcpy(bufferptr.c_str() + timeStart(), &current_time, timeLength());
return bufferptr;
}
-ceph::bufferptr HeaderedSeededRandomGenerator::generate_wrong_block(uint64_t block_offset)
-{
+ceph::bufferptr HeaderedSeededRandomGenerator::generate_wrong_block(
+ uint64_t block_offset) {
return HeaderedSeededRandomGenerator::generate_block(block_offset % 8);
}
const HeaderedSeededRandomGenerator::UniqueIdBytes
- HeaderedSeededRandomGenerator::readUniqueRunId(uint64_t block_offset,
- const bufferlist& bufferlist)
-{
+HeaderedSeededRandomGenerator::readUniqueRunId(uint64_t block_offset,
+ const bufferlist& bufferlist) {
UniqueIdBytes read_unique_run_id = 0;
- std::memcpy(&read_unique_run_id,
- &bufferlist[(block_offset * m_model.get_block_size()) + uniqueIdStart()],
- uniqueIdLength());
+ std::memcpy(
+ &read_unique_run_id,
+ &bufferlist[(block_offset * m_model.get_block_size()) + uniqueIdStart()],
+ uniqueIdLength());
return read_unique_run_id;
}
const HeaderedSeededRandomGenerator::SeedBytes
- HeaderedSeededRandomGenerator::readSeed(uint64_t block_offset,
- const bufferlist& bufferlist)
-{
+HeaderedSeededRandomGenerator::readSeed(uint64_t block_offset,
+ const bufferlist& bufferlist) {
SeedBytes read_seed = 0;
- std::memcpy(&read_seed,
- &bufferlist[(block_offset * m_model.get_block_size()) + seedStart()],
- seedLength());
+ std::memcpy(
+ &read_seed,
+ &bufferlist[(block_offset * m_model.get_block_size()) + seedStart()],
+ seedLength());
return read_seed;
}
const HeaderedSeededRandomGenerator::TimeBytes
- HeaderedSeededRandomGenerator::readDateTime(uint64_t block_offset,
- const bufferlist& bufferlist)
-{
+HeaderedSeededRandomGenerator::readDateTime(uint64_t block_offset,
+ const bufferlist& bufferlist) {
TimeBytes read_time = 0;
- std::memcpy(&read_time,
- &bufferlist[(block_offset * m_model.get_block_size()) + timeStart()],
- timeLength());
+ std::memcpy(
+ &read_time,
+ &bufferlist[(block_offset * m_model.get_block_size()) + timeStart()],
+ timeLength());
return read_time;
}
bool HeaderedSeededRandomGenerator::validate(bufferlist& bufferlist,
- uint64_t offset, uint64_t length)
-{
+ uint64_t offset, uint64_t length) {
std::vector<uint64_t> invalid_block_offsets;
- for (uint64_t block_offset = offset; block_offset < offset + length; block_offset++)
- {
- bool valid_block
- = validate_block(block_offset,
- (bufferlist.c_str() + ((block_offset - offset) *
- m_model.get_block_size())));
- if (!valid_block)
- {
+ for (uint64_t block_offset = offset; block_offset < offset + length;
+ block_offset++) {
+ bool valid_block = validate_block(
+ block_offset, (bufferlist.c_str() +
+ ((block_offset - offset) * m_model.get_block_size())));
+ if (!valid_block) {
invalid_block_offsets.push_back(block_offset);
}
}
- if (!invalid_block_offsets.empty())
- {
+ if (!invalid_block_offsets.empty()) {
printDebugInformationForOffsets(offset, invalid_block_offsets, bufferlist);
}
@@ -244,59 +227,51 @@ bool HeaderedSeededRandomGenerator::validate(bufferlist& bufferlist,
}
bool HeaderedSeededRandomGenerator::validate_block(uint64_t block_offset,
- const char* buffer_start)
-{
+ const char* buffer_start) {
// We validate the block matches what we generate byte for byte
// however we ignore the time section of the header
ceph::bufferptr bufferptr = generate_block(block_offset);
bool valid = strncmp(bufferptr.c_str(), buffer_start, timeStart()) == 0;
- valid = valid ? strncmp(bufferptr.c_str() + timeEnd(),
- buffer_start + timeEnd(),
- m_model.get_block_size() - timeEnd()) == 0 : valid;
+ valid = valid
+ ? strncmp(bufferptr.c_str() + timeEnd(), buffer_start + timeEnd(),
+ m_model.get_block_size() - timeEnd()) == 0
+ : valid;
return valid;
}
const HeaderedSeededRandomGenerator::ErrorType
- HeaderedSeededRandomGenerator::getErrorTypeForBlock(uint64_t read_offset,
- uint64_t block_offset,
- const bufferlist& bufferlist)
-{
- try
- {
- UniqueIdBytes read_unique_run_id = readUniqueRunId(block_offset - read_offset,
- bufferlist);
- if (unique_run_id != read_unique_run_id)
- {
+HeaderedSeededRandomGenerator::getErrorTypeForBlock(
+ uint64_t read_offset, uint64_t block_offset, const bufferlist& bufferlist) {
+ try {
+ UniqueIdBytes read_unique_run_id =
+ readUniqueRunId(block_offset - read_offset, bufferlist);
+ if (unique_run_id != read_unique_run_id) {
return ErrorType::RUN_ID_MISMATCH;
}
SeedBytes read_seed = readSeed(block_offset - read_offset, bufferlist);
- if (m_model.get_seed(block_offset) != read_seed)
- {
+ if (m_model.get_seed(block_offset) != read_seed) {
return ErrorType::SEED_MISMATCH;
}
if (std::strncmp(&bufferlist[((block_offset - read_offset) *
- m_model.get_block_size()) + bodyStart()],
+ m_model.get_block_size()) +
+ bodyStart()],
generate_block(block_offset).c_str() + bodyStart(),
- m_model.get_block_size() - bodyStart()) != 0)
- {
+ m_model.get_block_size() - bodyStart()) != 0) {
return ErrorType::DATA_MISMATCH;
}
- }
- catch(const std::exception& e)
- {
+ } catch (const std::exception& e) {
return ErrorType::DATA_NOT_FOUND;
}
return ErrorType::UNKNOWN;
}
-void HeaderedSeededRandomGenerator
- ::printDebugInformationForBlock(uint64_t read_offset, uint64_t block_offset,
- const bufferlist& bufferlist)
-{
- ErrorType blockError = getErrorTypeForBlock(read_offset, block_offset, bufferlist);
+void HeaderedSeededRandomGenerator ::printDebugInformationForBlock(
+ uint64_t read_offset, uint64_t block_offset, const bufferlist& bufferlist) {
+ ErrorType blockError =
+ getErrorTypeForBlock(read_offset, block_offset, bufferlist);
TimeBytes read_time = 0;
std::time_t ttp;
@@ -304,433 +279,361 @@ void HeaderedSeededRandomGenerator
char read_bytes[m_model.get_block_size()];
char generated_bytes[m_model.get_block_size()];
- if (blockError == ErrorType::DATA_MISMATCH || blockError == ErrorType::UNKNOWN)
- {
+ if (blockError == ErrorType::DATA_MISMATCH ||
+ blockError == ErrorType::UNKNOWN) {
read_time = readDateTime(block_offset - read_offset, bufferlist);
- std::chrono::system_clock::time_point time_point{std::chrono::milliseconds{read_time}};
+ std::chrono::system_clock::time_point time_point{
+ std::chrono::milliseconds{read_time}};
ttp = std::chrono::system_clock::to_time_t(time_point);
- std::memcpy(&read_bytes,
- &bufferlist[((block_offset - read_offset) * m_model.get_block_size())],
- m_model.get_block_size() - bodyStart());
- std::memcpy(&generated_bytes,
- generate_block(block_offset).c_str(),
+ std::memcpy(
+ &read_bytes,
+ &bufferlist[((block_offset - read_offset) * m_model.get_block_size())],
+ m_model.get_block_size() - bodyStart());
+ std::memcpy(&generated_bytes, generate_block(block_offset).c_str(),
m_model.get_block_size() - bodyStart());
}
std::string error_string;
- switch(blockError)
- {
- case ErrorType::RUN_ID_MISMATCH:
- {
- UniqueIdBytes read_unique_run_id = readUniqueRunId((block_offset - read_offset),
- bufferlist);
- error_string = fmt::format("Header (Run ID) mismatch detected at block {} "
- "(byte offset {}) Header expected run id {} but found id {}. "
- "Block data corrupt or not written from this instance of this application.",
- block_offset,
- block_offset * m_model.get_block_size(),
- unique_run_id,
- read_unique_run_id);
- }
- break;
-
- case ErrorType::SEED_MISMATCH:
- {
+ switch (blockError) {
+ case ErrorType::RUN_ID_MISMATCH: {
+ UniqueIdBytes read_unique_run_id =
+ readUniqueRunId((block_offset - read_offset), bufferlist);
+ error_string = fmt::format(
+ "Header (Run ID) mismatch detected at block {} "
+ "(byte offset {}) Header expected run id {} but found id {}. "
+ "Block data corrupt or not written from this instance of this "
+ "application.",
+ block_offset, block_offset * m_model.get_block_size(), unique_run_id,
+ read_unique_run_id);
+ } break;
+
+ case ErrorType::SEED_MISMATCH: {
SeedBytes read_seed = readSeed((block_offset - read_offset), bufferlist);
- if (m_model.get_seed_offsets(read_seed).size() == 0)
- {
- error_string = fmt::format("Data (Seed) mismatch detected at block {}"
- " (byte offset {}). Header expected seed {} but found seed {}. "
- "Read data was not from any other recognised block in the object.",
- block_offset,
- block_offset * m_model.get_block_size(),
- m_model.get_seed(block_offset),
- read_seed);
- }
- else
- {
+ if (m_model.get_seed_offsets(read_seed).size() == 0) {
+ error_string = fmt::format(
+ "Data (Seed) mismatch detected at block {}"
+ " (byte offset {}). Header expected seed {} but found seed {}. "
+ "Read data was not from any other recognised block in the object.",
+ block_offset, block_offset * m_model.get_block_size(),
+ m_model.get_seed(block_offset), read_seed);
+ } else {
std::vector<int> seed_offsets = m_model.get_seed_offsets(read_seed);
- error_string = fmt::format("Data (Seed) mismatch detected at block {}"
- " (byte offset {}). Header expected seed {} but found seed {}."
- " Read data was from a different block(s): {}",
- block_offset,
- block_offset * m_model.get_block_size(),
- m_model.get_seed(block_offset),
- read_seed,
+ error_string = fmt::format(
+ "Data (Seed) mismatch detected at block {}"
+ " (byte offset {}). Header expected seed {} but found seed {}."
+ " Read data was from a different block(s): {}",
+ block_offset, block_offset * m_model.get_block_size(),
+ m_model.get_seed(block_offset), read_seed,
fmt::join(seed_offsets.begin(), seed_offsets.end(), ""));
}
- }
- break;
-
- case ErrorType::DATA_MISMATCH:
- {
- error_string = fmt::format("Data (Body) mismatch detected at block {}"
- " (byte offset {}). Header data matches, data body does not."
- " Data written at {}\nExpected data: \n{:02x}\nRead data:{:02x}",
- block_offset,
- block_offset * m_model.get_block_size(),
+ } break;
+
+ case ErrorType::DATA_MISMATCH: {
+ error_string = fmt::format(
+ "Data (Body) mismatch detected at block {}"
+ " (byte offset {}). Header data matches, data body does not."
+ " Data written at {}\nExpected data: \n{:02x}\nRead data:{:02x}",
+ block_offset, block_offset * m_model.get_block_size(),
std::ctime(&ttp),
- fmt::join(generated_bytes, generated_bytes + m_model.get_block_size(), ""),
+ fmt::join(generated_bytes, generated_bytes + m_model.get_block_size(),
+ ""),
fmt::join(read_bytes, read_bytes + m_model.get_block_size(), ""));
- }
- break;
+ } break;
- case ErrorType::DATA_NOT_FOUND:
- {
+ case ErrorType::DATA_NOT_FOUND: {
uint64_t bufferlist_length = bufferlist.to_str().size();
- error_string = fmt::format("Data (Body) could not be read at block {}"
- " (byte offset {}) offset in bufferlist returned from read: {}"
- " ({} bytes). Returned bufferlist length: {}.",
- block_offset,
- block_offset * m_model.get_block_size(),
+ error_string = fmt::format(
+ "Data (Body) could not be read at block {}"
+ " (byte offset {}) offset in bufferlist returned from read: {}"
+ " ({} bytes). Returned bufferlist length: {}.",
+ block_offset, block_offset * m_model.get_block_size(),
(block_offset - read_offset),
(block_offset - read_offset) * m_model.get_block_size(),
bufferlist_length);
- }
- break;
+ } break;
case ErrorType::UNKNOWN:
- [[ fallthrough ]];
-
- default:
- {
- error_string = fmt::format("Data mismatch detected at block {}"
- " (byte offset {}).\nExpected data:\n{:02x}\nRead data:\n{:02x}",
- block_offset,
- block_offset * m_model.get_block_size(),
- fmt::join(generated_bytes, generated_bytes + m_model.get_block_size(), ""),
+ [[fallthrough]];
+
+ default: {
+ error_string = fmt::format(
+ "Data mismatch detected at block {}"
+ " (byte offset {}).\nExpected data:\n{:02x}\nRead data:\n{:02x}",
+ block_offset, block_offset * m_model.get_block_size(),
+ fmt::join(generated_bytes, generated_bytes + m_model.get_block_size(),
+ ""),
fmt::join(read_bytes, read_bytes + m_model.get_block_size(), ""));
- }
- break;
+ } break;
}
dout(0) << error_string << dendl;
}
-void HeaderedSeededRandomGenerator
- ::printDebugInformationForRange(uint64_t read_offset,
- uint64_t start_block_offset,
- uint64_t range_length_in_blocks,
- ErrorType rangeError,
- const bufferlist& bufferlist)
-{
- switch(rangeError)
- {
- case ErrorType::RUN_ID_MISMATCH:
- printDebugInformationForRunIdMismatchRange(read_offset, start_block_offset,
- range_length_in_blocks, bufferlist);
- break;
- case ErrorType::SEED_MISMATCH:
- printDebugInformationForSeedMismatchRange(read_offset, start_block_offset,
- range_length_in_blocks, bufferlist);
- break;
- case ErrorType::DATA_MISMATCH:
- printDebugInformationDataBodyMismatchRange(read_offset, start_block_offset,
- range_length_in_blocks, bufferlist);
- break;
- case ErrorType::DATA_NOT_FOUND:
- printDebugInformationDataNotFoundRange(read_offset, start_block_offset,
- range_length_in_blocks, bufferlist);
- break;
- case ErrorType::UNKNOWN:
- [[ fallthrough ]];
- default:
- printDebugInformationCorruptRange(read_offset, start_block_offset,
- range_length_in_blocks, bufferlist);
- break;
+void HeaderedSeededRandomGenerator ::printDebugInformationForRange(
+ uint64_t read_offset, uint64_t start_block_offset,
+ uint64_t range_length_in_blocks, ErrorType rangeError,
+ const bufferlist& bufferlist) {
+ switch (rangeError) {
+ case ErrorType::RUN_ID_MISMATCH:
+ printDebugInformationForRunIdMismatchRange(
+ read_offset, start_block_offset, range_length_in_blocks, bufferlist);
+ break;
+ case ErrorType::SEED_MISMATCH:
+ printDebugInformationForSeedMismatchRange(
+ read_offset, start_block_offset, range_length_in_blocks, bufferlist);
+ break;
+ case ErrorType::DATA_MISMATCH:
+ printDebugInformationDataBodyMismatchRange(
+ read_offset, start_block_offset, range_length_in_blocks, bufferlist);
+ break;
+ case ErrorType::DATA_NOT_FOUND:
+ printDebugInformationDataNotFoundRange(
+ read_offset, start_block_offset, range_length_in_blocks, bufferlist);
+ break;
+ case ErrorType::UNKNOWN:
+ [[fallthrough]];
+ default:
+ printDebugInformationCorruptRange(read_offset, start_block_offset,
+ range_length_in_blocks, bufferlist);
+ break;
}
}
-void HeaderedSeededRandomGenerator
- ::printDebugInformationForRunIdMismatchRange(uint64_t read_offset,
- uint64_t start_block_offset,
- uint64_t range_length_in_blocks,
- const bufferlist& bufferlist)
-{
+void HeaderedSeededRandomGenerator ::printDebugInformationForRunIdMismatchRange(
+ uint64_t read_offset, uint64_t start_block_offset,
+ uint64_t range_length_in_blocks, const bufferlist& bufferlist) {
uint64_t range_start = start_block_offset;
uint64_t range_length = 0;
- UniqueIdBytes initial_read_unique_run_id = readUniqueRunId(start_block_offset - read_offset,
- bufferlist);
+ UniqueIdBytes initial_read_unique_run_id =
+ readUniqueRunId(start_block_offset - read_offset, bufferlist);
for (uint64_t i = start_block_offset;
- i < start_block_offset + range_length_in_blocks; i++)
- {
- ceph_assert(getErrorTypeForBlock(read_offset, i, bufferlist)
- == ErrorType::RUN_ID_MISMATCH);
+ i < start_block_offset + range_length_in_blocks; i++) {
+ ceph_assert(getErrorTypeForBlock(read_offset, i, bufferlist) ==
+ ErrorType::RUN_ID_MISMATCH);
- UniqueIdBytes read_unique_run_id = readUniqueRunId(i - read_offset, bufferlist);
+ UniqueIdBytes read_unique_run_id =
+ readUniqueRunId(i - read_offset, bufferlist);
if (initial_read_unique_run_id != read_unique_run_id ||
- i == (start_block_offset + range_length_in_blocks - 1))
- {
- if (range_length == 1)
- {
+ i == (start_block_offset + range_length_in_blocks - 1)) {
+ if (range_length == 1) {
printDebugInformationForBlock(read_offset, i, bufferlist);
- }
- else if (range_length > 1)
- {
- dout(0) << fmt::format("Data (Run ID) Mismatch detected from block {} ({} bytes)"
- " and spanning a range of {} blocks ({} bytes). "
- "Expected run id {} for range but found id {}"
- " for all blocks in range. "
- "Block data corrupt or not written from this instance of this application.",
- range_start,
- range_start * m_model.get_block_size(),
- range_length,
- range_length * m_model.get_block_size(),
- unique_run_id,
- initial_read_unique_run_id) << dendl;
+ } else if (range_length > 1) {
+ dout(0)
+ << fmt::format(
+ "Data (Run ID) Mismatch detected from block {} ({} bytes)"
+ " and spanning a range of {} blocks ({} bytes). "
+ "Expected run id {} for range but found id {}"
+ " for all blocks in range. "
+ "Block data corrupt or not written from this instance of "
+ "this application.",
+ range_start, range_start * m_model.get_block_size(),
+ range_length, range_length * m_model.get_block_size(),
+ unique_run_id, initial_read_unique_run_id)
+ << dendl;
}
range_start = i;
range_length = 1;
initial_read_unique_run_id = read_unique_run_id;
- }
- else
- {
+ } else {
range_length++;
}
}
- if (range_length == 1)
- {
- printDebugInformationForBlock(read_offset,
- start_block_offset + range_length_in_blocks - 1,
- bufferlist);
- }
- else if (range_length > 1)
- {
- dout(0) << fmt::format("Data (Run ID) Mismatch detected from block {}"
- " ({} bytes) and spanning a range of {} blocks ({} bytes). "
- "Expected run id {} for range but found id for all blocks in range. "
- "Block data corrupt or not written from this instance of this application.",
- range_start,
- range_start * m_model.get_block_size(),
- range_length,
- range_length * m_model.get_block_size(),
- unique_run_id,
- initial_read_unique_run_id)
+ if (range_length == 1) {
+ printDebugInformationForBlock(
+ read_offset, start_block_offset + range_length_in_blocks - 1,
+ bufferlist);
+ } else if (range_length > 1) {
+ dout(0) << fmt::format(
+ "Data (Run ID) Mismatch detected from block {}"
+ " ({} bytes) and spanning a range of {} blocks ({} bytes). "
+ "Expected run id {} for range but found id for all blocks "
+ "in range. "
+ "Block data corrupt or not written from this instance of "
+ "this application.",
+ range_start, range_start * m_model.get_block_size(),
+ range_length, range_length * m_model.get_block_size(),
+ unique_run_id, initial_read_unique_run_id)
<< dendl;
}
}
-void HeaderedSeededRandomGenerator
- ::printDebugInformationForSeedMismatchRange(uint64_t read_offset,
- uint64_t start_block_offset,
- uint64_t range_length_in_blocks,
- const bufferlist& bufferlist)
-{
+void HeaderedSeededRandomGenerator ::printDebugInformationForSeedMismatchRange(
+ uint64_t read_offset, uint64_t start_block_offset,
+ uint64_t range_length_in_blocks, const bufferlist& bufferlist) {
uint64_t range_start = start_block_offset;
uint64_t range_length = 0;
// Assert here if needed, as we can't support values
// that can't be converted to a signed integer.
- ceph_assert(m_model.get_block_size() < (std::numeric_limits<uint64_t>::max() / 2));
+ ceph_assert(m_model.get_block_size() <
+ (std::numeric_limits<uint64_t>::max() / 2));
std::optional<int64_t> range_offset = 0;
for (uint64_t i = start_block_offset;
- i < start_block_offset + range_length_in_blocks; i++)
- {
- ceph_assert(getErrorTypeForBlock(read_offset, i, bufferlist)
- == ErrorType::SEED_MISMATCH);
+ i < start_block_offset + range_length_in_blocks; i++) {
+ ceph_assert(getErrorTypeForBlock(read_offset, i, bufferlist) ==
+ ErrorType::SEED_MISMATCH);
SeedBytes read_seed = readSeed(i - read_offset, bufferlist);
std::vector<int> seed_found_offsets = m_model.get_seed_offsets(read_seed);
if ((seed_found_offsets.size() == 1 &&
- (static_cast<int64_t>(seed_found_offsets.front() - i) == range_offset)) ||
- range_length == 0)
- {
- if (range_length == 0)
- {
+ (static_cast<int64_t>(seed_found_offsets.front() - i) ==
+ range_offset)) ||
+ range_length == 0) {
+ if (range_length == 0) {
range_start = i;
- if (seed_found_offsets.size() > 0)
- {
+ if (seed_found_offsets.size() > 0) {
range_offset = seed_found_offsets.front() - i;
- }
- else
- {
+ } else {
range_offset = std::nullopt;
}
}
range_length++;
- }
- else
- {
- if (range_length == 1)
- {
+ } else {
+ if (range_length == 1) {
printDebugInformationForBlock(read_offset, i - 1, bufferlist);
- }
- else if (range_length > 1 && range_offset.has_value())
- {
- dout(0) << fmt::format("Data (Seed) Mismatch detected from block {}"
- " ({} bytes) and spanning a range of {} blocks ({} bytes). "
- "Returned data located starting from block {} ({} bytes) "
- "and spanning a range of {} blocks ({} bytes).",
- range_start,
- range_start * m_model.get_block_size(),
- range_length, range_length * m_model.get_block_size(),
- static_cast<uint64_t>(*range_offset) + range_start,
- (static_cast<uint64_t>(*range_offset) + range_start)
- * m_model.get_block_size(),
- range_length,
- range_length * m_model.get_block_size())
- << dendl;
- }
- else
- {
- dout(0) << fmt::format("Data (Seed) Mismatch detected from block {}"
- " ({} bytes) and spanning a range of {} blocks ({} bytes). "
- "Data seed mismatch spanning a range of {} blocks ({} bytes).",
- range_start,
- range_start * m_model.get_block_size(),
- range_length, range_length * m_model.get_block_size(),
- range_length,
- range_length * m_model.get_block_size())
- << dendl;
+ } else if (range_length > 1 && range_offset.has_value()) {
+ dout(0)
+ << fmt::format(
+ "Data (Seed) Mismatch detected from block {}"
+ " ({} bytes) and spanning a range of {} blocks ({} bytes). "
+ "Returned data located starting from block {} ({} bytes) "
+ "and spanning a range of {} blocks ({} bytes).",
+ range_start, range_start * m_model.get_block_size(),
+ range_length, range_length * m_model.get_block_size(),
+ static_cast<uint64_t>(*range_offset) + range_start,
+ (static_cast<uint64_t>(*range_offset) + range_start) *
+ m_model.get_block_size(),
+ range_length, range_length * m_model.get_block_size())
+ << dendl;
+ } else {
+ dout(0)
+ << fmt::format(
+ "Data (Seed) Mismatch detected from block {}"
+ " ({} bytes) and spanning a range of {} blocks ({} bytes). "
+ "Data seed mismatch spanning a range of {} blocks ({} "
+ "bytes).",
+ range_start, range_start * m_model.get_block_size(),
+ range_length, range_length * m_model.get_block_size(),
+ range_length, range_length * m_model.get_block_size())
+ << dendl;
}
range_length = 1;
range_start = i;
- if (seed_found_offsets.size() > 0)
- {
+ if (seed_found_offsets.size() > 0) {
range_offset = seed_found_offsets.front() - i;
- }
- else
- {
+ } else {
range_offset = std::nullopt;
}
}
}
- if (range_length == 1)
- {
- printDebugInformationForBlock(read_offset,
- start_block_offset + range_length_in_blocks - 1,
- bufferlist);
- }
- else if (range_length > 1 && range_offset.has_value())
- {
- dout(0) << fmt::format("Data (Seed) Mismatch detected from block {} ({} bytes) "
- "and spanning a range of {} blocks ({} bytes). "
- "Returned data located starting from block {} ({} bytes) "
- "and spanning a range of {} blocks ({} bytes).",
- range_start,
- range_start * m_model.get_block_size(),
- range_length,
- range_length * m_model.get_block_size(),
- *range_offset + range_start,
- (*range_offset + range_start) * m_model.get_block_size(),
- range_length,
- range_length * m_model.get_block_size())
+ if (range_length == 1) {
+ printDebugInformationForBlock(
+ read_offset, start_block_offset + range_length_in_blocks - 1,
+ bufferlist);
+ } else if (range_length > 1 && range_offset.has_value()) {
+ dout(0) << fmt::format(
+ "Data (Seed) Mismatch detected from block {} ({} bytes) "
+ "and spanning a range of {} blocks ({} bytes). "
+ "Returned data located starting from block {} ({} bytes) "
+ "and spanning a range of {} blocks ({} bytes).",
+ range_start, range_start * m_model.get_block_size(),
+ range_length, range_length * m_model.get_block_size(),
+ *range_offset + range_start,
+ (*range_offset + range_start) * m_model.get_block_size(),
+ range_length, range_length * m_model.get_block_size())
<< dendl;
- }
- else
- {
- dout(0) << fmt::format("Data (Seed) Mismatch detected from block {} ({} bytes) "
- "and spanning a range of {} blocks ({} bytes). "
- "and spanning a range of {} blocks ({} bytes).",
- range_start,
- range_start * m_model.get_block_size(),
- range_length,
- range_length * m_model.get_block_size(),
- range_length,
- range_length * m_model.get_block_size())
+ } else {
+ dout(0) << fmt::format(
+ "Data (Seed) Mismatch detected from block {} ({} bytes) "
+ "and spanning a range of {} blocks ({} bytes). "
+ "and spanning a range of {} blocks ({} bytes).",
+ range_start, range_start * m_model.get_block_size(),
+ range_length, range_length * m_model.get_block_size(),
+ range_length, range_length * m_model.get_block_size())
<< dendl;
}
}
-void HeaderedSeededRandomGenerator
-::printDebugInformationDataBodyMismatchRange(uint64_t read_offset,
- uint64_t start_block_offset,
- uint64_t range_length_in_blocks,
- const bufferlist& bufferlist)
-{
- dout(0) << fmt::format("Data Mismatch detected in blocks from {} to {}. "
- "Headers look as expected for range, "
- "but generated data body does not match. "
- "More information given for individual blocks below.",
- start_block_offset,
- start_block_offset + range_length_in_blocks - 1)
+void HeaderedSeededRandomGenerator ::printDebugInformationDataBodyMismatchRange(
+ uint64_t read_offset, uint64_t start_block_offset,
+ uint64_t range_length_in_blocks, const bufferlist& bufferlist) {
+ dout(0) << fmt::format(
+ "Data Mismatch detected in blocks from {} to {}. "
+ "Headers look as expected for range, "
+ "but generated data body does not match. "
+ "More information given for individual blocks below.",
+ start_block_offset,
+ start_block_offset + range_length_in_blocks - 1)
<< dendl;
for (uint64_t i = start_block_offset;
- i < start_block_offset + range_length_in_blocks; i++)
- {
+ i < start_block_offset + range_length_in_blocks; i++) {
printDebugInformationForBlock(read_offset, i, bufferlist);
}
}
-void HeaderedSeededRandomGenerator
- ::printDebugInformationCorruptRange(uint64_t read_offset,
- uint64_t start_block_offset,
- uint64_t range_length_in_blocks,
- const bufferlist& bufferlist)
-{
- dout(0) << fmt::format("Data Mismatch detected in blocks from {} to {}. "
- "Headers look as expected for range, "
- "but generated data body does not match. "
- "More information given for individual blocks below.",
- start_block_offset,
- start_block_offset + range_length_in_blocks - 1)
+void HeaderedSeededRandomGenerator ::printDebugInformationCorruptRange(
+ uint64_t read_offset, uint64_t start_block_offset,
+ uint64_t range_length_in_blocks, const bufferlist& bufferlist) {
+ dout(0) << fmt::format(
+ "Data Mismatch detected in blocks from {} to {}. "
+ "Headers look as expected for range, "
+ "but generated data body does not match. "
+ "More information given for individual blocks below.",
+ start_block_offset,
+ start_block_offset + range_length_in_blocks - 1)
<< dendl;
for (uint64_t i = start_block_offset;
- i < start_block_offset + range_length_in_blocks; i++)
- {
+ i < start_block_offset + range_length_in_blocks; i++) {
printDebugInformationForBlock(read_offset, i, bufferlist);
}
}
-void HeaderedSeededRandomGenerator
- ::printDebugInformationDataNotFoundRange(uint64_t read_offset,
- uint64_t start_block_offset,
- uint64_t range_length_in_blocks,
- const bufferlist& bufferlist)
-{
- dout(0) << fmt::format("Data not found for blocks from {} to {}. "
- "More information given for individual blocks below.",
- start_block_offset,
- start_block_offset + range_length_in_blocks - 1)
+void HeaderedSeededRandomGenerator ::printDebugInformationDataNotFoundRange(
+ uint64_t read_offset, uint64_t start_block_offset,
+ uint64_t range_length_in_blocks, const bufferlist& bufferlist) {
+ dout(0) << fmt::format(
+ "Data not found for blocks from {} to {}. "
+ "More information given for individual blocks below.",
+ start_block_offset,
+ start_block_offset + range_length_in_blocks - 1)
<< dendl;
- for (uint64_t i = start_block_offset; i < start_block_offset + range_length_in_blocks; i++)
- {
+ for (uint64_t i = start_block_offset;
+ i < start_block_offset + range_length_in_blocks; i++) {
printDebugInformationForBlock(read_offset, i, bufferlist);
}
}
-void HeaderedSeededRandomGenerator
- ::printDebugInformationForOffsets(uint64_t read_offset,
- std::vector<uint64_t> offsets,
- const bufferlist& bufferlist)
-{
+void HeaderedSeededRandomGenerator ::printDebugInformationForOffsets(
+ uint64_t read_offset, std::vector<uint64_t> offsets,
+ const bufferlist& bufferlist) {
uint64_t range_start = 0;
uint64_t range_length = 0;
ErrorType rangeError = ErrorType::UNKNOWN;
- for (const uint64_t& block_offset : offsets)
- {
- ErrorType blockError = getErrorTypeForBlock(read_offset, block_offset,
- bufferlist);
+ for (const uint64_t& block_offset : offsets) {
+ ErrorType blockError =
+ getErrorTypeForBlock(read_offset, block_offset, bufferlist);
- if (range_start == 0 && range_length == 0)
- {
+ if (range_start == 0 && range_length == 0) {
range_start = block_offset;
range_length = 1;
rangeError = blockError;
- }
- else if (blockError == rangeError &&
- range_start + range_length == block_offset)
-{
+ } else if (blockError == rangeError &&
+ range_start + range_length == block_offset) {
range_length++;
- }
- else
- {
- if (range_length == 1)
- {
+ } else {
+ if (range_length == 1) {
printDebugInformationForBlock(read_offset, range_start, bufferlist);
- }
- else if (range_length > 1)
- {
+ } else if (range_length > 1) {
printDebugInformationForRange(read_offset, range_start, range_length,
rangeError, bufferlist);
}
@@ -741,12 +644,9 @@ void HeaderedSeededRandomGenerator
}
}
- if (range_length == 1)
- {
+ if (range_length == 1) {
printDebugInformationForBlock(read_offset, range_start, bufferlist);
- }
- else if (range_length > 1)
- {
+ } else if (range_length > 1) {
printDebugInformationForRange(read_offset, range_start, range_length,
rangeError, bufferlist);
}
diff --git a/src/common/io_exerciser/DataGenerator.h b/src/common/io_exerciser/DataGenerator.h
index 1e5784a54cc..c497c78ed61 100644
--- a/src/common/io_exerciser/DataGenerator.h
+++ b/src/common/io_exerciser/DataGenerator.h
@@ -3,8 +3,8 @@
#include <memory>
#include <random>
-#include "include/buffer.h"
#include "ObjectModel.h"
+#include "include/buffer.h"
/* Overview
*
@@ -23,149 +23,139 @@
*
* class HeaderedSeededRandomGenerator
* Inherits from SeededRandomGenerator. Generates entirely random patterns
- * based on the seed retrieved by the model, however also appends a
+ * based on the seed retrieved by the model, however also appends a
* header to the start of each block. This generator also provides
* a range of verbose debug options to help diagnose a miscompare
* whenever it detects unexpected data.
*/
namespace ceph {
- namespace io_exerciser {
- namespace data_generation {
- enum class GenerationType {
- SeededRandom,
- HeaderedSeededRandom
- // CompressedGenerator
- // MixedGenerator
- };
-
- class DataGenerator {
- public:
- virtual ~DataGenerator() = default;
- static std::unique_ptr<DataGenerator>
- create_generator(GenerationType generatorType,
- const ObjectModel& model);
- virtual bufferlist generate_data(uint64_t length, uint64_t offset)=0;
- virtual bool validate(bufferlist& bufferlist, uint64_t offset,
- uint64_t length);
-
- // Used for testing debug outputs from data generation
- virtual bufferlist generate_wrong_data(uint64_t offset, uint64_t length);
-
- protected:
- const ObjectModel& m_model;
-
- DataGenerator(const ObjectModel& model) : m_model(model) {}
- };
-
- class SeededRandomGenerator : public DataGenerator
- {
- public:
- SeededRandomGenerator(const ObjectModel& model)
- : DataGenerator(model) {}
-
- virtual bufferptr generate_block(uint64_t offset);
- virtual bufferlist generate_data(uint64_t length, uint64_t offset);
- virtual bufferptr generate_wrong_block(uint64_t offset);
- virtual bufferlist generate_wrong_data(uint64_t offset, uint64_t length) override;
- };
-
- class HeaderedSeededRandomGenerator : public SeededRandomGenerator
- {
- public:
- HeaderedSeededRandomGenerator(const ObjectModel& model,
- std::optional<uint64_t> unique_run_id = std::nullopt);
-
- bufferptr generate_block(uint64_t offset) override;
- bufferptr generate_wrong_block(uint64_t offset) override;
- bool validate(bufferlist& bufferlist, uint64_t offset,
- uint64_t length) override;
-
- private:
- using UniqueIdBytes = uint64_t;
- using SeedBytes = int;
- using TimeBytes = uint64_t;
-
- enum class ErrorType {
- RUN_ID_MISMATCH,
- SEED_MISMATCH,
- DATA_MISMATCH,
- DATA_NOT_FOUND,
- UNKNOWN
- };
-
- constexpr uint8_t headerStart() const
- { return 0; };
- constexpr uint8_t uniqueIdStart() const
- { return headerStart(); };
- constexpr uint8_t uniqueIdLength() const
- { return sizeof(UniqueIdBytes); };
- constexpr uint8_t seedStart() const
- { return uniqueIdStart() + uniqueIdLength(); };
- constexpr uint8_t seedLength() const
- { return sizeof(SeedBytes); };
- constexpr uint8_t timeStart() const
- { return seedStart() + seedLength(); };
- constexpr uint8_t timeLength() const
- { return sizeof(TimeBytes); };
- constexpr uint8_t timeEnd() const
- { return timeStart() + timeLength(); };
- constexpr uint8_t headerLength() const
- { return uniqueIdLength() + seedLength() + timeLength(); };
- constexpr uint8_t bodyStart() const
- { return headerStart() + headerLength(); };
-
- const UniqueIdBytes readUniqueRunId(uint64_t block_offset,
- const bufferlist& bufferlist);
- const SeedBytes readSeed(uint64_t block_offset,
- const bufferlist& bufferlist);
- const TimeBytes readDateTime(uint64_t block_offset,
+namespace io_exerciser {
+namespace data_generation {
+enum class GenerationType {
+ SeededRandom,
+ HeaderedSeededRandom
+ // CompressedGenerator
+ // MixedGenerator
+};
+
+class DataGenerator {
+ public:
+ virtual ~DataGenerator() = default;
+ static std::unique_ptr<DataGenerator> create_generator(
+ GenerationType generatorType, const ObjectModel& model);
+ virtual bufferlist generate_data(uint64_t length, uint64_t offset) = 0;
+ virtual bool validate(bufferlist& bufferlist, uint64_t offset,
+ uint64_t length);
+
+ // Used for testing debug outputs from data generation
+ virtual bufferlist generate_wrong_data(uint64_t offset, uint64_t length);
+
+ protected:
+ const ObjectModel& m_model;
+
+ DataGenerator(const ObjectModel& model) : m_model(model) {}
+};
+
+class SeededRandomGenerator : public DataGenerator {
+ public:
+ SeededRandomGenerator(const ObjectModel& model) : DataGenerator(model) {}
+
+ virtual bufferptr generate_block(uint64_t offset);
+ bufferlist generate_data(uint64_t length, uint64_t offset) override;
+ virtual bufferptr generate_wrong_block(uint64_t offset);
+ bufferlist generate_wrong_data(uint64_t offset,
+ uint64_t length) override;
+};
+
+class HeaderedSeededRandomGenerator : public SeededRandomGenerator {
+ public:
+ HeaderedSeededRandomGenerator(
+ const ObjectModel& model,
+ std::optional<uint64_t> unique_run_id = std::nullopt);
+
+ bufferptr generate_block(uint64_t offset) override;
+ bufferptr generate_wrong_block(uint64_t offset) override;
+ bool validate(bufferlist& bufferlist, uint64_t offset,
+ uint64_t length) override;
+
+ private:
+ using UniqueIdBytes = uint64_t;
+ using SeedBytes = int;
+ using TimeBytes = uint64_t;
+
+ enum class ErrorType {
+ RUN_ID_MISMATCH,
+ SEED_MISMATCH,
+ DATA_MISMATCH,
+ DATA_NOT_FOUND,
+ UNKNOWN
+ };
+
+ constexpr uint8_t headerStart() const { return 0; };
+ constexpr uint8_t uniqueIdStart() const { return headerStart(); };
+ constexpr uint8_t uniqueIdLength() const { return sizeof(UniqueIdBytes); };
+ constexpr uint8_t seedStart() const {
+ return uniqueIdStart() + uniqueIdLength();
+ };
+ constexpr uint8_t seedLength() const { return sizeof(SeedBytes); };
+ constexpr uint8_t timeStart() const { return seedStart() + seedLength(); };
+ constexpr uint8_t timeLength() const { return sizeof(TimeBytes); };
+ constexpr uint8_t timeEnd() const { return timeStart() + timeLength(); };
+ constexpr uint8_t headerLength() const {
+ return uniqueIdLength() + seedLength() + timeLength();
+ };
+ constexpr uint8_t bodyStart() const {
+ return headerStart() + headerLength();
+ };
+
+ const UniqueIdBytes readUniqueRunId(uint64_t block_offset,
+ const bufferlist& bufferlist);
+ const SeedBytes readSeed(uint64_t block_offset, const bufferlist& bufferlist);
+ const TimeBytes readDateTime(uint64_t block_offset,
+ const bufferlist& bufferlist);
+
+ const UniqueIdBytes unique_run_id;
+
+ uint64_t generate_unique_run_id();
+
+ bool validate_block(uint64_t block_offset, const char* buffer_start);
+
+ const ErrorType getErrorTypeForBlock(uint64_t read_offset,
+ uint64_t block_offset,
const bufferlist& bufferlist);
- const UniqueIdBytes unique_run_id;
-
- uint64_t generate_unique_run_id();
-
- bool validate_block(uint64_t block_offset, const char* buffer_start);
-
- const ErrorType getErrorTypeForBlock(uint64_t read_offset,
- uint64_t block_offset,
- const bufferlist& bufferlist);
-
- void printDebugInformationForBlock(uint64_t read_offset,
- uint64_t block_offset,
- const bufferlist& bufferlist);
- void printDebugInformationForRange(uint64_t read_offset,
- uint64_t start_block_offset,
- uint64_t range_length_in_blocks,
- ErrorType rangeError,
- const bufferlist& bufferlist);
-
- void printDebugInformationForRunIdMismatchRange(uint64_t read_offset,
- uint64_t start_block_offset,
- uint64_t range_length_in_blocks,
- const bufferlist& bufferlist);
- void printDebugInformationForSeedMismatchRange(uint64_t read_offset,
- uint64_t start_block_offset,
- uint64_t range_length_in_blocks,
- const bufferlist& bufferlist);
- void printDebugInformationDataBodyMismatchRange(uint64_t read_offset,
- uint64_t start_block_offset,
- uint64_t range_length_in_blocks,
- const bufferlist& bufferlist);
- void printDebugInformationDataNotFoundRange(uint64_t ßread_offset,
- uint64_t start_block_offset,
- uint64_t range_length_in_blocks,
- const bufferlist& bufferlist);
- void printDebugInformationCorruptRange(uint64_t read_offset,
- uint64_t start_block_offset,
- uint64_t range_length_in_blocks,
- const bufferlist& bufferlist);
-
- void printDebugInformationForOffsets(uint64_t read_offset,
- std::vector<uint64_t> offsets,
- const bufferlist& bufferlist);
- };
- }
- }
-}
+ void printDebugInformationForBlock(uint64_t read_offset,
+ uint64_t block_offset,
+ const bufferlist& bufferlist);
+ void printDebugInformationForRange(uint64_t read_offset,
+ uint64_t start_block_offset,
+ uint64_t range_length_in_blocks,
+ ErrorType rangeError,
+ const bufferlist& bufferlist);
+
+ void printDebugInformationForRunIdMismatchRange(
+ uint64_t read_offset, uint64_t start_block_offset,
+ uint64_t range_length_in_blocks, const bufferlist& bufferlist);
+ void printDebugInformationForSeedMismatchRange(
+ uint64_t read_offset, uint64_t start_block_offset,
+ uint64_t range_length_in_blocks, const bufferlist& bufferlist);
+ void printDebugInformationDataBodyMismatchRange(
+ uint64_t read_offset, uint64_t start_block_offset,
+ uint64_t range_length_in_blocks, const bufferlist& bufferlist);
+  // Debug output for a contiguous range of blocks, starting at
+  // start_block_offset and covering range_length_in_blocks blocks of the
+  // supplied bufferlist. Presumably used for ranges classified as
+  // ErrorType::DATA_NOT_FOUND - confirm against the definition.
+  // The first parameter was spelled with a stray non-ASCII character; it is
+  // now named read_offset like every sibling declaration.
+  void printDebugInformationDataNotFoundRange(uint64_t read_offset,
+                                              uint64_t start_block_offset,
+                                              uint64_t range_length_in_blocks,
+                                              const bufferlist& bufferlist);
+ void printDebugInformationCorruptRange(uint64_t read_offset,
+ uint64_t start_block_offset,
+ uint64_t range_length_in_blocks,
+ const bufferlist& bufferlist);
+
+ void printDebugInformationForOffsets(uint64_t read_offset,
+ std::vector<uint64_t> offsets,
+ const bufferlist& bufferlist);
+};
+} // namespace data_generation
+} // namespace io_exerciser
+} // namespace ceph
diff --git a/src/common/io_exerciser/EcIoSequence.cc b/src/common/io_exerciser/EcIoSequence.cc
new file mode 100644
index 00000000000..611920c96e0
--- /dev/null
+++ b/src/common/io_exerciser/EcIoSequence.cc
@@ -0,0 +1,267 @@
+#include "EcIoSequence.h"
+
+#include <memory>
+
+using IoOp = ceph::io_exerciser::IoOp;
+using Sequence = ceph::io_exerciser::Sequence;
+using IoSequence = ceph::io_exerciser::IoSequence;
+using EcIoSequence = ceph::io_exerciser::EcIoSequence;
+using ReadInjectSequence = ceph::io_exerciser::ReadInjectSequence;
+
+// Reports whether this generator can produce the requested sequence.
+// The argument is not inspected: every sequence is reported as supported.
+bool EcIoSequence::is_supported(Sequence sequence) const {
+  return true;
+}
+
+// Factory for erasure-coded sequences.
+//   SEQ0..SEQ9 -> the base sequence wrapped in a ReadInjectSequence
+//   SEQ10      -> the dedicated Seq10 sequence
+//   otherwise  -> abort
+std::unique_ptr<IoSequence> EcIoSequence::generate_sequence(
+    Sequence sequence, std::pair<int, int> obj_size_range, int k, int m,
+    int seed) {
+  switch (sequence) {
+    // Stacked labels with no statements between them share a single body.
+    case Sequence::SEQUENCE_SEQ0:
+    case Sequence::SEQUENCE_SEQ1:
+    case Sequence::SEQUENCE_SEQ2:
+    case Sequence::SEQUENCE_SEQ3:
+    case Sequence::SEQUENCE_SEQ4:
+    case Sequence::SEQUENCE_SEQ5:
+    case Sequence::SEQUENCE_SEQ6:
+    case Sequence::SEQUENCE_SEQ7:
+    case Sequence::SEQUENCE_SEQ8:
+    case Sequence::SEQUENCE_SEQ9:
+      return std::make_unique<ReadInjectSequence>(obj_size_range, seed,
+                                                  sequence, k, m);
+
+    case Sequence::SEQUENCE_SEQ10:
+      return std::make_unique<Seq10>(obj_size_range, seed, k, m);
+
+    default:
+      ceph_abort_msg("Unrecognised sequence");
+  }
+}
+
+// Base constructor for erasure-coded sequences. Starts with no injection
+// configured: both inject flags cleared, no shard chosen and no inject type.
+EcIoSequence::EcIoSequence(std::pair<int, int> obj_size_range, int seed)
+    : IoSequence(obj_size_range, seed),
+      setup_inject(false),
+      clear_inject(false),
+      shard_to_inject(std::nullopt),
+      // Give the enum a defined value so that subclasses which never call a
+      // generate_random_*_inject_type() helper do not carry an indeterminate
+      // inject type.
+      inject_op_type(InjectOpType::None) {}
+
+// Chooses the shard that read errors will be injected on and flags that the
+// injection still has to be set up. Only k bounds the draw; m is unused.
+// Presumably rng(max) draws from [0, max] - confirm against the rng type.
+void EcIoSequence::select_random_data_shard_to_inject_read_error(int k, int m) {
+  const int upper_bound = k - 1;
+  shard_to_inject = rng(upper_bound);
+  setup_inject = true;
+}
+
+// Chooses the shard that write errors will be injected on and flags that the
+// injection still has to be set up. Only k bounds the draw; m is unused.
+void EcIoSequence::select_random_data_shard_to_inject_write_error(int k,
+                                                                   int m) {
+  // Write errors do not support injecting to the primary OSD, so the draw
+  // starts at 1 rather than 0.
+  const int lower_bound = 1;
+  const int upper_bound = k - 1;
+  shard_to_inject = rng(lower_bound, upper_bound);
+  setup_inject = true;
+}
+
+// Chooses the shard that read errors will be injected on, with the draw
+// bounded by k + m - 1, and flags that the injection still has to be set up.
+void EcIoSequence::select_random_shard_to_inject_read_error(int k, int m) {
+  const int upper_bound = k + m - 1;
+  shard_to_inject = rng(upper_bound);
+  setup_inject = true;
+}
+
+// Chooses the shard that write errors will be injected on, with the draw
+// bounded by 1 and k + m - 1, and flags that the injection still has to be
+// set up.
+void EcIoSequence::select_random_shard_to_inject_write_error(int k, int m) {
+  // Write errors do not support injecting to the primary OSD, so the draw
+  // starts at 1 rather than 0.
+  const int lower_bound = 1;
+  const int upper_bound = k + m - 1;
+  shard_to_inject = rng(lower_bound, upper_bound);
+  setup_inject = true;
+}
+
+// Draws a random inject type whose underlying value lies between
+// InjectOpType::ReadEIO and InjectOpType::ReadMissingShard and stores it in
+// inject_op_type.
+void EcIoSequence::generate_random_read_inject_type() {
+  const int lowest = static_cast<int>(InjectOpType::ReadEIO);
+  const int highest = static_cast<int>(InjectOpType::ReadMissingShard);
+  inject_op_type = static_cast<InjectOpType>(rng(lowest, highest));
+}
+
+// Draws a random inject type whose underlying value lies between
+// InjectOpType::WriteFailAndRollback and InjectOpType::WriteOSDAbort and
+// stores it in inject_op_type.
+void EcIoSequence::generate_random_write_inject_type() {
+  const int lowest = static_cast<int>(InjectOpType::WriteFailAndRollback);
+  const int highest = static_cast<int>(InjectOpType::WriteOSDAbort);
+  inject_op_type = static_cast<InjectOpType>(rng(lowest, highest));
+}
+
+// Wraps the plain sequence `s` so that read errors are injected around it.
+// The child sequence is built first, then the target shard and the read
+// inject type are drawn, in that order.
+ceph::io_exerciser::ReadInjectSequence::ReadInjectSequence(
+    std::pair<int, int> obj_size_range, int seed, Sequence s, int k, int m)
+    : EcIoSequence(obj_size_range, seed),
+      child_sequence(IoSequence::generate_sequence(s, obj_size_range, seed)) {
+  select_random_data_shard_to_inject_read_error(k, m);
+  generate_random_read_inject_type();
+}
+
+// The wrapper has no id of its own; it reports the id of the wrapped child.
+Sequence ceph::io_exerciser::ReadInjectSequence::get_id() const {
+  const Sequence child_id = child_sequence->get_id();
+  return child_id;
+}
+
+// Name of the wrapped child sequence followed by a note naming the shard
+// that read errors are injected on.
+std::string ceph::io_exerciser::ReadInjectSequence::get_name() const {
+  std::string name = child_sequence->get_name();
+  name += " running with read errors injected on shard ";
+  name += std::to_string(*shard_to_inject);
+  return name;
+}
+
+// Returns the next op, interleaving inject ops with the child's ops:
+//  * an op queued by the previous call is handed out first;
+//  * when the child yields a Remove, the Remove is queued and the matching
+//    clear-inject op is returned in its place;
+//  * when the child yields a Create, the Create is returned and the matching
+//    inject op is queued for the following call;
+//  * every other child op is passed straight through.
+// Aborts when inject_op_type is None or unrecognised in either special case.
+std::unique_ptr<IoOp> ReadInjectSequence::next() {
+  step++;
+
+  if (nextOp) {
+    // Hand out the queued op and leave the queue empty.
+    std::unique_ptr<IoOp> queued = std::move(nextOp);
+    return queued;
+  }
+
+  std::unique_ptr<IoOp> childOp = child_sequence->next();
+  const OpType childType = childOp->getOpType();
+
+  if (childType == OpType::Remove) {
+    // nextOp is empty here, so after the swap it holds the Remove and
+    // childOp is empty.
+    nextOp.swap(childOp);
+    switch (inject_op_type) {
+      case InjectOpType::ReadEIO:
+        return ClearReadErrorInjectOp::generate(*shard_to_inject, 0);
+      case InjectOpType::ReadMissingShard:
+        return ClearReadErrorInjectOp::generate(*shard_to_inject, 1);
+      case InjectOpType::WriteFailAndRollback:
+        return ClearWriteErrorInjectOp::generate(*shard_to_inject, 0);
+      case InjectOpType::WriteOSDAbort:
+        return ClearWriteErrorInjectOp::generate(*shard_to_inject, 3);
+      case InjectOpType::None:
+      default:
+        ceph_abort_msg("Unsupported operation");
+    }
+  } else if (childType == OpType::Create) {
+    constexpr uint64_t unlimited = std::numeric_limits<uint64_t>::max();
+    switch (inject_op_type) {
+      case InjectOpType::ReadEIO:
+        nextOp = InjectReadErrorOp::generate(*shard_to_inject, 0, 0, unlimited);
+        break;
+      case InjectOpType::ReadMissingShard:
+        nextOp = InjectReadErrorOp::generate(*shard_to_inject, 1, 0, unlimited);
+        break;
+      case InjectOpType::WriteFailAndRollback:
+        nextOp =
+            InjectWriteErrorOp::generate(*shard_to_inject, 0, 0, unlimited);
+        break;
+      case InjectOpType::WriteOSDAbort:
+        nextOp =
+            InjectWriteErrorOp::generate(*shard_to_inject, 3, 0, unlimited);
+        break;
+      case InjectOpType::None:
+      default:
+        ceph_abort_msg("Unsupported operation");
+    }
+  }
+
+  return childOp;
+}
+
+// This class overrides next() and takes its ops from the child sequence, so
+// this hook is not expected to be called; it aborts if it ever is.
+std::unique_ptr<ceph::io_exerciser::IoOp>
+ceph::io_exerciser::ReadInjectSequence::_next() {
+  ceph_abort_msg(
+      "Should not reach this point, this sequence should only consume "
+      "complete sequences");
+
+  // Kept after the abort so the function still ends in a return statement.
+  return DoneOp::generate();
+}
+
+ceph::io_exerciser::Seq10::Seq10(std::pair<int, int> obj_size_range, int seed,
+ int k, int m)
+ : EcIoSequence(obj_size_range, seed),
+ offset(0),
+ length(1),
+ inject_error_done(false),
+ failed_write_done(false),
+ read_done(false),
+ successful_write_done(false),
+ test_all_lengths(false), // Only test length(1) due to time constraints
+ test_all_sizes(
+ false) // Only test obj_size(rand()) due to time constraints
+{
+ select_random_shard_to_inject_write_error(k, m);
+ // We will inject specifically as part of our sequence in this sequence
+ setup_inject = false;
+ if (!test_all_sizes) {
+ select_random_object_size();
+ }
+}
+
+// Identifies this sequence as SEQUENCE_SEQ10.
+Sequence ceph::io_exerciser::Seq10::get_id() const {
+  constexpr Sequence id = Sequence::SEQUENCE_SEQ10;
+  return id;
+}
+
+// Human-readable description of the sequence, including the current write
+// length.
+std::string ceph::io_exerciser::Seq10::get_name() const {
+  std::string name = "Sequential writes of length ";
+  name += std::to_string(length);
+  name +=
+      " with queue depth 1 first injecting a failed write and read it to "
+      "ensure it rolls back, then successfully writing the data and "
+      "reading the write the ensure it is applied";
+  return name;
+}
+
+// Produces the next op of the sequence. For the current (offset, length) the
+// done-flags step through, one op per call:
+//   1. inject a write error on the chosen shard
+//   2. issue a write that is expected to fail
+//   3. read the same range back
+//   4. clear the write error inject
+//   5. issue a normal write
+//   6. read the same range back
+//   7. advance offset, reset the flags and emit a barrier (or finish)
+// Aborts if the flags are in a combination none of the branches covers.
+std::unique_ptr<ceph::io_exerciser::IoOp> ceph::io_exerciser::Seq10::_next() {
+ if (!inject_error_done) {
+ inject_error_done = true;
+ return InjectWriteErrorOp::generate(*shard_to_inject, 0, 0,
+ std::numeric_limits<uint64_t>::max());
+ } else if (!failed_write_done) {
+ failed_write_done = true;
+ // read_done is shared by both read steps, so re-arm it before each one.
+ read_done = false;
+ barrier = true;
+ return SingleFailedWriteOp::generate(offset, length);
+ } else if (failed_write_done && !read_done) {
+ read_done = true;
+ barrier = true;
+ return SingleReadOp::generate(offset, length);
+ } else if (!clear_inject_done) {
+ clear_inject_done = true;
+ return ClearWriteErrorInjectOp::generate(*shard_to_inject, 0);
+ } else if (!successful_write_done) {
+ successful_write_done = true;
+ read_done = false;
+ barrier = true;
+ return SingleWriteOp::generate(offset, length);
+ } else if (successful_write_done && !read_done) {
+ // NOTE(review): unlike the read after the failed write, this read does not
+ // set barrier - confirm that is intentional.
+ read_done = true;
+ return SingleReadOp::generate(offset, length);
+ } else if (successful_write_done && read_done) {
+ // One full cycle finished: move to the next offset and reset every flag.
+ offset++;
+ inject_error_done = false;
+ failed_write_done = false;
+ read_done = false;
+ clear_inject_done = false;
+ successful_write_done = false;
+
+ // NOTE(review): the bound is checked after the increment with >=, so a
+ // cycle at offset == obj_size - length only ever runs when that offset is
+ // 0 - confirm the last block is meant to be skipped.
+ if (offset + length >= obj_size) {
+ if (!test_all_lengths) {
+ remove = true;
+ done = true;
+ return BarrierOp::generate();
+ }
+
+ // Restart from offset 0 with the next longer length.
+ offset = 0;
+ length++;
+ if (length > obj_size) {
+ if (!test_all_sizes) {
+ remove = true;
+ done = true;
+ return BarrierOp::generate();
+ }
+
+ length = 1;
+ return increment_object_size();
+ }
+ }
+
+ return BarrierOp::generate();
+ } else {
+ ceph_abort_msg("Sequence in undefined state. Aborting");
+ return DoneOp::generate();
+ }
+} \ No newline at end of file
diff --git a/src/common/io_exerciser/EcIoSequence.h b/src/common/io_exerciser/EcIoSequence.h
new file mode 100644
index 00000000000..37283b3906b
--- /dev/null
+++ b/src/common/io_exerciser/EcIoSequence.h
@@ -0,0 +1,65 @@
+#pragma once
+
+#include <memory>
+#include <optional>
+#include <string>
+#include <utility>
+
+#include "IoSequence.h"
+
+namespace ceph {
+namespace io_exerciser {
+// Base class for erasure-coded I/O sequences. Adds error-injection state
+// (target shard, inject type, setup/clear flags) on top of IoSequence and
+// provides the factory that maps a Sequence value to a concrete sequence.
+class EcIoSequence : public IoSequence {
+ public:
+ virtual bool is_supported(Sequence sequence) const override;
+ // Builds the sequence object for `s`; k, m and seed are forwarded to the
+ // concrete sequence constructors.
+ static std::unique_ptr<IoSequence> generate_sequence(
+ Sequence s, std::pair<int, int> obj_size_range, int k, int m, int seed);
+
+ protected:
+ bool setup_inject;  // set to true by the select_random_* helpers
+ bool clear_inject;
+ std::optional<uint64_t> shard_to_inject;  // empty until a shard is selected
+ InjectOpType inject_op_type;
+
+ EcIoSequence(std::pair<int, int> obj_size_range, int seed);
+
+ // Write errors cannot be injected on shard zero, so shard selection for
+ // reads and for writes is kept separate
+ void select_random_data_shard_to_inject_read_error(int k, int m);
+ void select_random_data_shard_to_inject_write_error(int k, int m);
+ void select_random_shard_to_inject_read_error(int k, int m);
+ void select_random_shard_to_inject_write_error(int k, int m);
+ // Store a randomly drawn read / write inject type in inject_op_type.
+ void generate_random_read_inject_type();
+ void generate_random_write_inject_type();
+};
+
+// Wraps another sequence (`s`) and interleaves error-inject and clear-inject
+// ops with the ops that the wrapped sequence produces.
+class ReadInjectSequence : public EcIoSequence {
+ public:
+ ReadInjectSequence(std::pair<int, int> obj_size_range, int seed, Sequence s,
+ int k, int m);
+
+ Sequence get_id() const override;
+ std::string get_name() const override;
+ virtual std::unique_ptr<IoOp> next() override;
+ std::unique_ptr<IoOp> _next() override;
+
+ private:
+ std::unique_ptr<IoSequence> child_sequence;  // the wrapped sequence
+ std::unique_ptr<IoOp> nextOp;  // op queued for the following next() call
+};
+
+// Sequence that, for each offset, injects a write error, issues a failing
+// write and reads it back, then clears the inject, issues a successful write
+// and reads that back.
+class Seq10 : public EcIoSequence {
+ public:
+ Seq10(std::pair<int, int> obj_size_range, int seed, int k, int m);
+
+ Sequence get_id() const override;
+ std::string get_name() const override;
+ std::unique_ptr<IoOp> _next() override;
+
+ private:
+ // Range used for the current write/read pair.
+ uint64_t offset;
+ uint64_t length;
+
+ // Progress flags for the steps performed at the current offset.
+ bool inject_error_done;
+ bool failed_write_done;
+ bool read_done;
+ bool clear_inject_done;
+ bool successful_write_done;
+ // When false, only one length / one object size is exercised.
+ bool test_all_lengths;
+ bool test_all_sizes;
+};
+} // namespace io_exerciser
+} // namespace ceph \ No newline at end of file
diff --git a/src/common/io_exerciser/IoOp.cc b/src/common/io_exerciser/IoOp.cc
index cd855ba6fff..493d1f435b4 100644
--- a/src/common/io_exerciser/IoOp.cc
+++ b/src/common/io_exerciser/IoOp.cc
@@ -1,188 +1,316 @@
#include "IoOp.h"
-using IoOp = ceph::io_exerciser::IoOp;
+#include "fmt/format.h"
+#include "include/ceph_assert.h"
-IoOp::IoOp( OpType op,
- uint64_t offset1, uint64_t length1,
- uint64_t offset2, uint64_t length2,
- uint64_t offset3, uint64_t length3) :
- op(op),
- offset1(offset1), length1(length1),
- offset2(offset2), length2(length2),
- offset3(offset3), length3(length3)
-{
+using IoOp = ceph::io_exerciser::IoOp;
+using OpType = ceph::io_exerciser::OpType;
-}
+using DoneOp = ceph::io_exerciser::DoneOp;
+using BarrierOp = ceph::io_exerciser::BarrierOp;
+using CreateOp = ceph::io_exerciser::CreateOp;
+using RemoveOp = ceph::io_exerciser::RemoveOp;
+using SingleReadOp = ceph::io_exerciser::SingleReadOp;
+using DoubleReadOp = ceph::io_exerciser::DoubleReadOp;
+using TripleReadOp = ceph::io_exerciser::TripleReadOp;
+using SingleWriteOp = ceph::io_exerciser::SingleWriteOp;
+using DoubleWriteOp = ceph::io_exerciser::DoubleWriteOp;
+using TripleWriteOp = ceph::io_exerciser::TripleWriteOp;
+using SingleFailedWriteOp = ceph::io_exerciser::SingleFailedWriteOp;
+using DoubleFailedWriteOp = ceph::io_exerciser::DoubleFailedWriteOp;
+using TripleFailedWriteOp = ceph::io_exerciser::TripleFailedWriteOp;
-std::string IoOp::value_to_string(uint64_t v) const
-{
+namespace {
+std::string value_to_string(uint64_t v) {
if (v < 1024 || (v % 1024) != 0) {
return std::to_string(v);
- }else if (v < 1024*1024 || (v % (1024 * 1024)) != 0 ) {
+ } else if (v < 1024 * 1024 || (v % (1024 * 1024)) != 0) {
return std::to_string(v / 1024) + "K";
- }else{
+ } else {
return std::to_string(v / 1024 / 1024) + "M";
}
}
+} // namespace
-std::unique_ptr<IoOp> IoOp
- ::generate_done() {
+IoOp::IoOp() {}
- return std::make_unique<IoOp>(OpType::Done);
-}
+template <OpType opType>
+ceph::io_exerciser::TestOp<opType>::TestOp() : IoOp() {}
+
+DoneOp::DoneOp() : TestOp<OpType::Done>() {}
-std::unique_ptr<IoOp> IoOp
- ::generate_barrier() {
+std::string DoneOp::to_string(uint64_t block_size) const { return "Done"; }
- return std::make_unique<IoOp>(OpType::BARRIER);
+std::unique_ptr<DoneOp> DoneOp::generate() {
+ return std::make_unique<DoneOp>();
}
-std::unique_ptr<IoOp> IoOp
- ::generate_create(uint64_t size) {
+BarrierOp::BarrierOp() : TestOp<OpType::Barrier>() {}
- return std::make_unique<IoOp>(OpType::CREATE,0,size);
+std::unique_ptr<BarrierOp> BarrierOp::generate() {
+ return std::make_unique<BarrierOp>();
}
-std::unique_ptr<IoOp> IoOp
- ::generate_remove() {
-
- return std::make_unique<IoOp>(OpType::REMOVE);
+std::string BarrierOp::to_string(uint64_t block_size) const {
+ return "Barrier";
}
-std::unique_ptr<IoOp> IoOp
- ::generate_read(uint64_t offset, uint64_t length) {
+CreateOp::CreateOp(uint64_t size) : TestOp<OpType::Create>(), size(size) {}
- return std::make_unique<IoOp>(OpType::READ, offset, length);
+std::unique_ptr<CreateOp> CreateOp::generate(uint64_t size) {
+ return std::make_unique<CreateOp>(size);
}
-std::unique_ptr<IoOp> IoOp
- ::generate_read2(uint64_t offset1, uint64_t length1,
- uint64_t offset2, uint64_t length2) {
+std::string CreateOp::to_string(uint64_t block_size) const {
+ return "Create (size=" + value_to_string(size * block_size) + ")";
+}
- if (offset1 < offset2) {
- ceph_assert( offset1 + length1 <= offset2 );
- } else {
- ceph_assert( offset2 + length2 <= offset1 );
- }
+RemoveOp::RemoveOp() : TestOp<OpType::Remove>() {}
- return std::make_unique<IoOp>(OpType::READ2,
- offset1, length1,
- offset2, length2);
+std::unique_ptr<RemoveOp> RemoveOp::generate() {
+ return std::make_unique<RemoveOp>();
}
-std::unique_ptr<IoOp> IoOp
- ::generate_read3(uint64_t offset1, uint64_t length1,
- uint64_t offset2, uint64_t length2,
- uint64_t offset3, uint64_t length3) {
+std::string RemoveOp::to_string(uint64_t block_size) const { return "Remove"; }
- if (offset1 < offset2) {
- ceph_assert( offset1 + length1 <= offset2 );
- } else {
- ceph_assert( offset2 + length2 <= offset1 );
+template <OpType opType, int numIOs>
+ceph::io_exerciser::ReadWriteOp<opType, numIOs>::ReadWriteOp(
+ std::array<uint64_t, numIOs>&& offset,
+ std::array<uint64_t, numIOs>&& length)
+ : TestOp<opType>(), offset(offset), length(length) {
+ auto compare = [](uint64_t offset1, uint64_t length1, uint64_t offset2,
+ uint64_t length2) {
+ if (offset1 < offset2) {
+ ceph_assert(offset1 + length1 <= offset2);
+ } else {
+ ceph_assert(offset2 + length2 <= offset1);
+ }
+ };
+
+ if (numIOs > 1) {
+ for (int i = 0; i < numIOs - 1; i++) {
+ for (int j = i + 1; j < numIOs; j++) {
+ compare(offset[i], length[i], offset[j], length[j]);
+ }
+ }
}
- if (offset1 < offset3) {
- ceph_assert( offset1 + length1 <= offset3 );
- } else {
- ceph_assert( offset3 + length3 <= offset1 );
+}
+
+template <OpType opType, int numIOs>
+std::string ceph::io_exerciser::ReadWriteOp<opType, numIOs>::to_string(
+ uint64_t block_size) const {
+ std::string offset_length_desc;
+ if (numIOs > 0) {
+ offset_length_desc += fmt::format(
+ "offset1={}", value_to_string(this->offset[0] * block_size));
+ offset_length_desc += fmt::format(
+ ",length1={}", value_to_string(this->length[0] * block_size));
+ for (int i = 1; i < numIOs; i++) {
+ offset_length_desc += fmt::format(
+ ",offset{}={}", i + 1, value_to_string(this->offset[i] * block_size));
+ offset_length_desc += fmt::format(
+ ",length{}={}", i + 1, value_to_string(this->length[i] * block_size));
+ }
}
- if (offset2 < offset3) {
- ceph_assert( offset2 + length2 <= offset3 );
- } else {
- ceph_assert( offset3 + length3 <= offset2 );
+ switch (opType) {
+ case OpType::Read:
+ [[fallthrough]];
+ case OpType::Read2:
+ [[fallthrough]];
+ case OpType::Read3:
+ return fmt::format("Read{} ({})", numIOs, offset_length_desc);
+ case OpType::Write:
+ [[fallthrough]];
+ case OpType::Write2:
+ [[fallthrough]];
+ case OpType::Write3:
+ return fmt::format("Write{} ({})", numIOs, offset_length_desc);
+ case OpType::FailedWrite:
+ [[fallthrough]];
+ case OpType::FailedWrite2:
+ [[fallthrough]];
+ case OpType::FailedWrite3:
+ return fmt::format("FailedWrite{} ({})", numIOs, offset_length_desc);
+ default:
+ ceph_abort_msg(
+ fmt::format("Unsupported op type by ReadWriteOp ({})", opType));
}
- return std::make_unique<IoOp>(OpType::READ3,
- offset1, length1,
- offset2, length2,
- offset3, length3);
}
-std::unique_ptr<IoOp> IoOp::generate_write(uint64_t offset, uint64_t length) {
- return std::make_unique<IoOp>(OpType::WRITE, offset, length);
+SingleReadOp::SingleReadOp(uint64_t offset, uint64_t length)
+ : ReadWriteOp<OpType::Read, 1>({offset}, {length}) {}
+
+std::unique_ptr<SingleReadOp> SingleReadOp::generate(uint64_t offset,
+ uint64_t length) {
+ return std::make_unique<SingleReadOp>(offset, length);
}
-std::unique_ptr<IoOp> IoOp::generate_write2(uint64_t offset1, uint64_t length1,
- uint64_t offset2, uint64_t length2) {
- if (offset1 < offset2) {
- ceph_assert( offset1 + length1 <= offset2 );
- } else {
- ceph_assert( offset2 + length2 <= offset1 );
- }
- return std::make_unique<IoOp>(OpType::WRITE2,
- offset1, length1,
- offset2, length2);
+DoubleReadOp::DoubleReadOp(uint64_t offset1, uint64_t length1, uint64_t offset2,
+ uint64_t length2)
+ : ReadWriteOp<OpType::Read2, 2>({offset1, offset2}, {length1, length2}) {}
+
+std::unique_ptr<DoubleReadOp> DoubleReadOp::generate(uint64_t offset1,
+ uint64_t length1,
+ uint64_t offset2,
+ uint64_t length2) {
+ return std::make_unique<DoubleReadOp>(offset1, length1, offset2, length2);
}
-std::unique_ptr<IoOp> IoOp::generate_write3(uint64_t offset1, uint64_t length1,
- uint64_t offset2, uint64_t length2,
- uint64_t offset3, uint64_t length3) {
- if (offset1 < offset2) {
- ceph_assert( offset1 + length1 <= offset2 );
- } else {
- ceph_assert( offset2 + length2 <= offset1 );
- }
- if (offset1 < offset3) {
- ceph_assert( offset1 + length1 <= offset3 );
- } else {
- ceph_assert( offset3 + length3 <= offset1 );
- }
- if (offset2 < offset3) {
- ceph_assert( offset2 + length2 <= offset3 );
- } else {
- ceph_assert( offset3 + length3 <= offset2 );
- }
- return std::make_unique<IoOp>(OpType::WRITE3,
- offset1, length1,
- offset2, length2,
- offset3, length3);
-}
-
-bool IoOp::done() {
- return (op == OpType::Done);
-}
-
-std::string IoOp::to_string(uint64_t block_size) const
-{
- switch (op) {
- case OpType::Done:
- return "Done";
- case OpType::BARRIER:
- return "Barrier";
- case OpType::CREATE:
- return "Create (size=" + value_to_string(length1 * block_size) + ")";
- case OpType::REMOVE:
- return "Remove";
- case OpType::READ:
- return "Read (offset=" + value_to_string(offset1 * block_size) +
- ",length=" + value_to_string(length1 * block_size) + ")";
- case OpType::READ2:
- return "Read2 (offset1=" + value_to_string(offset1 * block_size) +
- ",length1=" + value_to_string(length1 * block_size) +
- ",offset2=" + value_to_string(offset2 * block_size) +
- ",length2=" + value_to_string(length2 * block_size) + ")";
- case OpType::READ3:
- return "Read3 (offset1=" + value_to_string(offset1 * block_size) +
- ",length1=" + value_to_string(length1 * block_size) +
- ",offset2=" + value_to_string(offset2 * block_size) +
- ",length2=" + value_to_string(length2 * block_size) +
- ",offset3=" + value_to_string(offset3 * block_size) +
- ",length3=" + value_to_string(length3 * block_size) + ")";
- case OpType::WRITE:
- return "Write (offset=" + value_to_string(offset1 * block_size) +
- ",length=" + value_to_string(length1 * block_size) + ")";
- case OpType::WRITE2:
- return "Write2 (offset1=" + value_to_string(offset1 * block_size) +
- ",length1=" + value_to_string(length1 * block_size) +
- ",offset2=" + value_to_string(offset2 * block_size) +
- ",length2=" + value_to_string(length2 * block_size) + ")";
- case OpType::WRITE3:
- return "Write3 (offset1=" + value_to_string(offset1 * block_size) +
- ",length1=" + value_to_string(length1 * block_size) +
- ",offset2=" + value_to_string(offset2 * block_size) +
- ",length2=" + value_to_string(length2 * block_size) +
- ",offset3=" + value_to_string(offset3 * block_size) +
- ",length3=" + value_to_string(length3 * block_size) + ")";
- default:
- break;
- }
- return "Unknown";
+TripleReadOp::TripleReadOp(uint64_t offset1, uint64_t length1, uint64_t offset2,
+ uint64_t length2, uint64_t offset3, uint64_t length3)
+ : ReadWriteOp<OpType::Read3, 3>({offset1, offset2, offset3},
+ {length1, length2, length3}) {}
+
+std::unique_ptr<TripleReadOp> TripleReadOp::generate(
+ uint64_t offset1, uint64_t length1, uint64_t offset2, uint64_t length2,
+ uint64_t offset3, uint64_t length3) {
+ return std::make_unique<TripleReadOp>(offset1, length1, offset2, length2,
+ offset3, length3);
+}
+
+SingleWriteOp::SingleWriteOp(uint64_t offset, uint64_t length)
+ : ReadWriteOp<OpType::Write, 1>({offset}, {length}) {}
+
+std::unique_ptr<SingleWriteOp> SingleWriteOp::generate(uint64_t offset,
+ uint64_t length) {
+ return std::make_unique<SingleWriteOp>(offset, length);
+}
+
+DoubleWriteOp::DoubleWriteOp(uint64_t offset1, uint64_t length1,
+ uint64_t offset2, uint64_t length2)
+ : ReadWriteOp<OpType::Write2, 2>({offset1, offset2}, {length1, length2}) {}
+
+std::unique_ptr<DoubleWriteOp> DoubleWriteOp::generate(uint64_t offset1,
+ uint64_t length1,
+ uint64_t offset2,
+ uint64_t length2) {
+ return std::make_unique<DoubleWriteOp>(offset1, length1, offset2, length2);
+}
+
+TripleWriteOp::TripleWriteOp(uint64_t offset1, uint64_t length1,
+ uint64_t offset2, uint64_t length2,
+ uint64_t offset3, uint64_t length3)
+ : ReadWriteOp<OpType::Write3, 3>({offset1, offset2, offset3},
+ {length1, length2, length3}) {}
+
+std::unique_ptr<TripleWriteOp> TripleWriteOp::generate(
+ uint64_t offset1, uint64_t length1, uint64_t offset2, uint64_t length2,
+ uint64_t offset3, uint64_t length3) {
+ return std::make_unique<TripleWriteOp>(offset1, length1, offset2, length2,
+ offset3, length3);
+}
+
+SingleFailedWriteOp::SingleFailedWriteOp(uint64_t offset, uint64_t length)
+ : ReadWriteOp<OpType::FailedWrite, 1>({offset}, {length}) {}
+
+std::unique_ptr<SingleFailedWriteOp> SingleFailedWriteOp::generate(
+ uint64_t offset, uint64_t length) {
+ return std::make_unique<SingleFailedWriteOp>(offset, length);
+}
+
+DoubleFailedWriteOp::DoubleFailedWriteOp(uint64_t offset1, uint64_t length1,
+ uint64_t offset2, uint64_t length2)
+ : ReadWriteOp<OpType::FailedWrite2, 2>({offset1, offset2},
+ {length1, length2}) {}
+
+std::unique_ptr<DoubleFailedWriteOp> DoubleFailedWriteOp::generate(
+ uint64_t offset1, uint64_t length1, uint64_t offset2, uint64_t length2) {
+ return std::make_unique<DoubleFailedWriteOp>(offset1, length1, offset2,
+ length2);
+}
+
+TripleFailedWriteOp::TripleFailedWriteOp(uint64_t offset1, uint64_t length1,
+ uint64_t offset2, uint64_t length2,
+ uint64_t offset3, uint64_t length3)
+ : ReadWriteOp<OpType::FailedWrite3, 3>({offset1, offset2, offset3},
+ {length1, length2, length3}) {}
+
+std::unique_ptr<TripleFailedWriteOp> TripleFailedWriteOp::generate(
+ uint64_t offset1, uint64_t length1, uint64_t offset2, uint64_t length2,
+ uint64_t offset3, uint64_t length3) {
+ return std::make_unique<TripleFailedWriteOp>(offset1, length1, offset2,
+ length2, offset3, length3);
+}
+
+// Stores the inject parameters unchanged: the target shard plus the optional
+// type, when and duration values.
+template <ceph::io_exerciser::OpType opType>
+ceph::io_exerciser::InjectErrorOp<opType>::InjectErrorOp(
+ int shard, const std::optional<uint64_t>& type,
+ const std::optional<uint64_t>& when,
+ const std::optional<uint64_t>& duration)
+ : TestOp<opType>(),
+ shard(shard),
+ type(type),
+ when(when),
+ duration(duration) {}
+
+// Describes the inject op. Unset optionals are printed with their fallback
+// values: type 0, when 0 and duration 1. The blocksize argument is unused.
+template <ceph::io_exerciser::OpType opType>
+std::string ceph::io_exerciser::InjectErrorOp<opType>::to_string(
+    uint64_t blocksize) const {
+  const std::string_view inject_type = get_inject_type_string();
+  const auto shown_type = type.value_or(0);
+  const auto shown_when = when.value_or(0);
+  const auto shown_duration = duration.value_or(1);
+
+  return fmt::format(
+      "Inject {} error on shard {} of type {} after {} successful inject(s) "
+      "lasting {} inject(s)",
+      inject_type, shard, shown_type, shown_when, shown_duration);
+}
+
+// Read-error flavour of InjectErrorOp; forwards every argument to the base
+// instantiated with OpType::InjectReadError.
+ceph::io_exerciser::InjectReadErrorOp::InjectReadErrorOp(
+ int shard, const std::optional<uint64_t>& type,
+ const std::optional<uint64_t>& when,
+ const std::optional<uint64_t>& duration)
+ : InjectErrorOp<OpType::InjectReadError>(shard, type, when, duration) {}
+
+// Factory: heap-allocates an InjectReadErrorOp from the given parameters.
+std::unique_ptr<ceph::io_exerciser::InjectReadErrorOp>
+ceph::io_exerciser::InjectReadErrorOp::generate(
+    int shard, const std::optional<uint64_t>& type,
+    const std::optional<uint64_t>& when,
+    const std::optional<uint64_t>& duration) {
+  auto op = std::make_unique<InjectReadErrorOp>(shard, type, when, duration);
+  return op;
+}
+
+// Write-error flavour of InjectErrorOp; forwards every argument to the base
+// instantiated with OpType::InjectWriteError.
+ceph::io_exerciser::InjectWriteErrorOp::InjectWriteErrorOp(
+ int shard, const std::optional<uint64_t>& type,
+ const std::optional<uint64_t>& when,
+ const std::optional<uint64_t>& duration)
+ : InjectErrorOp<OpType::InjectWriteError>(shard, type, when, duration) {}
+
+// Factory: heap-allocates an InjectWriteErrorOp from the given parameters.
+std::unique_ptr<ceph::io_exerciser::InjectWriteErrorOp>
+ceph::io_exerciser::InjectWriteErrorOp::generate(
+    int shard, const std::optional<uint64_t>& type,
+    const std::optional<uint64_t>& when,
+    const std::optional<uint64_t>& duration) {
+  auto op = std::make_unique<InjectWriteErrorOp>(shard, type, when, duration);
+  return op;
+}
+
+// Stores the shard and the optional inject type whose injects are to be
+// cleared.
+template <ceph::io_exerciser::OpType opType>
+ceph::io_exerciser::ClearErrorInjectOp<opType>::ClearErrorInjectOp(
+ int shard, const std::optional<uint64_t>& type)
+ : TestOp<opType>(), shard(shard), type(type) {}
+
+// Describes the clear-inject op. An unset type is printed as 0. The
+// blocksize argument is unused.
+template <ceph::io_exerciser::OpType opType>
+std::string ceph::io_exerciser::ClearErrorInjectOp<opType>::to_string(
+    uint64_t blocksize) const {
+  const std::string_view inject_type = get_inject_type_string();
+  const auto shown_type = type.value_or(0);
+
+  return fmt::format("Clear {} injects on shard {} of type {}", inject_type,
+                     shard, shown_type);
+}
+
+// Read flavour of ClearErrorInjectOp; forwards both arguments to the base
+// instantiated with OpType::ClearReadErrorInject.
+ceph::io_exerciser::ClearReadErrorInjectOp::ClearReadErrorInjectOp(
+ int shard, const std::optional<uint64_t>& type)
+ : ClearErrorInjectOp<OpType::ClearReadErrorInject>(shard, type) {}
+
+// Factory: heap-allocates a ClearReadErrorInjectOp for the given shard/type.
+std::unique_ptr<ceph::io_exerciser::ClearReadErrorInjectOp>
+ceph::io_exerciser::ClearReadErrorInjectOp::generate(
+    int shard, const std::optional<uint64_t>& type) {
+  auto op = std::make_unique<ClearReadErrorInjectOp>(shard, type);
+  return op;
+}
+
+// Write flavour of ClearErrorInjectOp; forwards both arguments to the base
+// instantiated with OpType::ClearWriteErrorInject.
+ceph::io_exerciser::ClearWriteErrorInjectOp::ClearWriteErrorInjectOp(
+ int shard, const std::optional<uint64_t>& type)
+ : ClearErrorInjectOp<OpType::ClearWriteErrorInject>(shard, type) {}
+
+// Factory: heap-allocates a ClearWriteErrorInjectOp for the given shard and
+// optional inject type.
+std::unique_ptr<ceph::io_exerciser::ClearWriteErrorInjectOp>
+ceph::io_exerciser ::ClearWriteErrorInjectOp::generate(
+ int shard, const std::optional<uint64_t>& type) {
+ return std::make_unique<ClearWriteErrorInjectOp>(shard, type);
} \ No newline at end of file
diff --git a/src/common/io_exerciser/IoOp.h b/src/common/io_exerciser/IoOp.h
index 60c02a93d4e..1887eafcc1f 100644
--- a/src/common/io_exerciser/IoOp.h
+++ b/src/common/io_exerciser/IoOp.h
@@ -1,94 +1,248 @@
#pragma once
-#include <string>
+#include <array>
#include <memory>
-#include "include/ceph_assert.h"
+#include <optional>
+#include <string>
+
+#include "OpType.h"
/* Overview
*
- * enum OpType
- * Enumeration of different types of I/O operation
- *
* class IoOp
* Stores details for an I/O operation. Generated by IoSequences
* and applied by IoExercisers
*/
namespace ceph {
- namespace io_exerciser {
-
- enum class OpType {
- Done, // End of I/O sequence
- BARRIER, // Barrier - all prior I/Os must complete
- CREATE, // Create object and pattern with data
- REMOVE, // Remove object
- READ, // Read
- READ2, // 2 Reads in one op
- READ3, // 3 Reads in one op
- WRITE, // Write
- WRITE2, // 2 Writes in one op
- WRITE3 // 3 Writes in one op
- };
-
- class IoOp {
- protected:
- std::string value_to_string(uint64_t v) const;
-
- public:
- OpType op;
- uint64_t offset1;
- uint64_t length1;
- uint64_t offset2;
- uint64_t length2;
- uint64_t offset3;
- uint64_t length3;
-
- IoOp( OpType op,
- uint64_t offset1 = 0, uint64_t length1 = 0,
- uint64_t offset2 = 0, uint64_t length2 = 0,
- uint64_t offset3 = 0, uint64_t length3 = 0 );
-
- static std::unique_ptr<IoOp> generate_done();
-
- static std::unique_ptr<IoOp> generate_barrier();
-
- static std::unique_ptr<IoOp> generate_create(uint64_t size);
-
- static std::unique_ptr<IoOp> generate_remove();
-
- static std::unique_ptr<IoOp> generate_read(uint64_t offset,
+namespace io_exerciser {
+
+class IoOp {  // Abstract base for every operation generated by an I/O sequence.
+ public:
+ IoOp();
+ virtual ~IoOp() = default;
+ virtual std::string to_string(uint64_t block_size) const = 0;  // textual description of the op
+ virtual constexpr OpType getOpType() const = 0;  // identifies the concrete operation kind
+};
+
+template <OpType opType>  // Ties an OpType value to a class at compile time.
+class TestOp : public IoOp {
+ public:
+ TestOp();
+ constexpr OpType getOpType() const override { return opType; }  // returns the template argument
+};
+
+class DoneOp : public TestOp<OpType::Done> {  // Marks the end of an I/O sequence.
+ public:
+ DoneOp();
+ static std::unique_ptr<DoneOp> generate();  // factory returning a newly allocated op
+ std::string to_string(uint64_t block_size) const override;
+};
+
+class BarrierOp : public TestOp<OpType::Barrier> {  // Barrier: all prior I/Os must complete.
+ public:
+ BarrierOp();
+ static std::unique_ptr<BarrierOp> generate();  // factory returning a newly allocated op
+ std::string to_string(uint64_t block_size) const override;
+};
+
+class CreateOp : public TestOp<OpType::Create> {  // Creates the object with the given size.
+ public:
+ CreateOp(uint64_t size);
+ static std::unique_ptr<CreateOp> generate(uint64_t size);  // factory returning a newly allocated op
+ std::string to_string(uint64_t block_size) const override;
+ uint64_t size;  // object size requested by the sequence (IoSequence::next passes obj_size)
+};
+
+class RemoveOp : public TestOp<OpType::Remove> {  // Removes the object.
+ public:
+ RemoveOp();
+ static std::unique_ptr<RemoveOp> generate();  // factory returning a newly allocated op
+ std::string to_string(uint64_t block_size) const override;
+};
+
+template <OpType opType, int numIOs>  // Common base for read/write ops carrying numIOs offset/length values.
+class ReadWriteOp : public TestOp<opType> {
+ public:
+ std::array<uint64_t, numIOs> offset;  // one offset per I/O region
+ std::array<uint64_t, numIOs> length;  // one length per I/O region
+
+ protected:
+ ReadWriteOp(std::array<uint64_t, numIOs>&& offset,  // protected: instances are created through the concrete subclasses
+ std::array<uint64_t, numIOs>&& length);
+ std::string to_string(uint64_t block_size) const override;  // NOTE(review): declared protected here although IoOp::to_string is public
+};
+
+class SingleReadOp : public ReadWriteOp<OpType::Read, 1> {  // One read region in one op.
+ public:
+ SingleReadOp(uint64_t offset, uint64_t length);
+ static std::unique_ptr<SingleReadOp> generate(uint64_t offset,  // factory returning a newly allocated op
+ uint64_t length);
+};
+
+class DoubleReadOp : public ReadWriteOp<OpType::Read2, 2> {  // Two read regions in one op.
+ public:
+ DoubleReadOp(uint64_t offset1, uint64_t length1, uint64_t offset2,
+ uint64_t length2);
+ static std::unique_ptr<DoubleReadOp> generate(uint64_t offset1,  // factory returning a newly allocated op
+ uint64_t length1,
+ uint64_t offset2,
+ uint64_t length2);
+};
+
+class TripleReadOp : public ReadWriteOp<OpType::Read3, 3> {  // Three read regions in one op.
+ public:
+ TripleReadOp(uint64_t offset1, uint64_t length1, uint64_t offset2,
+ uint64_t length2, uint64_t offset3, uint64_t length3);
+ static std::unique_ptr<TripleReadOp> generate(  // factory returning a newly allocated op
+ uint64_t offset1, uint64_t length1, uint64_t offset2, uint64_t length2,
+ uint64_t offset3, uint64_t length3);
+};
+
+class SingleWriteOp : public ReadWriteOp<OpType::Write, 1> {  // One write region in one op.
+ public:
+ SingleWriteOp(uint64_t offset, uint64_t length);
+ static std::unique_ptr<SingleWriteOp> generate(uint64_t offset,  // factory returning a newly allocated op
uint64_t length);
+};
+
+class DoubleWriteOp : public ReadWriteOp<OpType::Write2, 2> {  // Two write regions in one op.
+ public:
+ DoubleWriteOp(uint64_t offset1, uint64_t length1, uint64_t offset2,
+ uint64_t length2);
+ static std::unique_ptr<DoubleWriteOp> generate(uint64_t offset1,  // factory returning a newly allocated op
+ uint64_t length1,
+ uint64_t offset2,
+ uint64_t length2);
+};
+
+class TripleWriteOp : public ReadWriteOp<OpType::Write3, 3> {  // Three write regions in one op.
+ public:
+ TripleWriteOp(uint64_t offset1, uint64_t length1, uint64_t offset2,
+ uint64_t length2, uint64_t offset3, uint64_t length3);
+ static std::unique_ptr<TripleWriteOp> generate(  // factory returning a newly allocated op
+ uint64_t offset1, uint64_t length1, uint64_t offset2, uint64_t length2,
+ uint64_t offset3, uint64_t length3);
+};
+
+class SingleFailedWriteOp : public ReadWriteOp<OpType::FailedWrite, 1> {  // One-region FailedWrite op; presumably a write expected to fail - confirm against OpType.h
+ public:
+ SingleFailedWriteOp(uint64_t offset, uint64_t length);
+ static std::unique_ptr<SingleFailedWriteOp> generate(uint64_t offset,  // factory returning a newly allocated op
+ uint64_t length);
+};
+
+class DoubleFailedWriteOp : public ReadWriteOp<OpType::FailedWrite2, 2> {  // Two-region FailedWrite2 op; presumably a write expected to fail - confirm against OpType.h
+ public:
+ DoubleFailedWriteOp(uint64_t offset1, uint64_t length1, uint64_t offset2,
+ uint64_t length2);
+ static std::unique_ptr<DoubleFailedWriteOp> generate(uint64_t offset1,  // factory returning a newly allocated op
+ uint64_t length1,
+ uint64_t offset2,
+ uint64_t length2);
+};
+
+class TripleFailedWriteOp : public ReadWriteOp<OpType::FailedWrite3, 3> {  // Three-region FailedWrite3 op; presumably a write expected to fail - confirm against OpType.h
+ public:
+ TripleFailedWriteOp(uint64_t offset1, uint64_t length1, uint64_t offset2,
+ uint64_t length2, uint64_t offset3, uint64_t length3);
+ static std::unique_ptr<TripleFailedWriteOp> generate(  // factory returning a newly allocated op
+ uint64_t offset1, uint64_t length1, uint64_t offset2, uint64_t length2,
+ uint64_t offset3, uint64_t length3);
+};
+
+template <ceph::io_exerciser::OpType opType>  // Common base for ops that request an error inject on one shard.
+class InjectErrorOp : public TestOp<opType> {
+ public:
+ InjectErrorOp(int shard, const std::optional<uint64_t>& type,
+ const std::optional<uint64_t>& when,
+ const std::optional<uint64_t>& duration);
+
+ std::string to_string(uint64_t block_size) const override;
+
+ int shard;  // shard the inject applies to
+ std::optional<uint64_t> type;  // optional inject parameters; their meaning is defined by the consumer, not visible here
+ std::optional<uint64_t> when;
+ std::optional<uint64_t> duration;
+
+ protected:
+ virtual inline constexpr std::string_view get_inject_type_string() const = 0;  // subclasses return "read" or "write"
+};
+
+class InjectReadErrorOp : public InjectErrorOp<OpType::InjectReadError> {  // Read flavour of InjectErrorOp.
+ public:
+ InjectReadErrorOp(int shard, const std::optional<uint64_t>& type,
+ const std::optional<uint64_t>& when,
+ const std::optional<uint64_t>& duration);
+
+ static std::unique_ptr<InjectReadErrorOp> generate(  // factory returning a newly allocated op
+ int shard, const std::optional<uint64_t>& type,
+ const std::optional<uint64_t>& when,
+ const std::optional<uint64_t>& duration);
+
+ protected:
+ inline constexpr std::string_view get_inject_type_string() const override {  // label for the inject kind
+ return "read";
+ }
+};
+
+class InjectWriteErrorOp : public InjectErrorOp<OpType::InjectWriteError> {  // Write flavour of InjectErrorOp.
+ public:
+ InjectWriteErrorOp(int shard, const std::optional<uint64_t>& type,
+ const std::optional<uint64_t>& when,
+ const std::optional<uint64_t>& duration);
+
+ static std::unique_ptr<InjectWriteErrorOp> generate(  // factory returning a newly allocated op
+ int shard, const std::optional<uint64_t>& type,
+ const std::optional<uint64_t>& when,
+ const std::optional<uint64_t>& duration);
+
+ protected:
+ inline constexpr std::string_view get_inject_type_string() const override {  // label for the inject kind
+ return "write";
+ }
+};
+
+template <ceph::io_exerciser::OpType opType>  // Common base for ops that clear error injects on one shard.
+class ClearErrorInjectOp : public TestOp<opType> {
+ public:
+ ClearErrorInjectOp(int shard, const std::optional<uint64_t>& type);
+
+ std::string to_string(uint64_t block_size) const override;  // "Clear <read|write> injects on shard N of type T"
+
+ int shard;  // shard whose injects are cleared
+ std::optional<uint64_t> type;  // optional inject type; to_string prints 0 when unset
+
+ protected:
+ virtual inline constexpr std::string_view get_inject_type_string() const = 0;  // subclasses return "read" or "write"
+};
+
+class ClearReadErrorInjectOp  // Read flavour of ClearErrorInjectOp.
+ : public ClearErrorInjectOp<OpType::ClearReadErrorInject> {
+ public:
+ ClearReadErrorInjectOp(int shard, const std::optional<uint64_t>& type);
+
+ static std::unique_ptr<ClearReadErrorInjectOp> generate(  // factory returning a newly allocated op
+ int shard, const std::optional<uint64_t>& type);
+
+ protected:
+ inline constexpr std::string_view get_inject_type_string() const override {  // label used by to_string
+ return "read";
+ }
+};
+
+class ClearWriteErrorInjectOp  // Write flavour of ClearErrorInjectOp.
+ : public ClearErrorInjectOp<OpType::ClearWriteErrorInject> {
+ public:
+ ClearWriteErrorInjectOp(int shard, const std::optional<uint64_t>& type);
+
+ static std::unique_ptr<ClearWriteErrorInjectOp> generate(  // factory returning a newly allocated op
+ int shard, const std::optional<uint64_t>& type);
- static std::unique_ptr<IoOp> generate_read2(uint64_t offset1,
- uint64_t length1,
- uint64_t offset2,
- uint64_t length2);
-
- static std::unique_ptr<IoOp> generate_read3(uint64_t offset1,
- uint64_t length1,
- uint64_t offset2,
- uint64_t length2,
- uint64_t offset3,
- uint64_t length3);
-
- static std::unique_ptr<IoOp> generate_write(uint64_t offset,
- uint64_t length);
-
- static std::unique_ptr<IoOp> generate_write2(uint64_t offset1,
- uint64_t length1,
- uint64_t offset2,
- uint64_t length2);
-
- static std::unique_ptr<IoOp> generate_write3(uint64_t offset1,
- uint64_t length1,
- uint64_t offset2,
- uint64_t length2,
- uint64_t offset3,
- uint64_t length3);
-
- bool done();
-
- std::string to_string(uint64_t block_size) const;
- };
+ protected:
+ inline constexpr std::string_view get_inject_type_string() const override {  // label used by to_string
+ return "write";
}
-} \ No newline at end of file
+};
+} // namespace io_exerciser
+} // namespace ceph \ No newline at end of file
diff --git a/src/common/io_exerciser/IoSequence.cc b/src/common/io_exerciser/IoSequence.cc
index 4a7ca0593d1..83f1cc595a5 100644
--- a/src/common/io_exerciser/IoSequence.cc
+++ b/src/common/io_exerciser/IoSequence.cc
@@ -1,12 +1,12 @@
#include "IoSequence.h"
+using IoOp = ceph::io_exerciser::IoOp;
using Sequence = ceph::io_exerciser::Sequence;
using IoSequence = ceph::io_exerciser::IoSequence;
-std::ostream& ceph::io_exerciser::operator<<(std::ostream& os, const Sequence& seq)
-{
- switch (seq)
- {
+std::ostream& ceph::io_exerciser::operator<<(std::ostream& os,
+ const Sequence& seq) {
+ switch (seq) {
case Sequence::SEQUENCE_SEQ0:
os << "SEQUENCE_SEQ0";
break;
@@ -37,6 +37,9 @@ std::ostream& ceph::io_exerciser::operator<<(std::ostream& os, const Sequence& s
case Sequence::SEQUENCE_SEQ9:
os << "SEQUENCE_SEQ9";
break;
+ case Sequence::SEQUENCE_SEQ10:
+ os << "SEQUENCE_SEQ10";
+ break;
case Sequence::SEQUENCE_END:
os << "SEQUENCE_END";
break;
@@ -44,19 +47,12 @@ std::ostream& ceph::io_exerciser::operator<<(std::ostream& os, const Sequence& s
return os;
}
-IoSequence::IoSequence(std::pair<int,int> obj_size_range,
- int seed) :
- min_obj_size(obj_size_range.first), max_obj_size(obj_size_range.second),
- create(true), barrier(false), done(false), remove(false),
- obj_size(min_obj_size), step(-1), seed(seed)
-{
- rng.seed(seed);
+bool IoSequence::is_supported(Sequence sequence) const {  // Reports whether this base class can generate the given sequence id.
+ return sequence != Sequence::SEQUENCE_SEQ10;  // generate_sequence() aborts for SEQ10 (EC pools only, via EcIoSequence)
}
-std::unique_ptr<IoSequence> IoSequence::generate_sequence(Sequence s,
- std::pair<int,int> obj_size_range,
- int seed)
-{
+std::unique_ptr<IoSequence> IoSequence::generate_sequence(
+ Sequence s, std::pair<int, int> obj_size_range, int seed) {
switch (s) {
case Sequence::SEQUENCE_SEQ0:
return std::make_unique<Seq0>(obj_size_range, seed);
@@ -78,24 +74,39 @@ std::unique_ptr<IoSequence> IoSequence::generate_sequence(Sequence s,
return std::make_unique<Seq8>(obj_size_range, seed);
case Sequence::SEQUENCE_SEQ9:
return std::make_unique<Seq9>(obj_size_range, seed);
+ case Sequence::SEQUENCE_SEQ10:
+ ceph_abort_msg(
+ "Sequence 10 only supported for erasure coded pools "
+ "through the EcIoSequence interface");
+ return nullptr;
default:
break;
}
return nullptr;
}
-int IoSequence::get_step() const
-{
- return step;
+IoSequence::IoSequence(std::pair<int, int> obj_size_range, int seed)  // obj_size_range is (min, max) object size
+ : min_obj_size(obj_size_range.first),
+ max_obj_size(obj_size_range.second),
+ create(true),  // so the first op returned by next() is a CreateOp
+ barrier(false),
+ done(false),
+ remove(false),
+ obj_size(min_obj_size),
+ step(-1),  // next() increments before use, so the first op is step 0
+ seed(seed) {
+ rng.seed(seed);
}
-int IoSequence::get_seed() const
-{
- return seed;
+std::string ceph::io_exerciser::IoSequence::get_name_with_seqseed() const {  // get_name() plus the seed suffix that Seq0/Seq1 previously appended themselves
+ return get_name() + " (seqseed " + std::to_string(get_seed()) + ")";
}
-void IoSequence::set_min_object_size(uint64_t size)
-{
+int IoSequence::get_step() const { return step; }  // index of the last op handed out by next(); -1 before the first call
+
+int IoSequence::get_seed() const { return seed; }  // seed given to the constructor and used to seed rng
+
+void IoSequence::set_min_object_size(uint64_t size) {
min_obj_size = size;
if (obj_size < size) {
obj_size = size;
@@ -105,23 +116,20 @@ void IoSequence::set_min_object_size(uint64_t size)
}
}
-void IoSequence::set_max_object_size(uint64_t size)
-{
+void IoSequence::set_max_object_size(uint64_t size) {  // Sets the upper size bound; marks the sequence done if obj_size already exceeds it.
max_obj_size = size;
if (obj_size > size) {
done = true;
}
}
-void IoSequence::select_random_object_size()
-{
+void IoSequence::select_random_object_size() {  // Sets obj_size to min_obj_size + rng(max - min); leaves it unchanged when min == max.
if (max_obj_size != min_obj_size) {
obj_size = min_obj_size + rng(max_obj_size - min_obj_size);
}
}
-std::unique_ptr<ceph::io_exerciser::IoOp> IoSequence::increment_object_size()
-{
+std::unique_ptr<IoOp> IoSequence::increment_object_size() {
obj_size++;
if (obj_size > max_obj_size) {
done = true;
@@ -129,106 +137,118 @@ std::unique_ptr<ceph::io_exerciser::IoOp> IoSequence::increment_object_size()
create = true;
barrier = true;
remove = true;
- return IoOp::generate_barrier();
+ return BarrierOp::generate();
}
-std::unique_ptr<ceph::io_exerciser::IoOp> IoSequence::next()
-{
+Sequence IoSequence::getNextSupportedSequenceId() const {  // Finds the first id after get_id() that is_supported() accepts.
+ Sequence sequence = get_id();
+ ++sequence;  // start searching after the current sequence
+ for (; sequence < Sequence::SEQUENCE_END; ++sequence) {
+ if (is_supported(sequence)) {  // virtual, so subclasses can change which ids are skipped
+ return sequence;
+ }
+ }
+
+ return Sequence::SEQUENCE_END;  // nothing supported remains
+}
+
+std::unique_ptr<IoOp> IoSequence::next() {  // Pending remove, barrier, done and create (checked in that order) take priority over _next().
step++;
if (remove) {
remove = false;
- return IoOp::generate_remove();
+ return RemoveOp::generate();
}
if (barrier) {
barrier = false;
- return IoOp::generate_barrier();
+ return BarrierOp::generate();
}
if (done) {
- return IoOp::generate_done();
+ return DoneOp::generate();  // done is never cleared here, so every later call returns DoneOp too
}
if (create) {
create = false;
barrier = true;
- return IoOp::generate_create(obj_size);
+ return CreateOp::generate(obj_size);  // barrier was set above, so the following call returns a BarrierOp
}
return _next();
}
-
-
-ceph::io_exerciser::Seq0::Seq0(std::pair<int,int> obj_size_range, int seed) :
- IoSequence(obj_size_range, seed), offset(0)
-{
+ceph::io_exerciser::Seq0::Seq0(std::pair<int, int> obj_size_range, int seed)  // Picks a random object size and one fixed read length at construction.
+ : IoSequence(obj_size_range, seed), offset(0) {
select_random_object_size();
length = 1 + rng(obj_size - 1);
}
-std::string ceph::io_exerciser::Seq0::get_name() const
-{
+Sequence ceph::io_exerciser::Seq0::get_id() const {  // Identifies this class as SEQUENCE_SEQ0.
+ return Sequence::SEQUENCE_SEQ0;
+}
+
+std::string ceph::io_exerciser::Seq0::get_name() const {  // Description including the chosen read length.
return "Sequential reads of length " + std::to_string(length) +
- " with queue depth 1 (seqseed " + std::to_string(get_seed()) + ")";
+ " with queue depth 1";  // the seed suffix is now added by get_name_with_seqseed()
}
-std::unique_ptr<ceph::io_exerciser::IoOp> ceph::io_exerciser::Seq0::_next()
-{
+std::unique_ptr<ceph::io_exerciser::IoOp> ceph::io_exerciser::Seq0::_next() {  // Reads the object front to back in steps of length, truncating the last read at obj_size.
std::unique_ptr<IoOp> r;
if (offset >= obj_size) {
done = true;
barrier = true;
remove = true;
- return IoOp::generate_barrier();
+ return BarrierOp::generate();  // end of object: the flags set above make next() emit remove, barrier, then done
}
if (offset + length > obj_size) {
- r = IoOp::generate_read(offset, obj_size - offset);
+ r = SingleReadOp::generate(offset, obj_size - offset);  // final, shortened read
} else {
- r = IoOp::generate_read(offset, length);
+ r = SingleReadOp::generate(offset, length);
}
offset += length;
return r;
}
-
-
-ceph::io_exerciser::Seq1::Seq1(std::pair<int,int> obj_size_range, int seed) :
- IoSequence(obj_size_range, seed)
-{
+ceph::io_exerciser::Seq1::Seq1(std::pair<int, int> obj_size_range, int seed)  // Picks a random object size and sets the op budget to 3 * obj_size.
+ : IoSequence(obj_size_range, seed) {
select_random_object_size();
count = 3 * obj_size;
}
-std::string ceph::io_exerciser::Seq1::get_name() const
-{
- return "Random offset, random length read/write I/O with queue depth 1 (seqseed "
- + std::to_string(get_seed()) + ")";
+Sequence ceph::io_exerciser::Seq1::get_id() const {  // Identifies this class as SEQUENCE_SEQ1.
+ return Sequence::SEQUENCE_SEQ1;
}
-std::unique_ptr<ceph::io_exerciser::IoOp> ceph::io_exerciser::Seq1::_next()
-{
+std::string ceph::io_exerciser::Seq1::get_name() const {  // The seed suffix is now added by get_name_with_seqseed().
+ return "Random offset, random length read/write I/O with queue depth 1";
+}
+
+std::unique_ptr<ceph::io_exerciser::IoOp> ceph::io_exerciser::Seq1::_next() {  // Emits random single-region reads/writes; barrier is set on every call.
barrier = true;
if (count-- == 0) {
done = true;
remove = true;
- return IoOp::generate_barrier();
+ return BarrierOp::generate();  // op budget exhausted: finish the sequence
}
uint64_t offset = rng(obj_size - 1);
uint64_t length = 1 + rng(obj_size - 1 - offset);
- return (rng(2) != 0) ? IoOp::generate_write(offset, length) :
- IoOp::generate_read(offset, length);
-}
+ if (rng(2) != 0) {  // if/else instead of ?: because the two factories return different unique_ptr types
+ return SingleWriteOp::generate(offset, length);
+ } else {
+ return SingleReadOp::generate(offset, length);
+ }
+}
+ceph::io_exerciser::Seq2::Seq2(std::pair<int, int> obj_size_range, int seed)  // Starts at offset 0, length 0; _next() increments length before first use.
+ : IoSequence(obj_size_range, seed), offset(0), length(0) {}
-ceph::io_exerciser::Seq2::Seq2(std::pair<int,int> obj_size_range, int seed) :
- IoSequence(obj_size_range, seed), offset(0), length(0) {}
+Sequence ceph::io_exerciser::Seq2::get_id() const {  // Identifies this class as SEQUENCE_SEQ2.
+ return Sequence::SEQUENCE_SEQ2;
+}
-std::string ceph::io_exerciser::Seq2::get_name() const
-{
+std::string ceph::io_exerciser::Seq2::get_name() const {  // Fixed description; no per-instance parameters.
return "Permutations of offset and length read I/O";
}
-std::unique_ptr<ceph::io_exerciser::IoOp> ceph::io_exerciser::Seq2::_next()
-{
+std::unique_ptr<ceph::io_exerciser::IoOp> ceph::io_exerciser::Seq2::_next() {
length++;
if (length > obj_size - offset) {
length = 1;
@@ -239,24 +259,23 @@ std::unique_ptr<ceph::io_exerciser::IoOp> ceph::io_exerciser::Seq2::_next()
return increment_object_size();
}
}
- return IoOp::generate_read(offset, length);
+ return SingleReadOp::generate(offset, length);
}
-
-
-ceph::io_exerciser::Seq3::Seq3(std::pair<int,int> obj_size_range, int seed) :
- IoSequence(obj_size_range, seed), offset1(0), offset2(0)
-{
+ceph::io_exerciser::Seq3::Seq3(std::pair<int, int> obj_size_range, int seed)  // Enforces a minimum object size of 2 for the two 1-block regions.
+ : IoSequence(obj_size_range, seed), offset1(0), offset2(0) {
set_min_object_size(2);
}
-std::string ceph::io_exerciser::Seq3::get_name() const
-{
+Sequence ceph::io_exerciser::Seq3::get_id() const {  // Identifies this class as SEQUENCE_SEQ3.
+ return Sequence::SEQUENCE_SEQ3;
+}
+
+std::string ceph::io_exerciser::Seq3::get_name() const {  // Fixed description; no per-instance parameters.
return "Permutations of offset 2-region 1-block read I/O";
}
-std::unique_ptr<ceph::io_exerciser::IoOp> ceph::io_exerciser::Seq3::_next()
-{
+std::unique_ptr<ceph::io_exerciser::IoOp> ceph::io_exerciser::Seq3::_next() {
offset2++;
if (offset2 >= obj_size - offset1) {
offset2 = 1;
@@ -267,24 +286,23 @@ std::unique_ptr<ceph::io_exerciser::IoOp> ceph::io_exerciser::Seq3::_next()
return increment_object_size();
}
}
- return IoOp::generate_read2(offset1, 1, offset1 + offset2, 1);
+ return DoubleReadOp::generate(offset1, 1, offset1 + offset2, 1);
}
-
-
-ceph::io_exerciser::Seq4::Seq4(std::pair<int,int> obj_size_range, int seed) :
- IoSequence(obj_size_range, seed), offset1(0), offset2(1)
-{
+ceph::io_exerciser::Seq4::Seq4(std::pair<int, int> obj_size_range, int seed)  // Enforces a minimum object size of 3 for the three 1-block regions.
+ : IoSequence(obj_size_range, seed), offset1(0), offset2(1) {
set_min_object_size(3);
}
-std::string ceph::io_exerciser::Seq4::get_name() const
-{
+Sequence ceph::io_exerciser::Seq4::get_id() const {  // Identifies this class as SEQUENCE_SEQ4.
+ return Sequence::SEQUENCE_SEQ4;
+}
+
+std::string ceph::io_exerciser::Seq4::get_name() const {  // Fixed description; no per-instance parameters.
return "Permutations of offset 3-region 1-block read I/O";
}
-std::unique_ptr<ceph::io_exerciser::IoOp> ceph::io_exerciser::Seq4::_next()
-{
+std::unique_ptr<ceph::io_exerciser::IoOp> ceph::io_exerciser::Seq4::_next() {
offset2++;
if (offset2 >= obj_size - offset1) {
offset2 = 2;
@@ -295,33 +313,35 @@ std::unique_ptr<ceph::io_exerciser::IoOp> ceph::io_exerciser::Seq4::_next()
return increment_object_size();
}
}
- return IoOp::generate_read3(offset1, 1,
- offset1 + offset2, 1,
- (offset1 * 2 + offset2)/2, 1);
+ return TripleReadOp::generate(offset1, 1, (offset1 + offset2), 1,
+ (offset1 * 2 + offset2) / 2, 1);
}
+ceph::io_exerciser::Seq5::Seq5(std::pair<int, int> obj_size_range, int seed)  // Starts at offset 0 with write length 1 and no read-back pending.
+ : IoSequence(obj_size_range, seed),
+ offset(0),
+ length(1),
+ doneread(false),
+ donebarrier(false) {}
+Sequence ceph::io_exerciser::Seq5::get_id() const {  // Identifies this class as SEQUENCE_SEQ5.
+ return Sequence::SEQUENCE_SEQ5;
+}
-ceph::io_exerciser::Seq5::Seq5(std::pair<int,int> obj_size_range, int seed) :
- IoSequence(obj_size_range, seed), offset(0), length(1),
- doneread(false), donebarrier(false) {}
-
-std::string ceph::io_exerciser::Seq5::get_name() const
-{
+std::string ceph::io_exerciser::Seq5::get_name() const {  // Fixed description; no per-instance parameters.
return "Permutation of length sequential writes";
}
-std::unique_ptr<ceph::io_exerciser::IoOp> ceph::io_exerciser::Seq5::_next()
-{
+std::unique_ptr<ceph::io_exerciser::IoOp> ceph::io_exerciser::Seq5::_next() {
if (offset >= obj_size) {
if (!doneread) {
if (!donebarrier) {
donebarrier = true;
- return IoOp::generate_barrier();
+ return BarrierOp::generate();
}
doneread = true;
barrier = true;
- return IoOp::generate_read(0, obj_size);
+ return SingleReadOp::generate(0, obj_size);
}
doneread = false;
donebarrier = false;
@@ -333,33 +353,36 @@ std::unique_ptr<ceph::io_exerciser::IoOp> ceph::io_exerciser::Seq5::_next()
}
}
uint64_t io_len = (offset + length > obj_size) ? (obj_size - offset) : length;
- std::unique_ptr<IoOp> r = IoOp::generate_write(offset, io_len);
+ std::unique_ptr<IoOp> r = SingleWriteOp::generate(offset, io_len);
offset += io_len;
return r;
}
+ceph::io_exerciser::Seq6::Seq6(std::pair<int, int> obj_size_range, int seed)  // Starts at offset 0 with write length 1 and no read-back pending.
+ : IoSequence(obj_size_range, seed),
+ offset(0),
+ length(1),
+ doneread(false),
+ donebarrier(false) {}
+Sequence ceph::io_exerciser::Seq6::get_id() const {  // Identifies this class as SEQUENCE_SEQ6.
+ return Sequence::SEQUENCE_SEQ6;
+}
-ceph::io_exerciser::Seq6::Seq6(std::pair<int,int> obj_size_range, int seed) :
- IoSequence(obj_size_range, seed), offset(0), length(1),
- doneread(false), donebarrier(false) {}
-
-std::string ceph::io_exerciser::Seq6::get_name() const
-{
+std::string ceph::io_exerciser::Seq6::get_name() const {  // Fixed description; no per-instance parameters.
return "Permutation of length sequential writes, different alignment";
}
-std::unique_ptr<ceph::io_exerciser::IoOp> ceph::io_exerciser::Seq6::_next()
-{
+std::unique_ptr<ceph::io_exerciser::IoOp> ceph::io_exerciser::Seq6::_next() {
if (offset >= obj_size) {
if (!doneread) {
if (!donebarrier) {
donebarrier = true;
- return IoOp::generate_barrier();
+ return BarrierOp::generate();
}
doneread = true;
barrier = true;
- return IoOp::generate_read(0, obj_size);
+ return SingleReadOp::generate(0, obj_size);
}
doneread = false;
donebarrier = false;
@@ -374,74 +397,72 @@ std::unique_ptr<ceph::io_exerciser::IoOp> ceph::io_exerciser::Seq6::_next()
if (io_len == 0) {
io_len = length;
}
- std::unique_ptr<IoOp> r = IoOp::generate_write(offset, io_len);
+ std::unique_ptr<IoOp> r = SingleWriteOp::generate(offset, io_len);
offset += io_len;
return r;
}
-
-
-ceph::io_exerciser::Seq7::Seq7(std::pair<int,int> obj_size_range, int seed) :
- IoSequence(obj_size_range, seed)
-{
+ceph::io_exerciser::Seq7::Seq7(std::pair<int, int> obj_size_range, int seed)  // Enforces a minimum object size of 2 and starts offset at obj_size; _next() decrements before use.
+ : IoSequence(obj_size_range, seed) {
set_min_object_size(2);
offset = obj_size;
}
-std::string ceph::io_exerciser::Seq7::get_name() const
-{
+Sequence ceph::io_exerciser::Seq7::get_id() const {  // Identifies this class as SEQUENCE_SEQ7.
+ return Sequence::SEQUENCE_SEQ7;
+}
+
+std::string ceph::io_exerciser::Seq7::get_name() const {  // Fixed description; no per-instance parameters.
return "Permutations of offset 2-region 1-block writes";
}
-std::unique_ptr<ceph::io_exerciser::IoOp> ceph::io_exerciser::Seq7::_next()
-{
+std::unique_ptr<ceph::io_exerciser::IoOp> ceph::io_exerciser::Seq7::_next() {  // Two-region 1-block writes: one region walks down from the end, the other stays at obj_size / 2.
if (!doneread) {
if (!donebarrier) {
donebarrier = true;
- return IoOp::generate_barrier();
+ return BarrierOp::generate();  // barrier after each write, before reading back
}
doneread = true;
barrier = true;
- return IoOp::generate_read(0, obj_size);
+ return SingleReadOp::generate(0, obj_size);  // read the whole object after each write
}
if (offset == 0) {
doneread = false;
donebarrier = false;
- offset = obj_size+1;
+ offset = obj_size + 1;  // increment_object_size() bumps obj_size, so offset restarts at the new obj_size
return increment_object_size();
}
offset--;
- if (offset == obj_size/2) {
+ if (offset == obj_size / 2) {  // skip the offset that would coincide with the fixed middle region
return _next();
}
doneread = false;
donebarrier = false;
- return IoOp::generate_write2(offset, 1, obj_size/2, 1);
+ return DoubleWriteOp::generate(offset, 1, obj_size / 2, 1);  // must stay a write: the replaced call was generate_write2()
}
-
-
-ceph::io_exerciser::Seq8::Seq8(std::pair<int,int> obj_size_range, int seed) :
- IoSequence(obj_size_range, seed), offset1(0), offset2(1)
-{
+ceph::io_exerciser::Seq8::Seq8(std::pair<int, int> obj_size_range, int seed)  // Enforces a minimum object size of 3 for the three 1-block regions.
+ : IoSequence(obj_size_range, seed), offset1(0), offset2(1) {
set_min_object_size(3);
}
-std::string ceph::io_exerciser::Seq8::get_name() const
-{
+Sequence ceph::io_exerciser::Seq8::get_id() const {  // Identifies this class as SEQUENCE_SEQ8.
+ return Sequence::SEQUENCE_SEQ8;
+}
+
+std::string ceph::io_exerciser::Seq8::get_name() const {  // Fixed description; no per-instance parameters.
return "Permutations of offset 3-region 1-block write I/O";
}
-std::unique_ptr<ceph::io_exerciser::IoOp> ceph::io_exerciser::Seq8::_next()
-{
+std::unique_ptr<ceph::io_exerciser::IoOp> ceph::io_exerciser::Seq8::_next() {
if (!doneread) {
if (!donebarrier) {
donebarrier = true;
- return IoOp::generate_barrier();
+ return BarrierOp::generate();
}
doneread = true;
barrier = true;
- return IoOp::generate_read(0, obj_size);
+ return SingleReadOp::generate(0, obj_size);
}
offset2++;
if (offset2 >= obj_size - offset1) {
@@ -455,34 +476,30 @@ std::unique_ptr<ceph::io_exerciser::IoOp> ceph::io_exerciser::Seq8::_next()
}
doneread = false;
donebarrier = false;
- return IoOp::generate_write3(offset1, 1,
- offset1 + offset2, 1,
- (offset1 * 2 + offset2)/2, 1);
+ return TripleWriteOp::generate(offset1, 1, offset1 + offset2, 1,
+ (offset1 * 2 + offset2) / 2, 1);
}
+ceph::io_exerciser::Seq9::Seq9(std::pair<int, int> obj_size_range, int seed)  // Starts at offset 0, length 0; _next() increments length before first use.
+ : IoSequence(obj_size_range, seed), offset(0), length(0) {}
-
-ceph::io_exerciser::Seq9::Seq9(std::pair<int,int> obj_size_range, int seed) :
- IoSequence(obj_size_range, seed), offset(0), length(0)
-{
-
+Sequence ceph::io_exerciser::Seq9::get_id() const {  // Identifies this class as SEQUENCE_SEQ9.
+ return Sequence::SEQUENCE_SEQ9;
}
-std::string ceph::io_exerciser::Seq9::get_name() const
-{
+std::string ceph::io_exerciser::Seq9::get_name() const {  // Fixed description; no per-instance parameters.
return "Permutations of offset and length write I/O";
}
-std::unique_ptr<ceph::io_exerciser::IoOp> ceph::io_exerciser::Seq9::_next()
-{
+std::unique_ptr<ceph::io_exerciser::IoOp> ceph::io_exerciser::Seq9::_next() {
if (!doneread) {
if (!donebarrier) {
donebarrier = true;
- return IoOp::generate_barrier();
+ return BarrierOp::generate();
}
doneread = true;
barrier = true;
- return IoOp::generate_read(0, obj_size);
+ return SingleReadOp::generate(0, obj_size);
}
length++;
if (length > obj_size - offset) {
@@ -496,5 +513,5 @@ std::unique_ptr<ceph::io_exerciser::IoOp> ceph::io_exerciser::Seq9::_next()
}
doneread = false;
donebarrier = false;
- return IoOp::generate_write(offset, length);
+ return SingleWriteOp::generate(offset, length);
} \ No newline at end of file
diff --git a/src/common/io_exerciser/IoSequence.h b/src/common/io_exerciser/IoSequence.h
index 114ff76303f..b6c254cf096 100644
--- a/src/common/io_exerciser/IoSequence.h
+++ b/src/common/io_exerciser/IoSequence.h
@@ -3,7 +3,6 @@
#pragma once
#include "IoOp.h"
-
#include "include/random.h"
/* Overview
@@ -29,195 +28,209 @@
*/
namespace ceph {
- namespace io_exerciser {
-
- enum class Sequence {
- SEQUENCE_SEQ0,
- SEQUENCE_SEQ1,
- SEQUENCE_SEQ2,
- SEQUENCE_SEQ3,
- SEQUENCE_SEQ4,
- SEQUENCE_SEQ5,
- SEQUENCE_SEQ6,
- SEQUENCE_SEQ7,
- SEQUENCE_SEQ8,
- SEQUENCE_SEQ9,
- //
- SEQUENCE_END,
- SEQUENCE_BEGIN = SEQUENCE_SEQ0
- };
-
- inline Sequence operator++( Sequence& s )
- {
- return s = (Sequence)(((int)(s) + 1));
- }
-
- std::ostream& operator<<(std::ostream& os, const Sequence& seq);
-
- /* I/O Sequences */
-
- class IoSequence {
- public:
- virtual ~IoSequence() = default;
-
- virtual std::string get_name() const = 0;
- int get_step() const;
- int get_seed() const;
-
- std::unique_ptr<IoOp> next();
-
- static std::unique_ptr<IoSequence>
- generate_sequence(Sequence s, std::pair<int,int> obj_size_range, int seed );
-
- protected:
- uint64_t min_obj_size;
- uint64_t max_obj_size;
- bool create;
- bool barrier;
- bool done;
- bool remove;
- uint64_t obj_size;
- int step;
- int seed;
- ceph::util::random_number_generator<int> rng =
- ceph::util::random_number_generator<int>();
-
- IoSequence(std::pair<int,int> obj_size_range, int seed);
-
- virtual std::unique_ptr<IoOp> _next() = 0;
-
- void set_min_object_size(uint64_t size);
- void set_max_object_size(uint64_t size);
- void select_random_object_size();
- std::unique_ptr<IoOp> increment_object_size();
-
- };
-
- class Seq0: public IoSequence {
- public:
- Seq0(std::pair<int,int> obj_size_range, int seed);
-
- std::string get_name() const override;
- std::unique_ptr<IoOp> _next() override;
-
- private:
- uint64_t offset;
- uint64_t length;
- };
-
- class Seq1: public IoSequence {
- public:
- Seq1(std::pair<int,int> obj_size_range, int seed);
-
- std::string get_name() const override;
- std::unique_ptr<IoOp> _next();
-
- private:
- int count;
- };
-
- class Seq2: public IoSequence {
- public:
- Seq2(std::pair<int,int> obj_size_range, int seed);
-
- std::string get_name() const override;
- std::unique_ptr<IoOp> _next() override;
-
- private:
- uint64_t offset;
- uint64_t length;
- };
-
- class Seq3: public IoSequence {
- public:
- Seq3(std::pair<int,int> obj_size_range, int seed);
-
- std::string get_name() const override;
- std::unique_ptr<IoOp> _next() override;
- private:
- uint64_t offset1;
- uint64_t offset2;
- };
-
- class Seq4: public IoSequence {
- public:
- Seq4(std::pair<int,int> obj_size_range, int seed);
-
- std::string get_name() const override;
- std::unique_ptr<IoOp> _next() override;
-
- private:
- uint64_t offset1;
- uint64_t offset2;
- };
-
- class Seq5: public IoSequence {
- public:
- Seq5(std::pair<int,int> obj_size_range, int seed);
-
- std::string get_name() const override;
- std::unique_ptr<IoOp> _next() override;
-
- private:
- uint64_t offset;
- uint64_t length;
- bool doneread;
- bool donebarrier;
- };
-
- class Seq6: public IoSequence {
- public:
- Seq6(std::pair<int,int> obj_size_range, int seed);
-
- std::string get_name() const override;
- std::unique_ptr<IoOp> _next() override;
-
- private:
- uint64_t offset;
- uint64_t length;
- bool doneread;
- bool donebarrier;
- };
-
- class Seq7: public IoSequence {
- public:
- Seq7(std::pair<int,int> obj_size_range, int seed);
-
- std::string get_name() const override;
- std::unique_ptr<IoOp> _next() override;
-
- private:
- uint64_t offset;
- bool doneread = true;
- bool donebarrier = false;
- };
-
- class Seq8: public IoSequence {
- public:
- Seq8(std::pair<int,int> obj_size_range, int seed);
-
- std::string get_name() const override;
- std::unique_ptr<IoOp> _next() override;
- private:
- uint64_t offset1;
- uint64_t offset2;
- bool doneread = true;
- bool donebarrier = false;
- };
-
- class Seq9: public IoSequence {
- private:
- uint64_t offset;
- uint64_t length;
- bool doneread = true;
- bool donebarrier = false;
-
- public:
- Seq9(std::pair<int,int> obj_size_range, int seed);
-
- std::string get_name() const override;
-
- std::unique_ptr<IoOp> _next() override;
- };
- }
-} \ No newline at end of file
+namespace io_exerciser {
+
+enum class Sequence {  // Identifiers of the available I/O sequences, in iteration order.
+ SEQUENCE_SEQ0,
+ SEQUENCE_SEQ1,
+ SEQUENCE_SEQ2,
+ SEQUENCE_SEQ3,
+ SEQUENCE_SEQ4,
+ SEQUENCE_SEQ5,
+ SEQUENCE_SEQ6,
+ SEQUENCE_SEQ7,
+ SEQUENCE_SEQ8,
+ SEQUENCE_SEQ9,
+ SEQUENCE_SEQ10,  // rejected by IoSequence::generate_sequence(); erasure coded pools only, via EcIoSequence
+
+ SEQUENCE_END,  // one past the last sequence; loop bound in getNextSupportedSequenceId()
+ SEQUENCE_BEGIN = SEQUENCE_SEQ0
+};
+
+inline Sequence operator++(Sequence& s) {  // Prefix increment: advances s to the next enumerator and returns the new value.
+ return s = (Sequence)(((int)(s) + 1));  // no range check: incrementing SEQUENCE_END yields an unnamed value
+}
+
+std::ostream& operator<<(std::ostream& os, const Sequence& seq);
+
+/* I/O Sequences */
+
+class IoSequence {  // Abstract generator of IoOps for one test sequence; concrete SeqN classes supply _next().
+ public:
+ virtual ~IoSequence() = default;
+
+ virtual Sequence get_id() const = 0;  // enumerator identifying the concrete sequence
+ virtual std::string get_name_with_seqseed() const;  // get_name() followed by " (seqseed N)"
+ virtual std::string get_name() const = 0;
+ int get_step() const;
+ int get_seed() const;
+
+ virtual Sequence getNextSupportedSequenceId() const;  // next id after get_id() accepted by is_supported(), else SEQUENCE_END
+ virtual std::unique_ptr<IoOp> next();  // handles remove/barrier/done/create bookkeeping, then defers to _next()
+
+ virtual bool is_supported(Sequence sequence) const;
+ static std::unique_ptr<IoSequence> generate_sequence(
+ Sequence s, std::pair<int, int> obj_size_range, int seed);
+
+ protected:
+ uint64_t min_obj_size;
+ uint64_t max_obj_size;
+ bool create;  // create/barrier/done/remove are pending-op flags consumed by next()
+ bool barrier;
+ bool done;
+ bool remove;
+ uint64_t obj_size;  // size of the object currently being exercised
+ int step;
+ int seed;
+ ceph::util::random_number_generator<int> rng =
+ ceph::util::random_number_generator<int>();
+
+ IoSequence(std::pair<int, int> obj_size_range, int seed);
+
+ virtual std::unique_ptr<IoOp> _next() = 0;  // sequence-specific op generation
+
+ void set_min_object_size(uint64_t size);
+ void set_max_object_size(uint64_t size);
+ void select_random_object_size();
+ std::unique_ptr<IoOp> increment_object_size();
+};
+
+class Seq0 : public IoSequence {  // Sequential reads of one randomly chosen length.
+ public:
+ Seq0(std::pair<int, int> obj_size_range, int seed);
+
+ Sequence get_id() const override;
+ std::string get_name() const override;
+ std::unique_ptr<IoOp> _next() override;
+
+ private:
+ uint64_t offset;  // start of the next read
+ uint64_t length;  // read length chosen in the constructor
+};
+
+class Seq1 : public IoSequence {  // Random offset, random length reads and writes.
+ public:
+ Seq1(std::pair<int, int> obj_size_range, int seed);
+
+ Sequence get_id() const override;
+ std::string get_name() const override;
+ std::unique_ptr<IoOp> _next() override;  // now marked override (the previous declaration omitted it)
+
+ private:
+ int count;  // remaining ops; initialised to 3 * obj_size
+};
+
+class Seq2 : public IoSequence {  // Permutations of offset and length, issued as reads.
+ public:
+ Seq2(std::pair<int, int> obj_size_range, int seed);
+
+ Sequence get_id() const override;
+ std::string get_name() const override;
+ std::unique_ptr<IoOp> _next() override;
+
+ private:
+ uint64_t offset;  // current permutation state
+ uint64_t length;
+};
+
+class Seq3 : public IoSequence {  // Permutations of two 1-block regions, issued as a double read.
+ public:
+ Seq3(std::pair<int, int> obj_size_range, int seed);
+
+ Sequence get_id() const override;
+ std::string get_name() const override;
+ std::unique_ptr<IoOp> _next() override;
+
+ private:
+ uint64_t offset1;  // first region's offset
+ uint64_t offset2;  // distance added to offset1 for the second region
+};
+
+class Seq4 : public IoSequence {  // Permutations of three 1-block regions, issued as a triple read.
+ public:
+ Seq4(std::pair<int, int> obj_size_range, int seed);
+
+ Sequence get_id() const override;
+ std::string get_name() const override;
+ std::unique_ptr<IoOp> _next() override;
+
+ private:
+ uint64_t offset1;  // first region's offset
+ uint64_t offset2;  // distance added to offset1 for the second region
+};
+
+class Seq5 : public IoSequence {  // Permutations of length for sequential writes.
+ public:
+ Seq5(std::pair<int, int> obj_size_range, int seed);
+
+ Sequence get_id() const override;
+ std::string get_name() const override;
+ std::unique_ptr<IoOp> _next() override;
+
+ private:
+ uint64_t offset;
+ uint64_t length;
+ bool doneread;  // whether the read-back after a full pass has been issued
+ bool donebarrier;  // whether the barrier before that read-back has been issued
+};
+
+class Seq6 : public IoSequence {  // Permutations of length for sequential writes, different alignment.
+ public:
+ Seq6(std::pair<int, int> obj_size_range, int seed);
+
+ Sequence get_id() const override;
+ std::string get_name() const override;
+ std::unique_ptr<IoOp> _next() override;
+
+ private:
+ uint64_t offset;
+ uint64_t length;
+ bool doneread;  // whether the read-back after a full pass has been issued
+ bool donebarrier;  // whether the barrier before that read-back has been issued
+};
+
+class Seq7 : public IoSequence {  // Permutations of two 1-block regions, issued as writes.
+ public:
+ Seq7(std::pair<int, int> obj_size_range, int seed);
+
+ Sequence get_id() const override;
+ std::string get_name() const override;
+ std::unique_ptr<IoOp> _next() override;
+
+ private:
+ uint64_t offset;
+ bool doneread = true;  // starts true so the first _next() call skips the barrier + read-back
+ bool donebarrier = false;
+};
+
+class Seq8 : public IoSequence {  // Permutations of three 1-block regions, issued as a triple write.
+ public:
+ Seq8(std::pair<int, int> obj_size_range, int seed);
+
+ Sequence get_id() const override;
+ std::string get_name() const override;
+ std::unique_ptr<IoOp> _next() override;
+
+ private:
+ uint64_t offset1;
+ uint64_t offset2;
+ bool doneread = true;  // starts true so the first _next() call skips the barrier + read-back
+ bool donebarrier = false;
+};
+
+class Seq9 : public IoSequence {  // Permutations of offset and length, issued as writes.
+ private:
+ uint64_t offset;
+ uint64_t length;
+ bool doneread = true;  // starts true so the first _next() call skips the barrier + read-back
+ bool donebarrier = false;
+
+ public:
+ Seq9(std::pair<int, int> obj_size_range, int seed);
+
+ Sequence get_id() const override;
+ std::string get_name() const override;
+ std::unique_ptr<IoOp> _next() override;
+};
+} // namespace io_exerciser
+} // namespace ceph \ No newline at end of file
diff --git a/src/common/io_exerciser/Model.cc b/src/common/io_exerciser/Model.cc
index 50812ecbb15..6548e1eda7a 100644
--- a/src/common/io_exerciser/Model.cc
+++ b/src/common/io_exerciser/Model.cc
@@ -4,25 +4,11 @@
using Model = ceph::io_exerciser::Model;
-Model::Model(const std::string& oid, uint64_t block_size) :
-num_io(0),
-oid(oid),
-block_size(block_size)
-{
+// Stores the object id and block size and starts the I/O counter at zero.
+Model::Model(const std::string& oid, uint64_t block_size)
+ : num_io(0), oid(oid), block_size(block_size) {}
-}
+// Returns the block size given at construction. The top-level const on the
+// by-value return type has no effect for callers; it mirrors the declaration.
+const uint64_t Model::get_block_size() const { return block_size; }
-const uint64_t Model::get_block_size() const
-{
- return block_size;
-}
+// Returns a copy of the object id given at construction.
+const std::string Model::get_oid() const { return oid; }
-const std::string Model::get_oid() const
-{
- return oid;
-}
-
-int Model::get_num_io() const
-{
- return num_io;
-} \ No newline at end of file
+// Returns the current value of the num_io counter.
+int Model::get_num_io() const { return num_io; } \ No newline at end of file
diff --git a/src/common/io_exerciser/Model.h b/src/common/io_exerciser/Model.h
index 58d107409a6..9e421e79a78 100644
--- a/src/common/io_exerciser/Model.h
+++ b/src/common/io_exerciser/Model.h
@@ -1,15 +1,13 @@
#pragma once
-#include "IoOp.h"
-
#include <boost/asio/io_context.hpp>
-#include "librados/librados_asio.h"
-
-#include "include/interval_set.h"
-#include "global/global_init.h"
-#include "global/global_context.h"
+#include "IoOp.h"
#include "common/Thread.h"
+#include "global/global_context.h"
+#include "global/global_init.h"
+#include "include/interval_set.h"
+#include "librados/librados_asio.h"
/* Overview
*
@@ -21,29 +19,27 @@
*/
namespace ceph {
- namespace io_exerciser {
-
- class Model
- {
- protected:
- int num_io{0};
- std::string oid;
- uint64_t block_size;
-
- public:
- Model(const std::string& oid, uint64_t block_size);
- virtual ~Model() = default;
-
- virtual bool readyForIoOp(IoOp& op) = 0;
- virtual void applyIoOp(IoOp& op) = 0;
-
- const std::string get_oid() const;
- const uint64_t get_block_size() const;
- int get_num_io() const;
- };
-
- /* Simple RADOS I/O generator */
-
-
- }
-} \ No newline at end of file
+namespace io_exerciser {
+
+// Abstract base class for I/O models. It holds the object id, the block size
+// and an I/O counter, and declares the two hooks derived models implement.
+class Model {
+ protected:
+ int num_io{0};
+ std::string oid;
+ uint64_t block_size;
+
+ public:
+ Model(const std::string& oid, uint64_t block_size);
+ virtual ~Model() = default;
+
+ // Pure-virtual hooks: readyForIoOp reports whether `op` may be applied now,
+ // applyIoOp applies it. Both take the op by non-const reference.
+ virtual bool readyForIoOp(IoOp& op) = 0;
+ virtual void applyIoOp(IoOp& op) = 0;
+
+ // Accessors; each returns its member by value.
+ const std::string get_oid() const;
+ const uint64_t get_block_size() const;
+ int get_num_io() const;
+};
+
+/* Simple RADOS I/O generator */
+
+} // namespace io_exerciser
+} // namespace ceph \ No newline at end of file
diff --git a/src/common/io_exerciser/ObjectModel.cc b/src/common/io_exerciser/ObjectModel.cc
index 589f6434282..454d7254cf2 100644
--- a/src/common/io_exerciser/ObjectModel.cc
+++ b/src/common/io_exerciser/ObjectModel.cc
@@ -6,25 +6,20 @@
using ObjectModel = ceph::io_exerciser::ObjectModel;
-ObjectModel::ObjectModel(const std::string& oid, uint64_t block_size, int seed) :
- Model(oid, block_size), created(false)
-{
+// Builds a model of an object that has not been created yet and seeds the
+// model's random number generator with `seed`.
+ObjectModel::ObjectModel(const std::string& oid, uint64_t block_size, int seed)
+ : Model(oid, block_size), created(false) {
rng.seed(seed);
}
-int ObjectModel::get_seed(uint64_t offset) const
-{
+// Returns the value stored in `contents` at index `offset`. An out-of-range
+// offset is a programming error and trips ceph_assert.
+int ObjectModel::get_seed(uint64_t offset) const {
ceph_assert(offset < contents.size());
return contents[offset];
}
-std::vector<int> ObjectModel::get_seed_offsets(int seed) const
-{
+std::vector<int> ObjectModel::get_seed_offsets(int seed) const {
std::vector<int> offsets;
- for (size_t i = 0; i < contents.size(); i++)
- {
- if (contents[i] == seed)
- {
+ for (size_t i = 0; i < contents.size(); i++) {
+ if (contents[i] == seed) {
offsets.push_back(i);
}
}
@@ -32,8 +27,7 @@ std::vector<int> ObjectModel::get_seed_offsets(int seed) const
return offsets;
}
-std::string ObjectModel::to_string(int mask) const
-{
+std::string ObjectModel::to_string(int mask) const {
if (!created) {
return "Object does not exist";
}
@@ -48,107 +42,127 @@ std::string ObjectModel::to_string(int mask) const
return result;
}
-bool ObjectModel::readyForIoOp(IoOp& op)
-{
- return true;
-}
-
-void ObjectModel::applyIoOp(IoOp& op)
-{
- auto generate_random = [&rng = rng]() {
- return rng();
- };
-
- switch (op.op) {
- case OpType::BARRIER:
- reads.clear();
- writes.clear();
- break;
-
- case OpType::CREATE:
- ceph_assert(!created);
- ceph_assert(reads.empty());
- ceph_assert(writes.empty());
- created = true;
- contents.resize(op.length1);
- std::generate(std::execution::seq, contents.begin(), contents.end(),
- generate_random);
- break;
-
- case OpType::REMOVE:
- ceph_assert(created);
- ceph_assert(reads.empty());
- ceph_assert(writes.empty());
- created = false;
- contents.resize(0);
- break;
-
- case OpType::READ3:
- ceph_assert(created);
- ceph_assert(op.offset3 + op.length3 <= contents.size());
- // Not allowed: read overlapping with parallel write
- ceph_assert(!writes.intersects(op.offset3, op.length3));
- reads.union_insert(op.offset3, op.length3);
- [[fallthrough]];
-
- case OpType::READ2:
- ceph_assert(created);
- ceph_assert(op.offset2 + op.length2 <= contents.size());
- // Not allowed: read overlapping with parallel write
- ceph_assert(!writes.intersects(op.offset2, op.length2));
- reads.union_insert(op.offset2, op.length2);
- [[fallthrough]];
-
- case OpType::READ:
- ceph_assert(created);
- ceph_assert(op.offset1 + op.length1 <= contents.size());
- // Not allowed: read overlapping with parallel write
- ceph_assert(!writes.intersects(op.offset1, op.length1));
- reads.union_insert(op.offset1, op.length1);
- num_io++;
- break;
-
- case OpType::WRITE3:
- ceph_assert(created);
- // Not allowed: write overlapping with parallel read or write
- ceph_assert(!reads.intersects(op.offset3, op.length3));
- ceph_assert(!writes.intersects(op.offset3, op.length3));
- writes.union_insert(op.offset3, op.length3);
- ceph_assert(op.offset3 + op.length3 <= contents.size());
- std::generate(std::execution::seq,
- std::next(contents.begin(), op.offset3),
- std::next(contents.begin(), op.offset3 + op.length3),
- generate_random);
- [[fallthrough]];
-
- case OpType::WRITE2:
- ceph_assert(created);
- // Not allowed: write overlapping with parallel read or write
- ceph_assert(!reads.intersects(op.offset2, op.length2));
- ceph_assert(!writes.intersects(op.offset2, op.length2));
- writes.union_insert(op.offset2, op.length2);
- ceph_assert(op.offset2 + op.length2 <= contents.size());
- std::generate(std::execution::seq,
- std::next(contents.begin(), op.offset2),
- std::next(contents.begin(), op.offset2 + op.length2),
- generate_random);
- [[fallthrough]];
-
- case OpType::WRITE:
- ceph_assert(created);
- // Not allowed: write overlapping with parallel read or write
- ceph_assert(!reads.intersects(op.offset1, op.length1));
- ceph_assert(!writes.intersects(op.offset1, op.length1));
- writes.union_insert(op.offset1, op.length1);
- ceph_assert(op.offset1 + op.length1 <= contents.size());
- std::generate(std::execution::seq,
- std::next(contents.begin(), op.offset1),
- std::next(contents.begin(), op.offset1 + op.length1),
- generate_random);
- num_io++;
- break;
- default:
- break;
+// The object model never defers an op: `op` is not examined and the answer is
+// always true.
+bool ObjectModel::readyForIoOp(IoOp& op) { return true; }
+
+// Applies `op` to the in-memory model of the object.
+//
+// Barrier clears the sets of in-flight reads and writes. Create and Remove
+// resize `contents` and flip `created`. Reads and writes are checked against
+// the in-flight sets (asserting on any overlap the model forbids) and then
+// recorded; successful writes also regenerate the seeds of the blocks they
+// cover, while FailedWrite* ops are validated and recorded but leave
+// `contents` untouched. Any other op type is ignored.
+void ObjectModel::applyIoOp(IoOp& op) {
+  auto generate_random = [&rng = rng]() { return rng(); };
+
+  // Validate every extent of a read and add it to the in-flight reads.
+  auto verify_and_record_read_op =
+      [&contents = contents, &created = created, &num_io = num_io,
+       &reads = reads, &writes = writes]<OpType opType, int N>(
+          const ReadWriteOp<opType, N>& readOp) {
+        ceph_assert(created);
+        for (int i = 0; i < N; i++) {
+          ceph_assert(readOp.offset[i] + readOp.length[i] <= contents.size());
+          // Not allowed: read overlapping with parallel write
+          ceph_assert(!writes.intersects(readOp.offset[i], readOp.length[i]));
+          reads.union_insert(readOp.offset[i], readOp.length[i]);
+        }
+        num_io++;
+      };
+
+  // Validate every extent of a write, add it to the in-flight writes and
+  // give the covered blocks fresh random seeds. The op is taken by const
+  // reference so that it is not copied on every call.
+  auto verify_write_and_record_and_generate_seed =
+      [&generate_random, &contents = contents, &created = created,
+       &num_io = num_io, &reads = reads, &writes = writes]<OpType opType,
+                                                           int N>(
+          const ReadWriteOp<opType, N>& writeOp) {
+        ceph_assert(created);
+        for (int i = 0; i < N; i++) {
+          // Not allowed: write overlapping with parallel read or write
+          ceph_assert(!reads.intersects(writeOp.offset[i], writeOp.length[i]));
+          ceph_assert(
+              !writes.intersects(writeOp.offset[i], writeOp.length[i]));
+          writes.union_insert(writeOp.offset[i], writeOp.length[i]);
+          ceph_assert(writeOp.offset[i] + writeOp.length[i] <=
+                      contents.size());
+          std::generate(std::execution::seq,
+                        std::next(contents.begin(), writeOp.offset[i]),
+                        std::next(contents.begin(),
+                                  writeOp.offset[i] + writeOp.length[i]),
+                        generate_random);
+        }
+        num_io++;
+      };
+
+  // Same validation and bookkeeping as a normal write, but the modelled
+  // contents are left alone because the write is expected to fail.
+  auto verify_failed_write_and_record =
+      [&contents = contents, &created = created, &num_io = num_io,
+       &reads = reads, &writes = writes]<OpType opType, int N>(
+          const ReadWriteOp<opType, N>& writeOp) {
+        // Ensure write should still be valid, even though we are expecting
+        // OSD failure
+        ceph_assert(created);
+        for (int i = 0; i < N; i++) {
+          // Not allowed: write overlapping with parallel read or write
+          ceph_assert(!reads.intersects(writeOp.offset[i], writeOp.length[i]));
+          ceph_assert(
+              !writes.intersects(writeOp.offset[i], writeOp.length[i]));
+          writes.union_insert(writeOp.offset[i], writeOp.length[i]);
+          ceph_assert(writeOp.offset[i] + writeOp.length[i] <=
+                      contents.size());
+        }
+        num_io++;
+      };
+
+  switch (op.getOpType()) {
+    case OpType::Barrier:
+      reads.clear();
+      writes.clear();
+      break;
+
+    case OpType::Create:
+      ceph_assert(!created);
+      ceph_assert(reads.empty());
+      ceph_assert(writes.empty());
+      created = true;
+      contents.resize(static_cast<CreateOp&>(op).size);
+      std::generate(std::execution::seq, contents.begin(), contents.end(),
+                    generate_random);
+      break;
+
+    case OpType::Remove:
+      ceph_assert(created);
+      ceph_assert(reads.empty());
+      ceph_assert(writes.empty());
+      created = false;
+      contents.resize(0);
+      break;
+
+    case OpType::Read:
+      verify_and_record_read_op(static_cast<SingleReadOp&>(op));
+      break;
+    case OpType::Read2:
+      verify_and_record_read_op(static_cast<DoubleReadOp&>(op));
+      break;
+    case OpType::Read3:
+      verify_and_record_read_op(static_cast<TripleReadOp&>(op));
+      break;
+
+    // The helper lambdas already assert `created`, so the cases do not
+    // repeat that check.
+    case OpType::Write:
+      verify_write_and_record_and_generate_seed(
+          static_cast<SingleWriteOp&>(op));
+      break;
+    case OpType::Write2:
+      verify_write_and_record_and_generate_seed(
+          static_cast<DoubleWriteOp&>(op));
+      break;
+    case OpType::Write3:
+      verify_write_and_record_and_generate_seed(
+          static_cast<TripleWriteOp&>(op));
+      break;
+
+    // Failed writes are cast to the *FailedWriteOp types, matching the casts
+    // RadosIo::applyReadWriteOp uses for the same op types; casting them to
+    // the plain *WriteOp types would name a class the object is not.
+    case OpType::FailedWrite:
+      verify_failed_write_and_record(static_cast<SingleFailedWriteOp&>(op));
+      break;
+    case OpType::FailedWrite2:
+      verify_failed_write_and_record(static_cast<DoubleFailedWriteOp&>(op));
+      break;
+    case OpType::FailedWrite3:
+      verify_failed_write_and_record(static_cast<TripleFailedWriteOp&>(op));
+      break;
+
+    default:
+      break;
}
}
diff --git a/src/common/io_exerciser/ObjectModel.h b/src/common/io_exerciser/ObjectModel.h
index 93c70f41429..cad1307b84e 100644
--- a/src/common/io_exerciser/ObjectModel.h
+++ b/src/common/io_exerciser/ObjectModel.h
@@ -14,40 +14,41 @@
*/
namespace ceph {
- namespace io_exerciser {
- /* Model of an object to track its data contents */
-
- class ObjectModel : public Model {
- private:
- bool created;
- std::vector<int> contents;
- ceph::util::random_number_generator<int> rng =
- ceph::util::random_number_generator<int>();
-
- // Track read and write I/Os that can be submitted in
- // parallel to detect violations:
- //
- // * Read may not overlap with a parallel write
- // * Write may not overlap with a parallel read or write
- // * Create / remove may not be in parallel with read or write
- //
- // Fix broken test cases by adding barrier ops to restrict
- // I/O exercisers from issuing conflicting ops in parallel
- interval_set<uint64_t> reads;
- interval_set<uint64_t> writes;
- public:
- ObjectModel(const std::string& oid, uint64_t block_size, int seed);
-
- int get_seed(uint64_t offset) const;
- std::vector<int> get_seed_offsets(int seed) const;
-
- std::string to_string(int mask = -1) const;
-
- bool readyForIoOp(IoOp& op);
- void applyIoOp(IoOp& op);
-
- void encode(ceph::buffer::list& bl) const;
- void decode(ceph::buffer::list::const_iterator& bl);
- };
- }
-} \ No newline at end of file
+namespace io_exerciser {
+/* Model of an object to track its data contents */
+
+class ObjectModel : public Model {
+ private:
+  // True between a Create op and the following Remove op.
+  bool created;
+  // One int seed per entry; get_seed() indexes this vector directly.
+  std::vector<int> contents;
+  ceph::util::random_number_generator<int> rng =
+      ceph::util::random_number_generator<int>();
+
+  // Track read and write I/Os that can be submitted in
+  // parallel to detect violations:
+  //
+  // * Read may not overlap with a parallel write
+  // * Write may not overlap with a parallel read or write
+  // * Create / remove may not be in parallel with read or write
+  //
+  // Fix broken test cases by adding barrier ops to restrict
+  // I/O exercisers from issuing conflicting ops in parallel
+  interval_set<uint64_t> reads;
+  interval_set<uint64_t> writes;
+
+ public:
+  ObjectModel(const std::string& oid, uint64_t block_size, int seed);
+
+  // Seed stored at `offset`; asserts that the offset is in range.
+  int get_seed(uint64_t offset) const;
+  // Every index of `contents` whose stored seed equals `seed`.
+  std::vector<int> get_seed_offsets(int seed) const;
+
+  std::string to_string(int mask = -1) const;
+
+  // Implementations of the Model hooks. Marked override so that a signature
+  // drift in the base class becomes a compile error instead of silently
+  // declaring new, unrelated virtual functions.
+  bool readyForIoOp(IoOp& op) override;
+  void applyIoOp(IoOp& op) override;
+
+  void encode(ceph::buffer::list& bl) const;
+  void decode(ceph::buffer::list::const_iterator& bl);
+};
+} // namespace io_exerciser
+} // namespace ceph \ No newline at end of file
diff --git a/src/common/io_exerciser/OpType.h b/src/common/io_exerciser/OpType.h
new file mode 100644
index 00000000000..7cddb805e45
--- /dev/null
+++ b/src/common/io_exerciser/OpType.h
@@ -0,0 +1,91 @@
+#pragma once
+
+#include <fmt/format.h>
+#include <include/ceph_assert.h>
+
+/* Overview
+ *
+ * enum OpType
+ * Enumeration of different types of I/O operation
+ *
+ */
+
+namespace ceph {
+namespace io_exerciser {
+// Every kind of operation an I/O sequence can emit. The fmt::formatter
+// specialisation later in this header prints each enumerator by name, so a
+// new enumerator needs a matching case there.
+enum class OpType {
+ Done, // End of I/O sequence
+ Barrier, // Barrier - all prior I/Os must complete
+ Create, // Create object and pattern with data
+ Remove, // Remove object
+ Read, // Read
+ Read2, // Two reads in a single op
+ Read3, // Three reads in a single op
+ Write, // Write
+ Write2, // Two writes in a single op
+ Write3, // Three writes in a single op
+ FailedWrite, // A write which should fail
+ FailedWrite2, // Two writes in one op which should fail
+ FailedWrite3, // Three writes in one op which should fail
+ InjectReadError, // Op to tell OSD to inject read errors
+ InjectWriteError, // Op to tell OSD to inject write errors
+ ClearReadErrorInject, // Op to tell OSD to clear read error injects
+ ClearWriteErrorInject // Op to tell OSD to clear write error injects
+};
+
+// Kinds of error injection. RadosIo::applyInjectOp uses these as the template
+// argument of the InjectECErrorRequest / InjectECClearErrorRequest messages.
+// NOTE(review): the exact OSD-side effect of each kind is not visible in this
+// header - see the OSD inject commands for their semantics.
+enum class InjectOpType {
+ None,
+ ReadEIO,
+ ReadMissingShard,
+ WriteFailAndRollback,
+ WriteOSDAbort
+};
+} // namespace io_exerciser
+} // namespace ceph
+
+// fmt support for OpType: an OpType value is printed as its enumerator name.
+template <>
+struct fmt::formatter<ceph::io_exerciser::OpType> {
+  // No format-spec options are supported; nothing is consumed from the spec.
+  constexpr auto parse(format_parse_context& ctx) { return ctx.begin(); }
+
+  auto format(ceph::io_exerciser::OpType opType,
+              fmt::format_context& ctx) const -> fmt::format_context::iterator {
+    using ceph::io_exerciser::OpType;
+
+    // Choose the label first, then emit it with a single format_to call.
+    const char* label = nullptr;
+    switch (opType) {
+      case OpType::Done:
+        label = "Done";
+        break;
+      case OpType::Barrier:
+        label = "Barrier";
+        break;
+      case OpType::Create:
+        label = "Create";
+        break;
+      case OpType::Remove:
+        label = "Remove";
+        break;
+      case OpType::Read:
+        label = "Read";
+        break;
+      case OpType::Read2:
+        label = "Read2";
+        break;
+      case OpType::Read3:
+        label = "Read3";
+        break;
+      case OpType::Write:
+        label = "Write";
+        break;
+      case OpType::Write2:
+        label = "Write2";
+        break;
+      case OpType::Write3:
+        label = "Write3";
+        break;
+      case OpType::FailedWrite:
+        label = "FailedWrite";
+        break;
+      case OpType::FailedWrite2:
+        label = "FailedWrite2";
+        break;
+      case OpType::FailedWrite3:
+        label = "FailedWrite3";
+        break;
+      case OpType::InjectReadError:
+        label = "InjectReadError";
+        break;
+      case OpType::InjectWriteError:
+        label = "InjectWriteError";
+        break;
+      case OpType::ClearReadErrorInject:
+        label = "ClearReadErrorInject";
+        break;
+      case OpType::ClearWriteErrorInject:
+        label = "ClearWriteErrorInject";
+        break;
+      default:
+        // A value outside the enumeration is a programming error.
+        ceph_abort_msg("Unknown OpType");
+        label = "Unknown OpType";
+        break;
+    }
+    return fmt::format_to(ctx.out(), "{}", label);
+  }
+}; \ No newline at end of file
diff --git a/src/common/io_exerciser/RadosIo.cc b/src/common/io_exerciser/RadosIo.cc
index 44b82260263..4451900b7bb 100644
--- a/src/common/io_exerciser/RadosIo.cc
+++ b/src/common/io_exerciser/RadosIo.cc
@@ -1,300 +1,429 @@
#include "RadosIo.h"
+#include <fmt/format.h>
+#include <json_spirit/json_spirit.h>
+
+#include <ranges>
+
#include "DataGenerator.h"
+#include "common/ceph_json.h"
+#include "common/json/OSDStructures.h"
using RadosIo = ceph::io_exerciser::RadosIo;
-RadosIo::RadosIo(librados::Rados& rados,
- boost::asio::io_context& asio,
- const std::string& pool,
- const std::string& oid,
- uint64_t block_size,
- int seed,
- int threads,
- ceph::mutex& lock,
- ceph::condition_variable& cond) :
- Model(oid, block_size),
- rados(rados),
- asio(asio),
- om(std::make_unique<ObjectModel>(oid, block_size, seed)),
- db(data_generation::DataGenerator::create_generator(
- data_generation::GenerationType::HeaderedSeededRandom, *om)),
- pool(pool),
- threads(threads),
- lock(lock),
- cond(cond),
- outstanding_io(0)
-{
+// Builds a RadosIo for object `oid` in `pool`.
+//
+// Besides storing its arguments, the constructor creates the ObjectModel for
+// the object, a HeaderedSeededRandom data generator bound to that model, and
+// an IoCtx for the pool; failure to create the IoCtx trips ceph_assert.
+// It finishes by calling allow_ec_overwrites(true), which issues a monitor
+// command for the pool.
+RadosIo::RadosIo(librados::Rados& rados, boost::asio::io_context& asio,
+ const std::string& pool, const std::string& oid,
+ const std::optional<std::vector<int>>& cached_shard_order,
+ uint64_t block_size, int seed, int threads, ceph::mutex& lock,
+ ceph::condition_variable& cond)
+ : Model(oid, block_size),
+ rados(rados),
+ asio(asio),
+ om(std::make_unique<ObjectModel>(oid, block_size, seed)),
+ db(data_generation::DataGenerator::create_generator(
+ data_generation::GenerationType::HeaderedSeededRandom, *om)),
+ pool(pool),
+ cached_shard_order(cached_shard_order),
+ threads(threads),
+ lock(lock),
+ cond(cond),
+ outstanding_io(0) {
int rc;
rc = rados.ioctx_create(pool.c_str(), io);
ceph_assert(rc == 0);
allow_ec_overwrites(true);
}
-RadosIo::~RadosIo()
-{
-}
+// Nothing to release by hand: the members clean up after themselves.
+RadosIo::~RadosIo() = default;
-void RadosIo::start_io()
-{
+// Counts one more in-flight I/O. Takes `lock` for the update.
+void RadosIo::start_io() {
std::lock_guard l(lock);
outstanding_io++;
}
-void RadosIo::finish_io()
-{
+// Counts one in-flight I/O as complete and wakes every waiter on `cond`.
+// Takes `lock` for the update; asserts that an I/O was actually outstanding.
+void RadosIo::finish_io() {
std::lock_guard l(lock);
ceph_assert(outstanding_io > 0);
outstanding_io--;
cond.notify_all();
}
-void RadosIo::wait_for_io(int count)
-{
+// Blocks until at most `count` I/Os are outstanding. Acquires `lock` itself
+// and waits on `cond`, re-checking the counter after every wake-up.
+void RadosIo::wait_for_io(int count) {
std::unique_lock l(lock);
while (outstanding_io > count) {
cond.wait(l);
}
}
-void RadosIo::allow_ec_overwrites(bool allow)
-{
+void RadosIo::allow_ec_overwrites(bool allow) {
int rc;
bufferlist inbl, outbl;
- std::string cmdstr =
- "{\"prefix\": \"osd pool set\", \"pool\": \"" + pool + "\", \
+ std::string cmdstr = "{\"prefix\": \"osd pool set\", \"pool\": \"" + pool +
+ "\", \
\"var\": \"allow_ec_overwrites\", \"val\": \"" +
- (allow ? "true" : "false") + "\"}";
+ (allow ? "true" : "false") + "\"}";
rc = rados.mon_command(cmdstr, inbl, &outbl, nullptr);
ceph_assert(rc == 0);
}
-RadosIo::AsyncOpInfo::AsyncOpInfo(uint64_t offset1, uint64_t length1,
- uint64_t offset2, uint64_t length2,
- uint64_t offset3, uint64_t length3 ) :
- offset1(offset1), length1(length1),
- offset2(offset2), length2(length2),
- offset3(offset3), length3(length3)
-{
-
-}
+// Copies the N offsets and N lengths of an operation into the info object so
+// that they remain available to the completion callback.
+template <int N>
+RadosIo::AsyncOpInfo<N>::AsyncOpInfo(const std::array<uint64_t, N>& offset,
+ const std::array<uint64_t, N>& length)
+ : offset(offset), length(length) {}
-bool RadosIo::readyForIoOp(IoOp &op)
-{
- ceph_assert(ceph_mutex_is_locked_by_me(lock)); //Must be called with lock held
+bool RadosIo::readyForIoOp(IoOp& op) {
+ ceph_assert(
+ ceph_mutex_is_locked_by_me(lock)); // Must be called with lock held
if (!om->readyForIoOp(op)) {
return false;
}
- switch (op.op) {
- case OpType::Done:
- case OpType::BARRIER:
- return outstanding_io == 0;
- default:
- return outstanding_io < threads;
+
+ switch (op.getOpType()) {
+ case OpType::Done:
+ case OpType::Barrier:
+ return outstanding_io == 0;
+ default:
+ return outstanding_io < threads;
}
}
-void RadosIo::applyIoOp(IoOp &op)
-{
- std::shared_ptr<AsyncOpInfo> op_info;
-
+void RadosIo::applyIoOp(IoOp& op) {
om->applyIoOp(op);
// If there are thread concurrent I/Os in flight then wait for
// at least one I/O to complete
- wait_for_io(threads-1);
-
- switch (op.op) {
- case OpType::Done:
- [[ fallthrough ]];
- case OpType::BARRIER:
- // Wait for all outstanding I/O to complete
- wait_for_io(0);
- break;
-
- case OpType::CREATE:
- {
+ wait_for_io(threads - 1);
+
+ switch (op.getOpType()) {
+ case OpType::Done:
+ [[fallthrough]];
+ case OpType::Barrier:
+ // Wait for all outstanding I/O to complete
+ wait_for_io(0);
+ break;
+
+ case OpType::Create: {
start_io();
- op_info = std::make_shared<AsyncOpInfo>(0, op.length1);
- op_info->bl1 = db->generate_data(0, op.length1);
- op_info->wop.write_full(op_info->bl1);
- auto create_cb = [this] (boost::system::error_code ec,
- version_t ver) {
+ uint64_t opSize = static_cast<CreateOp&>(op).size;
+ std::shared_ptr<AsyncOpInfo<1>> op_info =
+ std::make_shared<AsyncOpInfo<1>>(std::array<uint64_t, 1>{0},
+ std::array<uint64_t, 1>{opSize});
+ op_info->bufferlist[0] = db->generate_data(0, opSize);
+ op_info->wop.write_full(op_info->bufferlist[0]);
+ auto create_cb = [this](boost::system::error_code ec, version_t ver) {
ceph_assert(ec == boost::system::errc::success);
finish_io();
};
- librados::async_operate(asio, io, oid,
- &op_info->wop, 0, nullptr, create_cb);
+ librados::async_operate(asio, io, oid, &op_info->wop, 0, nullptr,
+ create_cb);
+ break;
}
- break;
- case OpType::REMOVE:
- {
+ case OpType::Remove: {
start_io();
- op_info = std::make_shared<AsyncOpInfo>();
+ auto op_info = std::make_shared<AsyncOpInfo<0>>();
op_info->wop.remove();
- auto remove_cb = [this] (boost::system::error_code ec,
- version_t ver) {
+ auto remove_cb = [this](boost::system::error_code ec, version_t ver) {
ceph_assert(ec == boost::system::errc::success);
finish_io();
};
- librados::async_operate(asio, io, oid,
- &op_info->wop, 0, nullptr, remove_cb);
+ librados::async_operate(asio, io, oid, &op_info->wop, 0, nullptr,
+ remove_cb);
+ break;
}
- break;
+ case OpType::Read:
+ [[fallthrough]];
+ case OpType::Read2:
+ [[fallthrough]];
+ case OpType::Read3:
+ [[fallthrough]];
+ case OpType::Write:
+ [[fallthrough]];
+ case OpType::Write2:
+ [[fallthrough]];
+ case OpType::Write3:
+ [[fallthrough]];
+ case OpType::FailedWrite:
+ [[fallthrough]];
+ case OpType::FailedWrite2:
+ [[fallthrough]];
+ case OpType::FailedWrite3:
+ applyReadWriteOp(op);
+ break;
+ case OpType::InjectReadError:
+ [[fallthrough]];
+ case OpType::InjectWriteError:
+ [[fallthrough]];
+ case OpType::ClearReadErrorInject:
+ [[fallthrough]];
+ case OpType::ClearWriteErrorInject:
+ applyInjectOp(op);
+ break;
+ default:
+ ceph_abort_msg("Unrecognised Op");
+ break;
+ }
+}
- case OpType::READ:
- {
- start_io();
- op_info = std::make_shared<AsyncOpInfo>(op.offset1, op.length1);
- op_info->rop.read(op.offset1 * block_size,
- op.length1 * block_size,
- &op_info->bl1, nullptr);
- auto read_cb = [this, op_info] (boost::system::error_code ec,
- version_t ver,
- bufferlist bl) {
- ceph_assert(ec == boost::system::errc::success);
- ceph_assert(db->validate(op_info->bl1,
- op_info->offset1,
- op_info->length1));
- finish_io();
- };
- librados::async_operate(asio, io, oid,
- &op_info->rop, 0, nullptr, read_cb);
- num_io++;
+void RadosIo::applyReadWriteOp(IoOp& op) {
+ auto applyReadOp = [this]<OpType opType, int N>(
+ ReadWriteOp<opType, N> readOp) {
+ auto op_info =
+ std::make_shared<AsyncOpInfo<N>>(readOp.offset, readOp.length);
+
+ for (int i = 0; i < N; i++) {
+ op_info->rop.read(readOp.offset[i] * block_size,
+ readOp.length[i] * block_size, &op_info->bufferlist[i],
+ nullptr);
}
- break;
+ auto read_cb = [this, op_info](boost::system::error_code ec, version_t ver,
+ bufferlist bl) {
+ ceph_assert(ec == boost::system::errc::success);
+ for (int i = 0; i < N; i++) {
+ ceph_assert(db->validate(op_info->bufferlist[i], op_info->offset[i],
+ op_info->length[i]));
+ }
+ finish_io();
+ };
+ librados::async_operate(asio, io, oid, &op_info->rop, 0, nullptr, read_cb);
+ num_io++;
+ };
- case OpType::READ2:
- {
- start_io();
- op_info = std::make_shared<AsyncOpInfo>(op.offset1,
- op.length1,
- op.offset2,
- op.length2);
-
- op_info->rop.read(op.offset1 * block_size,
- op.length1 * block_size,
- &op_info->bl1, nullptr);
- op_info->rop.read(op.offset2 * block_size,
- op.length2 * block_size,
- &op_info->bl2, nullptr);
- auto read2_cb = [this, op_info] (boost::system::error_code ec,
- version_t ver,
- bufferlist bl) {
- ceph_assert(ec == boost::system::errc::success);
- ceph_assert(db->validate(op_info->bl1,
- op_info->offset1,
- op_info->length1));
- ceph_assert(db->validate(op_info->bl2,
- op_info->offset2,
- op_info->length2));
- finish_io();
- };
- librados::async_operate(asio, io, oid,
- &op_info->rop, 0, nullptr, read2_cb);
- num_io++;
+ auto applyWriteOp = [this]<OpType opType, int N>(
+ ReadWriteOp<opType, N> writeOp) {
+ auto op_info =
+ std::make_shared<AsyncOpInfo<N>>(writeOp.offset, writeOp.length);
+ for (int i = 0; i < N; i++) {
+ op_info->bufferlist[i] =
+ db->generate_data(writeOp.offset[i], writeOp.length[i]);
+ op_info->wop.write(writeOp.offset[i] * block_size,
+ op_info->bufferlist[i]);
}
- break;
+ auto write_cb = [this](boost::system::error_code ec, version_t ver) {
+ ceph_assert(ec == boost::system::errc::success);
+ finish_io();
+ };
+ librados::async_operate(asio, io, oid, &op_info->wop, 0, nullptr, write_cb);
+ num_io++;
+ };
- case OpType::READ3:
- {
- start_io();
- op_info = std::make_shared<AsyncOpInfo>(op.offset1, op.length1,
- op.offset2, op.length2,
- op.offset3, op.length3);
- op_info->rop.read(op.offset1 * block_size,
- op.length1 * block_size,
- &op_info->bl1, nullptr);
- op_info->rop.read(op.offset2 * block_size,
- op.length2 * block_size,
- &op_info->bl2, nullptr);
- op_info->rop.read(op.offset3 * block_size,
- op.length3 * block_size,
- &op_info->bl3, nullptr);
- auto read3_cb = [this, op_info] (boost::system::error_code ec,
- version_t ver,
- bufferlist bl) {
- ceph_assert(ec == boost::system::errc::success);
- ceph_assert(db->validate(op_info->bl1,
- op_info->offset1,
- op_info->length1));
- ceph_assert(db->validate(op_info->bl2,
- op_info->offset2,
- op_info->length2));
- ceph_assert(db->validate(op_info->bl3,
- op_info->offset3,
- op_info->length3));
- finish_io();
- };
- librados::async_operate(asio, io, oid,
- &op_info->rop, 0, nullptr, read3_cb);
- num_io++;
+ auto applyFailedWriteOp = [this]<OpType opType, int N>(
+ ReadWriteOp<opType, N> writeOp) {
+ auto op_info =
+ std::make_shared<AsyncOpInfo<N>>(writeOp.offset, writeOp.length);
+ for (int i = 0; i < N; i++) {
+ op_info->bufferlist[i] =
+ db->generate_data(writeOp.offset[i], writeOp.length[i]);
+ op_info->wop.write(writeOp.offset[i] * block_size,
+ op_info->bufferlist[i]);
}
- break;
+ auto write_cb = [this, writeOp](boost::system::error_code ec,
+ version_t ver) {
+ ceph_assert(ec != boost::system::errc::success);
+ finish_io();
+ };
+ librados::async_operate(asio, io, oid, &op_info->wop, 0, nullptr, write_cb);
+ num_io++;
+ };
- case OpType::WRITE:
- {
+ switch (op.getOpType()) {
+ case OpType::Read: {
start_io();
- op_info = std::make_shared<AsyncOpInfo>(op.offset1, op.length1);
- op_info->bl1 = db->generate_data(op.offset1, op.length1);
-
- op_info->wop.write(op.offset1 * block_size, op_info->bl1);
- auto write_cb = [this] (boost::system::error_code ec,
- version_t ver) {
- ceph_assert(ec == boost::system::errc::success);
- finish_io();
- };
- librados::async_operate(asio, io, oid,
- &op_info->wop, 0, nullptr, write_cb);
- num_io++;
+ SingleReadOp& readOp = static_cast<SingleReadOp&>(op);
+ applyReadOp(readOp);
+ break;
}
- break;
-
- case OpType::WRITE2:
- {
+ case OpType::Read2: {
start_io();
- op_info = std::make_shared<AsyncOpInfo>(op.offset1, op.length1,
- op.offset2, op.length2);
- op_info->bl1 = db->generate_data(op.offset1, op.length1);
- op_info->bl2 = db->generate_data(op.offset2, op.length2);
- op_info->wop.write(op.offset1 * block_size, op_info->bl1);
- op_info->wop.write(op.offset2 * block_size, op_info->bl2);
- auto write2_cb = [this] (boost::system::error_code ec,
- version_t ver) {
- ceph_assert(ec == boost::system::errc::success);
- finish_io();
- };
- librados::async_operate(asio, io, oid,
- &op_info->wop, 0, nullptr, write2_cb);
- num_io++;
+ DoubleReadOp& readOp = static_cast<DoubleReadOp&>(op);
+ applyReadOp(readOp);
+ break;
+ }
+ case OpType::Read3: {
+ start_io();
+ TripleReadOp& readOp = static_cast<TripleReadOp&>(op);
+ applyReadOp(readOp);
+ break;
+ }
+ case OpType::Write: {
+ start_io();
+ SingleWriteOp& writeOp = static_cast<SingleWriteOp&>(op);
+ applyWriteOp(writeOp);
+ break;
+ }
+ case OpType::Write2: {
+ start_io();
+ DoubleWriteOp& writeOp = static_cast<DoubleWriteOp&>(op);
+ applyWriteOp(writeOp);
+ break;
+ }
+ case OpType::Write3: {
+ start_io();
+ TripleWriteOp& writeOp = static_cast<TripleWriteOp&>(op);
+ applyWriteOp(writeOp);
+ break;
}
- break;
- case OpType::WRITE3:
- {
+ case OpType::FailedWrite: {
start_io();
- op_info = std::make_shared<AsyncOpInfo>(op.offset1, op.length1,
- op.offset2, op.length2,
- op.offset3, op.length3);
- op_info->bl1 = db->generate_data(op.offset1, op.length1);
- op_info->bl2 = db->generate_data(op.offset2, op.length2);
- op_info->bl3 = db->generate_data(op.offset3, op.length3);
- op_info->wop.write(op.offset1 * block_size, op_info->bl1);
- op_info->wop.write(op.offset2 * block_size, op_info->bl2);
- op_info->wop.write(op.offset3 * block_size, op_info->bl3);
- auto write3_cb = [this] (boost::system::error_code ec,
- version_t ver) {
- ceph_assert(ec == boost::system::errc::success);
- finish_io();
- };
- librados::async_operate(asio, io, oid,
- &op_info->wop, 0, nullptr, write3_cb);
- num_io++;
+ SingleFailedWriteOp& writeOp = static_cast<SingleFailedWriteOp&>(op);
+ applyFailedWriteOp(writeOp);
+ break;
+ }
+ case OpType::FailedWrite2: {
+ start_io();
+ DoubleFailedWriteOp& writeOp = static_cast<DoubleFailedWriteOp&>(op);
+ applyFailedWriteOp(writeOp);
+ break;
+ }
+ case OpType::FailedWrite3: {
+ start_io();
+ TripleFailedWriteOp& writeOp = static_cast<TripleFailedWriteOp&>(op);
+ applyFailedWriteOp(writeOp);
+ break;
}
- break;
- default:
- break;
+ default:
+ ceph_abort_msg(
+ fmt::format("Unsupported Read/Write operation ({})", op.getOpType()));
+ break;
}
}
+
+// Sends an error-injection command (or the matching clear command) for this
+// object to an OSD.
+//
+// The monitor is first asked for the object's OSD mapping. The command is
+// then encoded as JSON and sent to the acting primary, except for the
+// WriteOSDAbort injection (write type 3), which goes to acting[shard].
+// Every failure is fatal: a non-zero return code, an unparsable reply or an
+// unsupported op/inject type aborts the process.
+void RadosIo::applyInjectOp(IoOp& op) {
+  bufferlist osdmap_inbl, inject_inbl, osdmap_outbl, inject_outbl;
+  auto formatter = std::make_unique<JSONFormatter>(false);
+
+  // Step 1: look up the acting set and acting primary for the object.
+  std::ostringstream osdmap_cmd;
+  ceph::messaging::osd::OSDMapRequest osdMapRequest{pool, get_oid(), ""};
+  encode_json("OSDMapRequest", osdMapRequest, formatter.get());
+  formatter->flush(osdmap_cmd);
+  int rc =
+      rados.mon_command(osdmap_cmd.str(), osdmap_inbl, &osdmap_outbl, nullptr);
+  ceph_assert(rc == 0);
+
+  JSONParser p;
+  bool success = p.parse(osdmap_outbl.c_str(), osdmap_outbl.length());
+  ceph_assert(success);
+
+  ceph::messaging::osd::OSDMapReply reply;
+  reply.decode_json(&p);
+
+  int osd = reply.acting_primary;
+  std::vector<int> shard_order = reply.acting;
+
+  // Step 2: encode the request. Each case only fills the formatter (and may
+  // redirect `osd`); the command is sent once, after the switch.
+  switch (op.getOpType()) {
+    case OpType::InjectReadError: {
+      InjectReadErrorOp& errorOp = static_cast<InjectReadErrorOp&>(op);
+
+      // Read inject types: 0 selects ReadEIO, 1 selects ReadMissingShard.
+      if (errorOp.type == 0) {
+        ceph::messaging::osd::InjectECErrorRequest<InjectOpType::ReadEIO>
+            injectErrorRequest{pool,         oid,          errorOp.shard,
+                               errorOp.type, errorOp.when, errorOp.duration};
+        encode_json("InjectECErrorRequest", injectErrorRequest,
+                    formatter.get());
+      } else if (errorOp.type == 1) {
+        ceph::messaging::osd::InjectECErrorRequest<
+            InjectOpType::ReadMissingShard>
+            injectErrorRequest{pool,         oid,          errorOp.shard,
+                               errorOp.type, errorOp.when, errorOp.duration};
+        encode_json("InjectECErrorRequest", injectErrorRequest,
+                    formatter.get());
+      } else {
+        ceph_abort_msg("Unsupported inject type");
+      }
+      break;
+    }
+    case OpType::InjectWriteError: {
+      InjectWriteErrorOp& errorOp = static_cast<InjectWriteErrorOp&>(op);
+
+      // Write inject types: 0 selects WriteFailAndRollback, 3 selects
+      // WriteOSDAbort.
+      if (errorOp.type == 0) {
+        ceph::messaging::osd::InjectECErrorRequest<
+            InjectOpType::WriteFailAndRollback>
+            injectErrorRequest{pool,         oid,          errorOp.shard,
+                               errorOp.type, errorOp.when, errorOp.duration};
+        encode_json("InjectECErrorRequest", injectErrorRequest,
+                    formatter.get());
+      } else if (errorOp.type == 3) {
+        ceph::messaging::osd::InjectECErrorRequest<InjectOpType::WriteOSDAbort>
+            injectErrorRequest{pool,         oid,          errorOp.shard,
+                               errorOp.type, errorOp.when, errorOp.duration};
+        encode_json("InjectECErrorRequest", injectErrorRequest,
+                    formatter.get());
+
+        // This inject is sent directly to the shard we want to inject the
+        // error on. Fail loudly rather than index past the acting set.
+        ceph_assert(static_cast<size_t>(errorOp.shard) < shard_order.size());
+        osd = shard_order[errorOp.shard];
+      } else {
+        ceph_abort_msg("Unsupported inject type");
+      }
+      break;
+    }
+    case OpType::ClearReadErrorInject: {
+      ClearReadErrorInjectOp& errorOp =
+          static_cast<ClearReadErrorInjectOp&>(op);
+
+      if (errorOp.type == 0) {
+        ceph::messaging::osd::InjectECClearErrorRequest<InjectOpType::ReadEIO>
+            clearErrorInject{pool, oid, errorOp.shard, errorOp.type};
+        encode_json("InjectECClearErrorRequest", clearErrorInject,
+                    formatter.get());
+      } else if (errorOp.type == 1) {
+        ceph::messaging::osd::InjectECClearErrorRequest<
+            InjectOpType::ReadMissingShard>
+            clearErrorInject{pool, oid, errorOp.shard, errorOp.type};
+        encode_json("InjectECClearErrorRequest", clearErrorInject,
+                    formatter.get());
+      } else {
+        ceph_abort_msg("Unsupported inject type");
+      }
+      break;
+    }
+    case OpType::ClearWriteErrorInject: {
+      // Cast to the op class that belongs to this op type; the read variant
+      // is a different class and must not be used here.
+      ClearWriteErrorInjectOp& errorOp =
+          static_cast<ClearWriteErrorInjectOp&>(op);
+
+      if (errorOp.type == 0) {
+        ceph::messaging::osd::InjectECClearErrorRequest<
+            InjectOpType::WriteFailAndRollback>
+            clearErrorInject{pool, oid, errorOp.shard, errorOp.type};
+        encode_json("InjectECClearErrorRequest", clearErrorInject,
+                    formatter.get());
+      } else if (errorOp.type == 3) {
+        ceph::messaging::osd::InjectECClearErrorRequest<
+            InjectOpType::WriteOSDAbort>
+            clearErrorInject{pool, oid, errorOp.shard, errorOp.type};
+        encode_json("InjectECClearErrorRequest", clearErrorInject,
+                    formatter.get());
+      } else {
+        ceph_abort_msg("Unsupported inject type");
+      }
+      break;
+    }
+    default:
+      ceph_abort_msg(
+          fmt::format("Unsupported inject operation ({})", op.getOpType()));
+      break;
+  }
+
+  // Step 3: send the request. A fresh stream is used so that the command
+  // string holds only the inject request and not the OSDMapRequest text that
+  // was flushed earlier.
+  std::ostringstream inject_cmd;
+  formatter->flush(inject_cmd);
+  rc = rados.osd_command(osd, inject_cmd.str(), inject_inbl, &inject_outbl,
+                         nullptr);
+  ceph_assert(rc == 0);
+} \ No newline at end of file
diff --git a/src/common/io_exerciser/RadosIo.h b/src/common/io_exerciser/RadosIo.h
index 179c5bba3ae..a5c66ad4768 100644
--- a/src/common/io_exerciser/RadosIo.h
+++ b/src/common/io_exerciser/RadosIo.h
@@ -10,71 +10,65 @@
* in the object. Uses DataBuffer to create and validate
* data buffers. When there are not barrier I/Os this may
* issue multiple async I/Os in parallel.
- *
+ *
*/
namespace ceph {
- namespace io_exerciser {
- namespace data_generation {
- class DataGenerator;
- }
-
- class RadosIo: public Model {
- protected:
- librados::Rados& rados;
- boost::asio::io_context& asio;
- std::unique_ptr<ObjectModel> om;
- std::unique_ptr<ceph::io_exerciser::data_generation::DataGenerator> db;
- std::string pool;
- int threads;
- ceph::mutex& lock;
- ceph::condition_variable& cond;
- librados::IoCtx io;
- int outstanding_io;
+namespace io_exerciser {
+namespace data_generation {
+class DataGenerator;
+}
+
+class RadosIo : public Model {
+ protected:
+ librados::Rados& rados;
+ boost::asio::io_context& asio;
+ std::unique_ptr<ObjectModel> om;
+ std::unique_ptr<ceph::io_exerciser::data_generation::DataGenerator> db;
+ std::string pool;
+ std::optional<std::vector<int>> cached_shard_order;
+ int threads;
+ ceph::mutex& lock;
+ ceph::condition_variable& cond;
+ librados::IoCtx io;
+ int outstanding_io;
+
+ void start_io();
+ void finish_io();
+ void wait_for_io(int count);
+
+ public:
+ RadosIo(librados::Rados& rados, boost::asio::io_context& asio,
+ const std::string& pool, const std::string& oid,
+ const std::optional<std::vector<int>>& cached_shard_order,
+ uint64_t block_size, int seed, int threads, ceph::mutex& lock,
+ ceph::condition_variable& cond);
- void start_io();
- void finish_io();
- void wait_for_io(int count);
-
- public:
- RadosIo(librados::Rados& rados,
- boost::asio::io_context& asio,
- const std::string& pool,
- const std::string& oid,
- uint64_t block_size,
- int seed,
- int threads,
- ceph::mutex& lock,
- ceph::condition_variable& cond);
+ ~RadosIo();
- ~RadosIo();
+ void allow_ec_overwrites(bool allow);
- void allow_ec_overwrites(bool allow);
+ template <int N>
+ class AsyncOpInfo {
+ public:
+ librados::ObjectReadOperation rop;
+ librados::ObjectWriteOperation wop;
+ std::array<ceph::bufferlist, N> bufferlist;
+ std::array<uint64_t, N> offset;
+ std::array<uint64_t, N> length;
- class AsyncOpInfo {
- public:
- librados::ObjectReadOperation rop;
- librados::ObjectWriteOperation wop;
- ceph::buffer::list bl1;
- ceph::buffer::list bl2;
- ceph::buffer::list bl3;
- uint64_t offset1;
- uint64_t length1;
- uint64_t offset2;
- uint64_t length2;
- uint64_t offset3;
- uint64_t length3;
+ AsyncOpInfo(const std::array<uint64_t, N>& offset = {},
+ const std::array<uint64_t, N>& length = {});
+ ~AsyncOpInfo() = default;
+ };
- AsyncOpInfo(uint64_t offset1 = 0, uint64_t length1 = 0,
- uint64_t offset2 = 0, uint64_t length2 = 0,
- uint64_t offset3 = 0, uint64_t length3 = 0 );
- ~AsyncOpInfo() = default;
- };
+ // Must be called with lock held
+ bool readyForIoOp(IoOp& op);
+ void applyIoOp(IoOp& op);
- // Must be called with lock held
- bool readyForIoOp(IoOp& op);
-
- void applyIoOp(IoOp& op);
- };
- }
-} \ No newline at end of file
+ private:
+ void applyReadWriteOp(IoOp& op);
+ void applyInjectOp(IoOp& op);
+};
+} // namespace io_exerciser
+} // namespace ceph \ No newline at end of file
diff --git a/src/common/json/BalancerStructures.cc b/src/common/json/BalancerStructures.cc
new file mode 100644
index 00000000000..48dfb843761
--- /dev/null
+++ b/src/common/json/BalancerStructures.cc
@@ -0,0 +1,38 @@
+#include "BalancerStructures.h"
+
+#include "common/ceph_json.h"
+
+using namespace ceph::messaging::balancer;
+
+// Writes this request to the formatter. The only key emitted is "prefix",
+// with the fixed value "balancer off".
+void BalancerOffRequest::dump(Formatter* f) const {
+ encode_json("prefix", "balancer off", f);
+}
+
+// Empty body: nothing is read from obj.
+void BalancerOffRequest::decode_json(JSONObj* obj) {}
+
+// Writes this request to the formatter. The only key emitted is "prefix",
+// with the fixed value "balancer status".
+void BalancerStatusRequest::dump(Formatter* f) const {
+ encode_json("prefix", "balancer status", f);
+}
+
+// Empty body: nothing is read from obj.
+void BalancerStatusRequest::decode_json(JSONObj* obj) {}
+
+// Writes every member of the reply to the formatter, one key per member.
+// Each JSON key has the same spelling as the member it carries.
+void BalancerStatusReply::dump(Formatter* f) const {
+ encode_json("active", active, f);
+ encode_json("last_optimization_duration", last_optimization_duration, f);
+ encode_json("last_optimization_started", last_optimization_started, f);
+ encode_json("mode", mode, f);
+ encode_json("no_optimization_needed", no_optimization_needed, f);
+ encode_json("optimize_result", optimize_result, f);
+}
+
+// Fills the members of the reply from obj. The keys read here are the same
+// six keys that dump() writes.
+void BalancerStatusReply::decode_json(JSONObj* obj) {
+ JSONDecoder::decode_json("active", active, obj);
+ JSONDecoder::decode_json("last_optimization_duration",
+ last_optimization_duration, obj);
+ JSONDecoder::decode_json("last_optimization_started",
+ last_optimization_started, obj);
+ JSONDecoder::decode_json("mode", mode, obj);
+ JSONDecoder::decode_json("no_optimization_needed", no_optimization_needed,
+ obj);
+ JSONDecoder::decode_json("optimize_result", optimize_result, obj);
+} \ No newline at end of file
diff --git a/src/common/json/BalancerStructures.h b/src/common/json/BalancerStructures.h
new file mode 100644
index 00000000000..bbf5c748eb3
--- /dev/null
+++ b/src/common/json/BalancerStructures.h
@@ -0,0 +1,35 @@
+#pragma once
+
+#include <string>
+
+#include "include/types.h"
+
+class JSONObj;
+
+namespace ceph {
+namespace messaging {
+namespace balancer {
+// Request with no data members; only declares the JSON dump/decode hooks.
+struct BalancerOffRequest {
+ void dump(Formatter* f) const;
+ void decode_json(JSONObj* obj);
+};
+
+// Request with no data members; only declares the JSON dump/decode hooks.
+struct BalancerStatusRequest {
+ void dump(Formatter* f) const;
+ void decode_json(JSONObj* obj);
+};
+
+// Reply holding six fields plus the JSON dump/decode hooks.
+// NOTE(review): the two bool members have no default initializer, so they
+// are indeterminate until assigned or decoded.
+struct BalancerStatusReply {
+ bool active;
+ std::string last_optimization_duration;
+ std::string last_optimization_started;
+ std::string mode;
+ bool no_optimization_needed;
+ std::string optimize_result;
+
+ void dump(Formatter* f) const;
+ void decode_json(JSONObj* obj);
+};
+} // namespace balancer
+} // namespace messaging
+} // namespace ceph \ No newline at end of file
diff --git a/src/common/json/CMakeLists.txt b/src/common/json/CMakeLists.txt
new file mode 100644
index 00000000000..1497daf93db
--- /dev/null
+++ b/src/common/json/CMakeLists.txt
@@ -0,0 +1,4 @@
+add_library(json_structures STATIC
+ BalancerStructures.cc ConfigStructures.cc OSDStructures.cc)
+
+ target_link_libraries(json_structures global) \ No newline at end of file
diff --git a/src/common/json/ConfigStructures.cc b/src/common/json/ConfigStructures.cc
new file mode 100644
index 00000000000..651278d002a
--- /dev/null
+++ b/src/common/json/ConfigStructures.cc
@@ -0,0 +1,20 @@
+#include "ConfigStructures.h"
+
+#include "common/ceph_json.h"
+
+using namespace ceph::messaging::config;
+
+// Writes the request to the formatter: the fixed "prefix" value
+// "config set", followed by the who/name/value/force members.
+void ConfigSetRequest::dump(Formatter* f) const {
+ encode_json("prefix", "config set", f);
+ encode_json("who", who, f);
+ encode_json("name", name, f);
+ encode_json("value", value, f);
+ encode_json("force", force, f);
+}
+
+// Fills who/name/value/force from obj. The "prefix" key written by dump()
+// is not read back.
+void ConfigSetRequest::decode_json(JSONObj* obj) {
+ JSONDecoder::decode_json("who", who, obj);
+ JSONDecoder::decode_json("name", name, obj);
+ JSONDecoder::decode_json("value", value, obj);
+ JSONDecoder::decode_json("force", force, obj);
+} \ No newline at end of file
diff --git a/src/common/json/ConfigStructures.h b/src/common/json/ConfigStructures.h
new file mode 100644
index 00000000000..554229d75f4
--- /dev/null
+++ b/src/common/json/ConfigStructures.h
@@ -0,0 +1,24 @@
+#pragma once
+
+#include <optional>
+#include <string>
+
+#include "include/types.h"
+
+class JSONObj;
+
+namespace ceph {
+namespace messaging {
+namespace config {
+// Request with three string fields and an optional boolean `force`, plus
+// the JSON dump/decode hooks.
+struct ConfigSetRequest {
+ std::string who;
+ std::string name;
+ std::string value;
+ std::optional<bool> force;
+
+ void dump(Formatter* f) const;
+ void decode_json(JSONObj* obj);
+};
+} // namespace config
+} // namespace messaging
+} // namespace ceph \ No newline at end of file
diff --git a/src/common/json/OSDStructures.cc b/src/common/json/OSDStructures.cc
new file mode 100644
index 00000000000..aaac5f6e169
--- /dev/null
+++ b/src/common/json/OSDStructures.cc
@@ -0,0 +1,150 @@
+#include "OSDStructures.h"
+
+#include "common/ceph_json.h"
+#include "common/io_exerciser/OpType.h"
+
+using namespace ceph::messaging::osd;
+
+// Writes the request to the formatter: the fixed "prefix" value "osd map",
+// followed by the pool/object/nspace/format members.
+void OSDMapRequest::dump(Formatter* f) const {
+ encode_json("prefix", "osd map", f);
+ encode_json("pool", pool, f);
+ encode_json("object", object, f);
+ encode_json("nspace", nspace, f);
+ encode_json("format", format, f);
+}
+
+// Fills pool/object/nspace/format from obj. The "prefix" key written by
+// dump() is not read back.
+void OSDMapRequest::decode_json(JSONObj* obj) {
+ JSONDecoder::decode_json("pool", pool, obj);
+ JSONDecoder::decode_json("object", object, obj);
+ JSONDecoder::decode_json("nspace", nspace, obj);
+ JSONDecoder::decode_json("format", format, obj);
+}
+
+// Writes all ten members of the reply to the formatter, one key per member.
+// Each JSON key has the same spelling as the member it carries.
+void OSDMapReply::dump(Formatter* f) const {
+ encode_json("epoch", epoch, f);
+ encode_json("pool", pool, f);
+ encode_json("pool_id", pool_id, f);
+ encode_json("objname", objname, f);
+ encode_json("raw_pgid", raw_pgid, f);
+ encode_json("pgid", pgid, f);
+ encode_json("up", up, f);
+ encode_json("up_primary", up_primary, f);
+ encode_json("acting", acting, f);
+ encode_json("acting_primary", acting_primary, f);
+}
+
+// Fills the members of the reply from obj. The keys read here are the same
+// ten keys that dump() writes.
+void OSDMapReply::decode_json(JSONObj* obj) {
+ JSONDecoder::decode_json("epoch", epoch, obj);
+ JSONDecoder::decode_json("pool", pool, obj);
+ JSONDecoder::decode_json("pool_id", pool_id, obj);
+ JSONDecoder::decode_json("objname", objname, obj);
+ JSONDecoder::decode_json("raw_pgid", raw_pgid, obj);
+ JSONDecoder::decode_json("pgid", pgid, obj);
+ JSONDecoder::decode_json("up", up, obj);
+ JSONDecoder::decode_json("up_primary", up_primary, obj);
+ JSONDecoder::decode_json("acting", acting, obj);
+ JSONDecoder::decode_json("acting_primary", acting_primary, obj);
+}
+
+// Writes the request to the formatter: the fixed "prefix" value
+// "osd pool get", followed by the pool/var/format members.
+void OSDPoolGetRequest::dump(Formatter* f) const {
+ encode_json("prefix", "osd pool get", f);
+ encode_json("pool", pool, f);
+ encode_json("var", var, f);
+ encode_json("format", format, f);
+}
+
+// Fills pool/var/format from obj. The "prefix" key written by dump() is
+// not read back.
+void OSDPoolGetRequest::decode_json(JSONObj* obj) {
+ JSONDecoder::decode_json("pool", pool, obj);
+ JSONDecoder::decode_json("var", var, obj);
+ JSONDecoder::decode_json("format", format, obj);
+}
+
+// Writes the single "erasure_code_profile" key to the formatter.
+void OSDPoolGetReply::dump(Formatter* f) const {
+ encode_json("erasure_code_profile", erasure_code_profile, f);
+}
+
+// Fills erasure_code_profile from the "erasure_code_profile" key of obj.
+void OSDPoolGetReply::decode_json(JSONObj* obj) {
+ JSONDecoder::decode_json("erasure_code_profile", erasure_code_profile, obj);
+}
+
+// Writes the request to the formatter: the "prefix" value
+// "osd erasure-code-profile get", followed by the name/format members.
+//
+// The prefix used to be "osd pool get", which is the command that
+// OSDPoolGetRequest builds and which takes "pool"/"var" arguments. This
+// request emits a "name" argument instead, so it must use the
+// erasure-code-profile command prefix.
+void OSDECProfileGetRequest::dump(Formatter* f) const {
+ encode_json("prefix", "osd erasure-code-profile get", f);
+ encode_json("name", name, f);
+ encode_json("format", format, f);
+}
+
+// Fills name/format from obj. The "prefix" key written by dump() is not
+// read back.
+void OSDECProfileGetRequest::decode_json(JSONObj* obj) {
+ JSONDecoder::decode_json("name", name, obj);
+ JSONDecoder::decode_json("format", format, obj);
+}
+
+// Writes all eleven members of the reply to the formatter. The JSON keys
+// use hyphens (e.g. "crush-device-class") where the member names use
+// underscores (crush_device_class).
+void OSDECProfileGetReply::dump(Formatter* f) const {
+ encode_json("crush-device-class", crush_device_class, f);
+ encode_json("crush-failure-domain", crush_failure_domain, f);
+ encode_json("crush-num-failure-domains", crush_num_failure_domains, f);
+ encode_json("crush-osds-per-failure-domain", crush_osds_per_failure_domain,
+ f);
+ encode_json("crush-root", crush_root, f);
+ encode_json("jerasure-per-chunk-alignment", jerasure_per_chunk_alignment, f);
+ encode_json("k", k, f);
+ encode_json("m", m, f);
+ encode_json("plugin", plugin, f);
+ encode_json("technique", technique, f);
+ encode_json("w", w, f);
+}
+
+// Fills the members of the reply from obj. The keys read here are the same
+// eleven hyphenated keys that dump() writes.
+void OSDECProfileGetReply::decode_json(JSONObj* obj) {
+ JSONDecoder::decode_json("crush-device-class", crush_device_class, obj);
+ JSONDecoder::decode_json("crush-failure-domain", crush_failure_domain, obj);
+ JSONDecoder::decode_json("crush-num-failure-domains",
+ crush_num_failure_domains, obj);
+ JSONDecoder::decode_json("crush-osds-per-failure-domain",
+ crush_osds_per_failure_domain, obj);
+ JSONDecoder::decode_json("crush-root", crush_root, obj);
+ JSONDecoder::decode_json("jerasure-per-chunk-alignment",
+ jerasure_per_chunk_alignment, obj);
+ JSONDecoder::decode_json("k", k, obj);
+ JSONDecoder::decode_json("m", m, obj);
+ JSONDecoder::decode_json("plugin", plugin, obj);
+ JSONDecoder::decode_json("technique", technique, obj);
+ JSONDecoder::decode_json("w", w, obj);
+}
+
+// Writes the request to the formatter: the fixed "prefix" value
+// "osd erasure-code-profile set", followed by the name/profile members.
+void OSDECProfileSetRequest::dump(Formatter* f) const {
+ encode_json("prefix", "osd erasure-code-profile set", f);
+ encode_json("name", name, f);
+ encode_json("profile", profile, f);
+}
+
+// Fills name/profile from obj. The "prefix" key written by dump() is not
+// read back.
+void OSDECProfileSetRequest::decode_json(JSONObj* obj) {
+ JSONDecoder::decode_json("name", name, obj);
+ JSONDecoder::decode_json("profile", profile, obj);
+}
+
+// Writes the request to the formatter: the fixed "prefix" value
+// "osd pool create", followed by the pool, pool_type, pg_num, pgp_num and
+// erasure_code_profile members.
+void OSDECPoolCreateRequest::dump(Formatter* f) const {
+ encode_json("prefix", "osd pool create", f);
+ encode_json("pool", pool, f);
+ encode_json("pool_type", pool_type, f);
+ encode_json("pg_num", pg_num, f);
+ encode_json("pgp_num", pgp_num, f);
+ encode_json("erasure_code_profile", erasure_code_profile, f);
+}
+
+// Fills pool, pool_type, pg_num, pgp_num and erasure_code_profile from obj.
+// The "prefix" key written by dump() is not read back.
+void OSDECPoolCreateRequest::decode_json(JSONObj* obj) {
+ JSONDecoder::decode_json("pool", pool, obj);
+ JSONDecoder::decode_json("pool_type", pool_type, obj);
+ JSONDecoder::decode_json("pg_num", pg_num, obj);
+ JSONDecoder::decode_json("pgp_num", pgp_num, obj);
+ JSONDecoder::decode_json("erasure_code_profile", erasure_code_profile, obj);
+}
+
+// Writes the request to the formatter: the fixed "prefix" value "osd set",
+// followed by the key and yes_i_really_mean_it members.
+void OSDSetRequest::dump(Formatter* f) const {
+ encode_json("prefix", "osd set", f);
+ encode_json("key", key, f);
+ encode_json("yes_i_really_mean_it", yes_i_really_mean_it, f);
+}
+
+// Fills key and yes_i_really_mean_it from obj. The "prefix" key written by
+// dump() is not read back.
+void OSDSetRequest::decode_json(JSONObj* obj) {
+ JSONDecoder::decode_json("key", key, obj);
+ JSONDecoder::decode_json("yes_i_really_mean_it", yes_i_really_mean_it, obj);
+} \ No newline at end of file
diff --git a/src/common/json/OSDStructures.h b/src/common/json/OSDStructures.h
new file mode 100644
index 00000000000..3e4528a099f
--- /dev/null
+++ b/src/common/json/OSDStructures.h
@@ -0,0 +1,189 @@
+#pragma once
+
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "common/ceph_json.h"
+#include "common/io_exerciser/OpType.h"
+#include "include/types.h"
+
+class JSONObj;
+
+namespace ceph {
+namespace messaging {
+namespace osd {
+// Request with pool/object/nspace fields; `format` defaults to "json".
+struct OSDMapRequest {
+ std::string pool;
+ std::string object;
+ std::string nspace;
+ std::string format = "json";
+
+ void dump(Formatter* f) const;
+ void decode_json(JSONObj* obj);
+};
+
+// Reply carrying ten fields, including the `up` and `acting` integer
+// vectors and their corresponding *_primary values.
+// NOTE(review): the scalar members have no default initializer, so they
+// are indeterminate until assigned or decoded.
+struct OSDMapReply {
+ epoch_t epoch;
+ std::string pool;
+ uint64_t pool_id;
+ std::string objname;
+ std::string raw_pgid;
+ std::string pgid;
+ std::vector<int> up;
+ int up_primary;
+ std::vector<int> acting;
+ int acting_primary;
+
+ void dump(Formatter* f) const;
+ void decode_json(JSONObj* obj);
+};
+
+// Request for a pool variable; `var` defaults to "erasure_code_profile"
+// and `format` defaults to "json".
+struct OSDPoolGetRequest {
+ std::string pool;
+ std::string var = "erasure_code_profile";
+ std::string format = "json";
+
+ void dump(Formatter* f) const;
+ void decode_json(JSONObj* obj);
+};
+
+// Reply holding a single string, erasure_code_profile.
+struct OSDPoolGetReply {
+ std::string erasure_code_profile;
+
+ void dump(Formatter* f) const;
+ void decode_json(JSONObj* obj);
+};
+
+// Request keyed by `name`; `format` defaults to "json".
+struct OSDECProfileGetRequest {
+ std::string name;
+ std::string format = "json";
+
+ void dump(Formatter* f) const;
+ void decode_json(JSONObj* obj);
+};
+
+// Reply carrying eleven erasure-code profile fields.
+// NOTE(review): `w` is a std::string while `k` and `m` are int — confirm
+// this asymmetry is intended.
+struct OSDECProfileGetReply {
+ std::string crush_device_class;
+ std::string crush_failure_domain;
+ int crush_num_failure_domains;
+ int crush_osds_per_failure_domain;
+ std::string crush_root;
+ bool jerasure_per_chunk_alignment;
+ int k;
+ int m;
+ std::string plugin;
+ std::string technique;
+ std::string w;
+
+ void dump(Formatter* f) const;
+ void decode_json(JSONObj* obj);
+};
+
+// Request with a profile `name` and a `profile` list of strings.
+struct OSDECProfileSetRequest {
+ std::string name;
+ std::vector<std::string> profile;
+
+ void dump(Formatter* f) const;
+ void decode_json(JSONObj* obj);
+};
+
+// Request with pool name, pool type, pg_num/pgp_num and the name of an
+// erasure-code profile.
+struct OSDECPoolCreateRequest {
+ std::string pool;
+ std::string pool_type;
+ int pg_num;
+ int pgp_num;
+ std::string erasure_code_profile;
+
+ void dump(Formatter* f) const;
+ void decode_json(JSONObj* obj);
+};
+
+// Request with a `key` and an optional yes_i_really_mean_it flag that
+// defaults to std::nullopt.
+// NOTE(review): std::optional is used here and below, but this header does
+// not include <optional> directly — presumably it arrives via another
+// include; confirm.
+struct OSDSetRequest {
+ std::string key;
+ std::optional<bool> yes_i_really_mean_it = std::nullopt;
+
+ void dump(Formatter* f) const;
+ void decode_json(JSONObj* obj);
+};
+
+// These structures are sent directly to the relevant OSD
+// rather than the monitor
+//
+// InjectECErrorRequest: the op_type template parameter selects the "prefix"
+// written by dump(). The two Read* values give "injectecreaderr", the two
+// Write* values give "injectecwriteerr", and any other value aborts.
+template <io_exerciser::InjectOpType op_type>
+struct InjectECErrorRequest {
+ std::string pool;
+ std::string objname;
+ int shardid;
+ std::optional<uint64_t> type;
+ std::optional<uint64_t> when;
+ std::optional<uint64_t> duration;
+
+ // Writes the prefix chosen by op_type, then the six members.
+ void dump(Formatter* f) const {
+ switch (op_type) {
+ case io_exerciser::InjectOpType::ReadEIO:
+ [[fallthrough]];
+ case io_exerciser::InjectOpType::ReadMissingShard:
+ ::encode_json("prefix", "injectecreaderr", f);
+ break;
+ case io_exerciser::InjectOpType::WriteFailAndRollback:
+ [[fallthrough]];
+ case io_exerciser::InjectOpType::WriteOSDAbort:
+ ::encode_json("prefix", "injectecwriteerr", f);
+ break;
+ default:
+ ceph_abort_msg("Unsupported Inject Type");
+ }
+ ::encode_json("pool", pool, f);
+ ::encode_json("objname", objname, f);
+ ::encode_json("shardid", shardid, f);
+ ::encode_json("type", type, f);
+ ::encode_json("when", when, f);
+ ::encode_json("duration", duration, f);
+ }
+ // Fills the six members from obj; the "prefix" key is not read back.
+ void decode_json(JSONObj* obj) {
+ JSONDecoder::decode_json("pool", pool, obj);
+ JSONDecoder::decode_json("objname", objname, obj);
+ JSONDecoder::decode_json("shardid", shardid, obj);
+ JSONDecoder::decode_json("type", type, obj);
+ JSONDecoder::decode_json("when", when, obj);
+ JSONDecoder::decode_json("duration", duration, obj);
+ }
+};
+
+// InjectECClearErrorRequest: same prefix selection by op_type as above,
+// but with "injectecclearreaderr" / "injectecclearwriteerr", and without
+// the when/duration members.
+template <io_exerciser::InjectOpType op_type>
+struct InjectECClearErrorRequest {
+ std::string pool;
+ std::string objname;
+ int shardid;
+ std::optional<uint64_t> type;
+
+ // Writes the prefix chosen by op_type, then the four members.
+ void dump(Formatter* f) const {
+ switch (op_type) {
+ case io_exerciser::InjectOpType::ReadEIO:
+ [[fallthrough]];
+ case io_exerciser::InjectOpType::ReadMissingShard:
+ ::encode_json("prefix", "injectecclearreaderr", f);
+ break;
+ case io_exerciser::InjectOpType::WriteFailAndRollback:
+ [[fallthrough]];
+ case io_exerciser::InjectOpType::WriteOSDAbort:
+ ::encode_json("prefix", "injectecclearwriteerr", f);
+ break;
+ default:
+ ceph_abort_msg("Unsupported Inject Type");
+ }
+ ::encode_json("pool", pool, f);
+ ::encode_json("objname", objname, f);
+ ::encode_json("shardid", shardid, f);
+ ::encode_json("type", type, f);
+ }
+ // Fills the four members from obj; the "prefix" key is not read back.
+ void decode_json(JSONObj* obj) {
+ JSONDecoder::decode_json("pool", pool, obj);
+ JSONDecoder::decode_json("objname", objname, obj);
+ JSONDecoder::decode_json("shardid", shardid, obj);
+ JSONDecoder::decode_json("type", type, obj);
+ }
+};
+} // namespace osd
+} // namespace messaging
+} // namespace ceph \ No newline at end of file
diff --git a/src/common/options/crimson.yaml.in b/src/common/options/crimson.yaml.in
index abef7483bcf..132a4a09e89 100644
--- a/src/common/options/crimson.yaml.in
+++ b/src/common/options/crimson.yaml.in
@@ -6,7 +6,7 @@ options:
type: str
level: advanced
desc: backend type for a Crimson OSD (e.g seastore or bluestore)
- default: seastore
+ default: bluestore
enum_values:
- bluestore
- seastore
diff --git a/src/common/options/mds.yaml.in b/src/common/options/mds.yaml.in
index 4cbc079a215..03a53cd7cea 100644
--- a/src/common/options/mds.yaml.in
+++ b/src/common/options/mds.yaml.in
@@ -1745,4 +1745,4 @@ options:
default: 16
services:
- mds
- min: 8
+ min: 4
diff --git a/src/common/options/mon.yaml.in b/src/common/options/mon.yaml.in
index ab1634bc154..1307030e3fb 100644
--- a/src/common/options/mon.yaml.in
+++ b/src/common/options/mon.yaml.in
@@ -91,6 +91,13 @@ options:
default: 1000
services:
- mon
+- name: mon_nvmeofgw_delete_grace
+ type: secs
+ level: advanced
+ desc: Issue NVMEOF_GATEWAY_DELETING health warning after this amount of time has elapsed
+ default: 15_min
+ services:
+ - mon
- name: mon_mgr_inactive_grace
type: int
level: advanced
diff --git a/src/common/pick_address.cc b/src/common/pick_address.cc
index d125d7171e0..a0629a15686 100644
--- a/src/common/pick_address.cc
+++ b/src/common/pick_address.cc
@@ -642,17 +642,24 @@ int get_iface_numa_node(
bool is_addr_in_subnet(
CephContext *cct,
const std::string &networks,
- const std::string &addr)
+ const entity_addr_t &addr)
{
const auto nets = get_str_list(networks);
ceph_assert(!nets.empty());
-
unsigned ipv = CEPH_PICK_ADDRESS_IPV4;
- struct sockaddr_in public_addr;
- public_addr.sin_family = AF_INET;
-
- if(inet_pton(AF_INET, addr.c_str(), &public_addr.sin_addr) != 1) {
- lderr(cct) << "unable to convert chosen address to string: " << addr << dendl;
+ struct sockaddr_in6 public_addr6;
+ struct sockaddr_in public_addr4;
+
+ if (addr.is_ipv4() &&
+ inet_pton(AF_INET, addr.ip_only_to_str().c_str(), &public_addr4.sin_addr) == 1) {
+ public_addr4.sin_family = AF_INET;
+ } else if (addr.is_ipv6() &&
+ inet_pton(AF_INET6, addr.ip_only_to_str().c_str(), &public_addr6.sin6_addr) == 1) {
+ public_addr6.sin6_family = AF_INET6;
+ ipv = CEPH_PICK_ADDRESS_IPV6;
+ } else {
+ std::string_view addr_type = addr.is_ipv4() ? "IPv4" : "IPv6";
+ lderr(cct) << "IP address " << addr << " is not parseable as " << addr_type << dendl;
return false;
}
@@ -660,10 +667,16 @@ bool is_addr_in_subnet(
struct ifaddrs ifa;
memset(&ifa, 0, sizeof(ifa));
ifa.ifa_next = nullptr;
- ifa.ifa_addr = (struct sockaddr*)&public_addr;
+ if (addr.is_ipv4()) {
+ ifa.ifa_addr = (struct sockaddr*)&public_addr4;
+ } else if (addr.is_ipv6()) {
+ ifa.ifa_addr = (struct sockaddr*)&public_addr6;
+ }
+
if(matches_with_net(cct, ifa, net, ipv)) {
return true;
}
}
+ lderr(cct) << "address " << addr << " is not in networks '" << networks << "'" << dendl;
return false;
}
diff --git a/src/common/pick_address.h b/src/common/pick_address.h
index 40575d7d155..c28a6037ded 100644
--- a/src/common/pick_address.h
+++ b/src/common/pick_address.h
@@ -98,6 +98,6 @@ int get_iface_numa_node(
bool is_addr_in_subnet(
CephContext *cct,
const std::string &networks,
- const std::string &addr);
+ const entity_addr_t &addr);
#endif
diff --git a/src/crimson/common/shared_lru.h b/src/crimson/common/shared_lru.h
index 92d99d332c4..0d73658e709 100644
--- a/src/crimson/common/shared_lru.h
+++ b/src/crimson/common/shared_lru.h
@@ -25,12 +25,17 @@ class SharedLRU {
SimpleLRU<K, shared_ptr_t, false> cache;
std::map<K, std::pair<weak_ptr_t, V*>> weak_refs;
+ // Once all of the shared pointers are destroyed,
+ // erase the tracked object from the weak_ref map
+ // before actually destroying it
struct Deleter {
- SharedLRU<K,V>* cache;
+ SharedLRU<K,V>* shared_lru_ptr;
const K key;
- void operator()(V* ptr) {
- cache->_erase_weak(key);
- delete ptr;
+ void operator()(V* value_ptr) {
+ if (shared_lru_ptr) {
+ shared_lru_ptr->_erase_weak(key);
+ }
+ delete value_ptr;
}
};
void _erase_weak(const K& key) {
@@ -42,9 +47,19 @@ public:
{}
~SharedLRU() {
cache.clear();
+
// initially, we were assuming that no pointer obtained from SharedLRU
// can outlive the lru itself. However, since going with the interruption
// concept for handling shutdowns, this is no longer valid.
+ // Moreover, before clearing weak_refs, invalidate each deleter
+ // cache pointer as this SharedLRU is being destroyed.
+ for (const auto& [key, value] : weak_refs) {
+ shared_ptr_t val;
+ val = value.first.lock();
+ auto this_deleter = get_deleter<Deleter>(val);
+ this_deleter->shared_lru_ptr = nullptr;
+ }
+
weak_refs.clear();
}
/**
diff --git a/src/crimson/os/alienstore/alien_store.cc b/src/crimson/os/alienstore/alien_store.cc
index f390823a8a0..db6decd84f9 100644
--- a/src/crimson/os/alienstore/alien_store.cc
+++ b/src/crimson/os/alienstore/alien_store.cc
@@ -141,7 +141,8 @@ seastar::future<> AlienStore::stop()
AlienStore::base_errorator::future<bool>
AlienStore::exists(
CollectionRef ch,
- const ghobject_t& oid)
+ const ghobject_t& oid,
+ uint32_t op_flags)
{
return op_gates.simple_dispatch("exists", [=, this] {
return tp->submit(ch->get_cid().hash_to_shard(tp->size()), [=, this] {
@@ -212,7 +213,8 @@ seastar::future<std::tuple<std::vector<ghobject_t>, ghobject_t>>
AlienStore::list_objects(CollectionRef ch,
const ghobject_t& start,
const ghobject_t& end,
- uint64_t limit) const
+ uint64_t limit,
+ uint32_t op_flags) const
{
logger().debug("{}", __func__);
assert(tp);
@@ -348,7 +350,8 @@ AlienStore::readv(CollectionRef ch,
AlienStore::get_attr_errorator::future<ceph::bufferlist>
AlienStore::get_attr(CollectionRef ch,
const ghobject_t& oid,
- std::string_view name) const
+ std::string_view name,
+ uint32_t op_flags) const
{
logger().debug("{}", __func__);
assert(tp);
@@ -376,7 +379,8 @@ AlienStore::get_attr(CollectionRef ch,
AlienStore::get_attrs_ertr::future<AlienStore::attrs_t>
AlienStore::get_attrs(CollectionRef ch,
- const ghobject_t& oid)
+ const ghobject_t& oid,
+ uint32_t op_flags)
{
logger().debug("{}", __func__);
assert(tp);
@@ -397,7 +401,8 @@ AlienStore::get_attrs(CollectionRef ch,
auto AlienStore::omap_get_values(CollectionRef ch,
const ghobject_t& oid,
- const set<string>& keys)
+ const set<string>& keys,
+ uint32_t op_flags)
-> read_errorator::future<omap_values_t>
{
logger().debug("{}", __func__);
@@ -421,7 +426,8 @@ auto AlienStore::omap_get_values(CollectionRef ch,
auto AlienStore::omap_get_values(CollectionRef ch,
const ghobject_t &oid,
- const std::optional<string> &start)
+ const std::optional<string> &start,
+ uint32_t op_flags)
-> read_errorator::future<std::tuple<bool, omap_values_t>>
{
logger().debug("{} with_start", __func__);
@@ -429,8 +435,21 @@ auto AlienStore::omap_get_values(CollectionRef ch,
return do_with_op_gate(omap_values_t{}, [=, this] (auto &values) {
return tp->submit(ch->get_cid().hash_to_shard(tp->size()), [=, this, &values] {
auto c = static_cast<AlienCollection*>(ch.get());
- return store->omap_get_values(c->collection, oid, start,
- reinterpret_cast<map<string, bufferlist>*>(&values));
+ return store->omap_iterate(
+ c->collection, oid,
+ ObjectStore::omap_iter_seek_t{
+ .seek_position = start.value_or(std::string{}),
+ // FIXME: classical OSDs begin iteration from LOWER_BOUND
+ // (or UPPER_BOUND if filter_prefix > start). However, these
+ // bits are not implemented yet
+ .seek_type = ObjectStore::omap_iter_seek_t::UPPER_BOUND
+ },
+ [&values]
+ (std::string_view key, std::string_view value) mutable {
+ values[std::string{key}].append(value);
+ // FIXME: there is no limit on number of entries yet
+ return ObjectStore::omap_iter_ret_t::NEXT;
+ });
}).then([&values] (int r)
-> read_errorator::future<std::tuple<bool, omap_values_t>> {
if (r == -ENOENT) {
@@ -578,7 +597,8 @@ unsigned AlienStore::get_max_attr_name_length() const
seastar::future<struct stat> AlienStore::stat(
CollectionRef ch,
- const ghobject_t& oid)
+ const ghobject_t& oid,
+ uint32_t op_flags)
{
assert(tp);
return do_with_op_gate((struct stat){}, [this, ch, oid](auto& st) {
@@ -604,7 +624,8 @@ seastar::future<std::string> AlienStore::get_default_device_class()
}
auto AlienStore::omap_get_header(CollectionRef ch,
- const ghobject_t& oid)
+ const ghobject_t& oid,
+ uint32_t op_flags)
-> get_attr_errorator::future<ceph::bufferlist>
{
assert(tp);
@@ -630,7 +651,8 @@ AlienStore::read_errorator::future<std::map<uint64_t, uint64_t>> AlienStore::fie
CollectionRef ch,
const ghobject_t& oid,
uint64_t off,
- uint64_t len)
+ uint64_t len,
+ uint32_t op_flags)
{
assert(tp);
return do_with_op_gate(std::map<uint64_t, uint64_t>(), [=, this](auto& destmap) {
diff --git a/src/crimson/os/alienstore/alien_store.h b/src/crimson/os/alienstore/alien_store.h
index 853585dac9c..1d39411450e 100644
--- a/src/crimson/os/alienstore/alien_store.h
+++ b/src/crimson/os/alienstore/alien_store.h
@@ -36,7 +36,8 @@ public:
base_errorator::future<bool> exists(
CollectionRef c,
- const ghobject_t& oid) final;
+ const ghobject_t& oid,
+ uint32_t op_flags = 0) final;
mkfs_ertr::future<> mkfs(uuid_d new_osd_fsid) final;
read_errorator::future<ceph::bufferlist> read(CollectionRef c,
const ghobject_t& oid,
@@ -49,29 +50,36 @@ public:
uint32_t op_flags = 0) final;
- get_attr_errorator::future<ceph::bufferlist> get_attr(CollectionRef c,
- const ghobject_t& oid,
- std::string_view name) const final;
- get_attrs_ertr::future<attrs_t> get_attrs(CollectionRef c,
- const ghobject_t& oid) final;
+ get_attr_errorator::future<ceph::bufferlist> get_attr(
+ CollectionRef c,
+ const ghobject_t& oid,
+ std::string_view name,
+ uint32_t op_flags = 0) const final;
+ get_attrs_ertr::future<attrs_t> get_attrs(
+ CollectionRef c,
+ const ghobject_t& oid,
+ uint32_t op_flags = 0) final;
read_errorator::future<omap_values_t> omap_get_values(
CollectionRef c,
const ghobject_t& oid,
- const omap_keys_t& keys) final;
+ const omap_keys_t& keys,
+ uint32_t op_flags = 0) final;
/// Retrieves paged set of values > start (if present)
read_errorator::future<std::tuple<bool, omap_values_t>> omap_get_values(
CollectionRef c, ///< [in] collection
const ghobject_t &oid, ///< [in] oid
- const std::optional<std::string> &start ///< [in] start, empty for begin
+ const std::optional<std::string> &start, ///< [in] start, empty for begin
+ uint32_t op_flags = 0
) final; ///< @return <done, values> values.empty() iff done
seastar::future<std::tuple<std::vector<ghobject_t>, ghobject_t>> list_objects(
CollectionRef c,
const ghobject_t& start,
const ghobject_t& end,
- uint64_t limit) const final;
+ uint64_t limit,
+ uint32_t op_flags = 0) const final;
seastar::future<CollectionRef> create_new_collection(const coll_t& cid) final;
seastar::future<CollectionRef> open_collection(const coll_t& cid) final;
@@ -97,16 +105,19 @@ public:
unsigned get_max_attr_name_length() const final;
seastar::future<struct stat> stat(
CollectionRef,
- const ghobject_t&) final;
+ const ghobject_t&,
+ uint32_t op_flags = 0) final;
seastar::future<std::string> get_default_device_class() final;
get_attr_errorator::future<ceph::bufferlist> omap_get_header(
CollectionRef,
- const ghobject_t&) final;
+ const ghobject_t&,
+ uint32_t) final;
read_errorator::future<std::map<uint64_t, uint64_t>> fiemap(
CollectionRef,
const ghobject_t&,
uint64_t off,
- uint64_t len) final;
+ uint64_t len,
+ uint32_t op_flags) final;
FuturizedStore::Shard& get_sharded_store() final {
return *this;
diff --git a/src/crimson/os/cyanstore/cyan_store.cc b/src/crimson/os/cyanstore/cyan_store.cc
index a8bf514de15..41819fb5eb6 100644
--- a/src/crimson/os/cyanstore/cyan_store.cc
+++ b/src/crimson/os/cyanstore/cyan_store.cc
@@ -208,7 +208,8 @@ CyanStore::Shard::list_objects(
CollectionRef ch,
const ghobject_t& start,
const ghobject_t& end,
- uint64_t limit) const
+ uint64_t limit,
+ uint32_t op_flags) const
{
auto c = static_cast<Collection*>(ch.get());
logger().debug("{} {} {} {} {}",
@@ -257,7 +258,8 @@ CyanStore::Shard::list_collections()
CyanStore::Shard::base_errorator::future<bool>
CyanStore::Shard::exists(
CollectionRef ch,
- const ghobject_t &oid)
+ const ghobject_t &oid,
+ uint32_t op_flags)
{
auto c = static_cast<Collection*>(ch.get());
if (!c->exists) {
@@ -333,7 +335,8 @@ CyanStore::Shard::get_attr_errorator::future<ceph::bufferlist>
CyanStore::Shard::get_attr(
CollectionRef ch,
const ghobject_t& oid,
- std::string_view name) const
+ std::string_view name,
+ uint32_t op_flags) const
{
auto c = static_cast<Collection*>(ch.get());
logger().debug("{} {} {}",
@@ -352,7 +355,8 @@ CyanStore::Shard::get_attr(
CyanStore::Shard::get_attrs_ertr::future<CyanStore::Shard::attrs_t>
CyanStore::Shard::get_attrs(
CollectionRef ch,
- const ghobject_t& oid)
+ const ghobject_t& oid,
+ uint32_t op_flags)
{
auto c = static_cast<Collection*>(ch.get());
logger().debug("{} {} {}",
@@ -367,7 +371,8 @@ CyanStore::Shard::get_attrs(
auto CyanStore::Shard::omap_get_values(
CollectionRef ch,
const ghobject_t& oid,
- const omap_keys_t& keys)
+ const omap_keys_t& keys,
+ uint32_t op_flags)
-> read_errorator::future<omap_values_t>
{
auto c = static_cast<Collection*>(ch.get());
@@ -388,7 +393,8 @@ auto CyanStore::Shard::omap_get_values(
auto CyanStore::Shard::omap_get_values(
CollectionRef ch,
const ghobject_t &oid,
- const std::optional<string> &start)
+ const std::optional<string> &start,
+ uint32_t op_flags)
-> CyanStore::Shard::read_errorator::future<std::tuple<bool, omap_values_t>>
{
auto c = static_cast<Collection*>(ch.get());
@@ -409,7 +415,8 @@ auto CyanStore::Shard::omap_get_values(
auto CyanStore::Shard::omap_get_header(
CollectionRef ch,
- const ghobject_t& oid)
+ const ghobject_t& oid,
+ uint32_t op_flags)
-> CyanStore::Shard::get_attr_errorator::future<ceph::bufferlist>
{
auto c = static_cast<Collection*>(ch.get());
@@ -977,7 +984,8 @@ CyanStore::Shard::fiemap(
CollectionRef ch,
const ghobject_t& oid,
uint64_t off,
- uint64_t len)
+ uint64_t len,
+ uint32_t op_flags)
{
auto c = static_cast<Collection*>(ch.get());
@@ -992,7 +1000,8 @@ CyanStore::Shard::fiemap(
seastar::future<struct stat>
CyanStore::Shard::stat(
CollectionRef ch,
- const ghobject_t& oid)
+ const ghobject_t& oid,
+ uint32_t op_flags)
{
auto c = static_cast<Collection*>(ch.get());
auto o = c->get_object(oid);
diff --git a/src/crimson/os/cyanstore/cyan_store.h b/src/crimson/os/cyanstore/cyan_store.h
index e9394991bc2..1d481ef5829 100644
--- a/src/crimson/os/cyanstore/cyan_store.h
+++ b/src/crimson/os/cyanstore/cyan_store.h
@@ -34,11 +34,13 @@ public:
seastar::future<struct stat> stat(
CollectionRef c,
- const ghobject_t& oid) final;
+ const ghobject_t& oid,
+ uint32_t op_flags = 0) final;
base_errorator::future<bool> exists(
CollectionRef ch,
- const ghobject_t& oid) final;
+ const ghobject_t& oid,
+ uint32_t op_flags = 0) final;
read_errorator::future<ceph::bufferlist> read(
CollectionRef c,
@@ -56,33 +58,39 @@ public:
get_attr_errorator::future<ceph::bufferlist> get_attr(
CollectionRef c,
const ghobject_t& oid,
- std::string_view name) const final;
+ std::string_view name,
+ uint32_t op_flags = 0) const final;
get_attrs_ertr::future<attrs_t> get_attrs(
CollectionRef c,
- const ghobject_t& oid) final;
+ const ghobject_t& oid,
+ uint32_t op_flags = 0) final;
read_errorator::future<omap_values_t> omap_get_values(
CollectionRef c,
const ghobject_t& oid,
- const omap_keys_t& keys) final;
+ const omap_keys_t& keys,
+ uint32_t op_flags = 0) final;
read_errorator::future<std::tuple<bool, omap_values_t>> omap_get_values(
CollectionRef c, ///< [in] collection
const ghobject_t &oid, ///< [in] oid
- const std::optional<std::string> &start ///< [in] start, empty for begin
+ const std::optional<std::string> &start, ///< [in] start, empty for begin
+ uint32_t op_flags = 0
) final;
get_attr_errorator::future<ceph::bufferlist> omap_get_header(
CollectionRef c,
- const ghobject_t& oid) final;
+ const ghobject_t& oid,
+ uint32_t op_flags = 0) final;
seastar::future<std::tuple<std::vector<ghobject_t>, ghobject_t>>
list_objects(
CollectionRef c,
const ghobject_t& start,
const ghobject_t& end,
- uint64_t limit) const final;
+ uint64_t limit,
+ uint32_t op_flags = 0) const final;
seastar::future<CollectionRef> create_new_collection(const coll_t& cid) final;
@@ -101,7 +109,8 @@ public:
CollectionRef c,
const ghobject_t& oid,
uint64_t off,
- uint64_t len) final;
+ uint64_t len,
+ uint32_t op_flags = 0) final;
unsigned get_max_attr_name_length() const final;
diff --git a/src/crimson/os/futurized_store.h b/src/crimson/os/futurized_store.h
index 51ef2331014..e7d4c8546de 100644
--- a/src/crimson/os/futurized_store.h
+++ b/src/crimson/os/futurized_store.h
@@ -54,7 +54,8 @@ public:
virtual base_errorator::future<bool> exists(
CollectionRef c,
- const ghobject_t& oid) = 0;
+ const ghobject_t& oid,
+ uint32_t op_flags = 0) = 0;
using get_attr_errorator = crimson::errorator<
crimson::ct_error::enoent,
@@ -62,42 +63,49 @@ public:
virtual get_attr_errorator::future<ceph::bufferlist> get_attr(
CollectionRef c,
const ghobject_t& oid,
- std::string_view name) const = 0;
+ std::string_view name,
+ uint32_t op_flags = 0) const = 0;
using get_attrs_ertr = crimson::errorator<
crimson::ct_error::enoent>;
using attrs_t = std::map<std::string, ceph::bufferlist, std::less<>>;
virtual get_attrs_ertr::future<attrs_t> get_attrs(
CollectionRef c,
- const ghobject_t& oid) = 0;
+ const ghobject_t& oid,
+ uint32_t op_flags = 0) = 0;
virtual seastar::future<struct stat> stat(
CollectionRef c,
- const ghobject_t& oid) = 0;
+ const ghobject_t& oid,
+ uint32_t op_flags = 0) = 0;
using omap_values_t = attrs_t;
using omap_keys_t = std::set<std::string>;
virtual read_errorator::future<omap_values_t> omap_get_values(
CollectionRef c,
const ghobject_t& oid,
- const omap_keys_t& keys) = 0;
+ const omap_keys_t& keys,
+ uint32_t op_flags = 0) = 0;
using omap_values_paged_t = std::tuple<bool, omap_values_t>;
virtual read_errorator::future<omap_values_paged_t> omap_get_values(
CollectionRef c, ///< [in] collection
const ghobject_t &oid, ///< [in] oid
- const std::optional<std::string> &start ///< [in] start, empty for begin
+ const std::optional<std::string> &start, ///< [in] start, empty for begin
+ uint32_t op_flags = 0
) = 0; ///< @return <done, values> values.empty() only if done
virtual get_attr_errorator::future<bufferlist> omap_get_header(
CollectionRef c,
- const ghobject_t& oid) = 0;
+ const ghobject_t& oid,
+ uint32_t op_flags = 0) = 0;
virtual seastar::future<std::tuple<std::vector<ghobject_t>, ghobject_t>> list_objects(
CollectionRef c,
const ghobject_t& start,
const ghobject_t& end,
- uint64_t limit) const = 0;
+ uint64_t limit,
+ uint32_t op_flags = 0) const = 0;
virtual seastar::future<CollectionRef> create_new_collection(const coll_t& cid) = 0;
@@ -153,7 +161,8 @@ public:
CollectionRef ch,
const ghobject_t& oid,
uint64_t off,
- uint64_t len) = 0;
+ uint64_t len,
+ uint32_t op_flags = 0) = 0;
virtual unsigned get_max_attr_name_length() const = 0;
};
diff --git a/src/crimson/os/seastore/async_cleaner.cc b/src/crimson/os/seastore/async_cleaner.cc
index 341c5c5524a..64e6749562e 100644
--- a/src/crimson/os/seastore/async_cleaner.cc
+++ b/src/crimson/os/seastore/async_cleaner.cc
@@ -609,6 +609,7 @@ JournalTrimmerImpl::trim_alloc()
return extent_callback->with_transaction_intr(
Transaction::src_t::TRIM_ALLOC,
"trim_alloc",
+ CACHE_HINT_NOCACHE,
[this, FNAME](auto &t)
{
auto target = get_alloc_tail_target();
@@ -653,6 +654,7 @@ JournalTrimmerImpl::trim_dirty()
return extent_callback->with_transaction_intr(
Transaction::src_t::TRIM_DIRTY,
"trim_dirty",
+ CACHE_HINT_NOCACHE,
[this, FNAME](auto &t)
{
auto target = get_dirty_tail_target();
@@ -1125,6 +1127,7 @@ SegmentCleaner::do_reclaim_space(
return extent_callback->with_transaction_intr(
src,
"clean_reclaim_space",
+ CACHE_HINT_NOCACHE,
[this, &backref_extents, &pin_list, &reclaimed](auto &t)
{
return seastar::do_with(
@@ -1240,6 +1243,7 @@ SegmentCleaner::clean_space_ret SegmentCleaner::clean_space()
return extent_callback->with_transaction_intr(
Transaction::src_t::READ,
"retrieve_from_backref_tree",
+ CACHE_HINT_NOCACHE,
[this, &weak_read_ret](auto &t) {
return backref_manager.get_mappings(
t,
@@ -1506,6 +1510,7 @@ bool SegmentCleaner::check_usage()
SpaceTrackerIRef tracker(space_tracker->make_empty());
extent_callback->with_transaction_weak(
"check_usage",
+ CACHE_HINT_NOCACHE,
[this, &tracker](auto &t) {
return backref_manager.scan_mapped_space(
t,
@@ -1812,6 +1817,7 @@ bool RBMCleaner::check_usage()
RBMSpaceTracker tracker(rbms);
extent_callback->with_transaction_weak(
"check_usage",
+ CACHE_HINT_NOCACHE,
[this, &tracker, &rbms](auto &t) {
return backref_manager.scan_mapped_space(
t,
diff --git a/src/crimson/os/seastore/async_cleaner.h b/src/crimson/os/seastore/async_cleaner.h
index 424247c5bdc..01ab44c4c7c 100644
--- a/src/crimson/os/seastore/async_cleaner.h
+++ b/src/crimson/os/seastore/async_cleaner.h
@@ -299,24 +299,29 @@ public:
/// Creates empty transaction
/// weak transaction should be type READ
virtual TransactionRef create_transaction(
- Transaction::src_t, const char *name, bool is_weak=false) = 0;
+ Transaction::src_t,
+ const char *name,
+ cache_hint_t cache_hint = CACHE_HINT_TOUCH,
+ bool is_weak=false) = 0;
/// Creates empty transaction with interruptible context
template <typename Func>
auto with_transaction_intr(
Transaction::src_t src,
const char* name,
+ cache_hint_t cache_hint,
Func &&f) {
return do_with_transaction_intr<Func, false>(
- src, name, std::forward<Func>(f));
+ src, name, cache_hint, std::forward<Func>(f));
}
template <typename Func>
auto with_transaction_weak(
const char* name,
+ cache_hint_t cache_hint,
Func &&f) {
return do_with_transaction_intr<Func, true>(
- Transaction::src_t::READ, name, std::forward<Func>(f)
+ Transaction::src_t::READ, name, cache_hint, std::forward<Func>(f)
).handle_error(
crimson::ct_error::eagain::assert_failure{"unexpected eagain"},
crimson::ct_error::pass_further_all{}
@@ -385,9 +390,10 @@ private:
auto do_with_transaction_intr(
Transaction::src_t src,
const char* name,
+ cache_hint_t cache_hint,
Func &&f) {
return seastar::do_with(
- create_transaction(src, name, IsWeak),
+ create_transaction(src, name, cache_hint, IsWeak),
[f=std::forward<Func>(f)](auto &ref_t) mutable {
return with_trans_intr(
*ref_t,
diff --git a/src/crimson/os/seastore/cache.cc b/src/crimson/os/seastore/cache.cc
index 5898b9bad0a..86f816e1648 100644
--- a/src/crimson/os/seastore/cache.cc
+++ b/src/crimson/os/seastore/cache.cc
@@ -1483,7 +1483,7 @@ record_t Cache::prepare_record(
i->state = CachedExtent::extent_state_t::CLEAN;
assert(i->is_logical());
i->clear_modified_region();
- touch_extent(*i, &trans_src);
+ touch_extent(*i, &trans_src, t.get_cache_hint());
DEBUGT("inplace rewrite ool block is commmitted -- {}", t, *i);
}
@@ -1513,7 +1513,7 @@ record_t Cache::prepare_record(
if (i->is_dirty()) {
add_to_dirty(i, &t_src);
} else {
- touch_extent(*i, &t_src);
+ touch_extent(*i, &t_src, t.get_cache_hint());
}
alloc_delta.alloc_blk_ranges.emplace_back(
@@ -1759,7 +1759,7 @@ void Cache::complete_commit(
add_extent(i);
assert(!i->is_dirty());
const auto t_src = t.get_src();
- touch_extent(*i, &t_src);
+ touch_extent(*i, &t_src, t.get_cache_hint());
epm.commit_space_used(i->get_paddr(), i->get_length());
// Note: commit extents and backref allocations in the same place
@@ -2026,7 +2026,7 @@ Cache::replay_delta(
[](CachedExtent &) {},
[this](CachedExtent &ext) {
// replay is not included by the cache hit metrics
- touch_extent(ext, nullptr);
+ touch_extent(ext, nullptr, CACHE_HINT_TOUCH);
},
nullptr) :
_get_extent_if_cached(
diff --git a/src/crimson/os/seastore/cache.h b/src/crimson/os/seastore/cache.h
index b2248ff37dd..a239b861726 100644
--- a/src/crimson/os/seastore/cache.h
+++ b/src/crimson/os/seastore/cache.h
@@ -124,6 +124,7 @@ public:
TransactionRef create_transaction(
Transaction::src_t src,
const char* name,
+ cache_hint_t cache_hint,
bool is_weak) {
LOG_PREFIX(Cache::create_transaction);
@@ -137,7 +138,8 @@ public:
[this](Transaction& t) {
return on_transaction_destruct(t);
},
- ++next_id
+ ++next_id,
+ cache_hint
);
SUBDEBUGT(seastore_t, "created name={}, source={}, is_weak={}",
*ret, name, src, is_weak);
@@ -284,7 +286,7 @@ public:
SUBDEBUGT(seastore_cache, "{} {} is present in cache -- {}",
t, type, offset, *ret);
t.add_to_read_set(ret);
- touch_extent(*ret, &t_src);
+ touch_extent(*ret, &t_src, t.get_cache_hint());
return ret->wait_io().then([ret] {
return get_extent_if_cached_iertr::make_ready_future<
CachedExtentRef>(ret);
@@ -341,7 +343,7 @@ public:
t, T::TYPE, offset, length);
auto f = [&t, this, t_src](CachedExtent &ext) {
t.add_to_read_set(CachedExtentRef(&ext));
- touch_extent(ext, &t_src);
+ touch_extent(ext, &t_src, t.get_cache_hint());
};
return trans_intr::make_interruptible(
do_get_caching_extent<T>(
@@ -389,7 +391,7 @@ public:
++stats.access.s.load_absent;
t.add_to_read_set(CachedExtentRef(&ext));
- touch_extent(ext, &t_src);
+ touch_extent(ext, &t_src, t.get_cache_hint());
};
return trans_intr::make_interruptible(
do_get_caching_extent<T>(
@@ -487,7 +489,7 @@ public:
++access_stats.cache_lru;
++stats.access.s.cache_lru;
}
- touch_extent(*p_extent, &t_src);
+ touch_extent(*p_extent, &t_src, t.get_cache_hint());
} else {
if (p_extent->is_dirty()) {
++access_stats.trans_dirty;
@@ -834,7 +836,7 @@ private:
t, type, offset, length, laddr);
auto f = [&t, this, t_src](CachedExtent &ext) {
t.add_to_read_set(CachedExtentRef(&ext));
- touch_extent(ext, &t_src);
+ touch_extent(ext, &t_src, t.get_cache_hint());
};
return trans_intr::make_interruptible(
do_get_caching_extent_by_type(
@@ -876,7 +878,7 @@ private:
++stats.access.s.load_absent;
t.add_to_read_set(CachedExtentRef(&ext));
- touch_extent(ext, &t_src);
+ touch_extent(ext, &t_src, t.get_cache_hint());
};
return trans_intr::make_interruptible(
do_get_caching_extent_by_type(
@@ -1472,11 +1474,10 @@ private:
/// Update lru for access to ref
void touch_extent(
CachedExtent &ext,
- const Transaction::src_t* p_src)
+ const Transaction::src_t* p_src,
+ cache_hint_t hint)
{
- if (p_src &&
- is_background_transaction(*p_src) &&
- is_logical_type(ext.get_type())) {
+ if (hint == CACHE_HINT_NOCACHE && is_logical_type(ext.get_type())) {
return;
}
if (ext.is_stable_clean() && !ext.is_placeholder()) {
diff --git a/src/crimson/os/seastore/seastore.cc b/src/crimson/os/seastore/seastore.cc
index 5b51083f344..6a866cb1f9b 100644
--- a/src/crimson/os/seastore/seastore.cc
+++ b/src/crimson/os/seastore/seastore.cc
@@ -408,6 +408,7 @@ SeaStore::Shard::mkfs_managers()
return transaction_manager->with_transaction_intr(
Transaction::src_t::MUTATE,
"mkfs_seastore",
+ CACHE_HINT_TOUCH,
[this](auto& t)
{
LOG_PREFIX(SeaStoreS::mkfs_managers);
@@ -897,9 +898,10 @@ get_ranges(CollectionRef ch,
seastar::future<std::tuple<std::vector<ghobject_t>, ghobject_t>>
SeaStore::Shard::list_objects(CollectionRef ch,
- const ghobject_t& start,
- const ghobject_t& end,
- uint64_t limit) const
+ const ghobject_t& start,
+ const ghobject_t& end,
+ uint64_t limit,
+ uint32_t op_flags) const
{
++(shard_stats.read_num);
++(shard_stats.pending_read_num);
@@ -910,13 +912,14 @@ SeaStore::Shard::list_objects(CollectionRef ch,
return seastar::do_with(
RetType(std::vector<ghobject_t>(), start),
std::move(limit),
- [this, ch, start, end](auto& ret, auto& limit) {
- return repeat_eagain([this, ch, start, end, &limit, &ret] {
+ [this, ch, start, end, op_flags](auto& ret, auto& limit) {
+ return repeat_eagain([this, ch, start, end, &limit, &ret, op_flags] {
++(shard_stats.repeat_read_num);
return transaction_manager->with_transaction_intr(
Transaction::src_t::READ,
"list_objects",
+ op_flags,
[this, ch, start, end, &limit, &ret](auto &t)
{
LOG_PREFIX(SeaStoreS::list_objects);
@@ -1054,6 +1057,7 @@ SeaStore::Shard::list_collections()
return transaction_manager->with_transaction_intr(
Transaction::src_t::READ,
"list_collections",
+ CACHE_HINT_TOUCH,
[this, &ret](auto& t)
{
LOG_PREFIX(SeaStoreS::list_collections);
@@ -1137,6 +1141,7 @@ SeaStore::Shard::read(
Transaction::src_t::READ,
"read",
op_type_t::READ,
+ op_flags,
[this, offset, len, op_flags](auto &t, auto &onode) {
return _read(t, onode, offset, len, op_flags);
}).finally([this] {
@@ -1148,7 +1153,8 @@ SeaStore::Shard::read(
SeaStore::Shard::base_errorator::future<bool>
SeaStore::Shard::exists(
CollectionRef c,
- const ghobject_t& oid)
+ const ghobject_t& oid,
+ uint32_t op_flags)
{
LOG_PREFIX(SeaStoreS::exists);
++(shard_stats.read_num);
@@ -1160,6 +1166,7 @@ SeaStore::Shard::exists(
Transaction::src_t::READ,
"exists",
op_type_t::READ,
+ op_flags,
[FNAME](auto& t, auto&) {
DEBUGT("exists", t);
return seastar::make_ready_future<bool>(true);
@@ -1240,7 +1247,8 @@ SeaStore::Shard::get_attr_errorator::future<ceph::bufferlist>
SeaStore::Shard::get_attr(
CollectionRef ch,
const ghobject_t& oid,
- std::string_view name) const
+ std::string_view name,
+ uint32_t op_flags) const
{
++(shard_stats.read_num);
++(shard_stats.pending_read_num);
@@ -1251,6 +1259,7 @@ SeaStore::Shard::get_attr(
Transaction::src_t::READ,
"get_attr",
op_type_t::GET_ATTR,
+ op_flags,
[this, name](auto &t, auto& onode) {
return _get_attr(t, onode, name);
}).handle_error(
@@ -1296,7 +1305,8 @@ SeaStore::Shard::_get_attrs(
SeaStore::Shard::get_attrs_ertr::future<SeaStore::Shard::attrs_t>
SeaStore::Shard::get_attrs(
CollectionRef ch,
- const ghobject_t& oid)
+ const ghobject_t& oid,
+ uint32_t op_flags)
{
++(shard_stats.read_num);
++(shard_stats.pending_read_num);
@@ -1307,6 +1317,7 @@ SeaStore::Shard::get_attrs(
Transaction::src_t::READ,
"get_attrs",
op_type_t::GET_ATTRS,
+ op_flags,
[this](auto &t, auto& onode) {
return _get_attrs(t, onode);
}).handle_error(
@@ -1338,7 +1349,8 @@ seastar::future<struct stat> SeaStore::Shard::_stat(
seastar::future<struct stat> SeaStore::Shard::stat(
CollectionRef c,
- const ghobject_t& oid)
+ const ghobject_t& oid,
+ uint32_t op_flags)
{
++(shard_stats.read_num);
++(shard_stats.pending_read_num);
@@ -1349,6 +1361,7 @@ seastar::future<struct stat> SeaStore::Shard::stat(
Transaction::src_t::READ,
"stat",
op_type_t::STAT,
+ op_flags,
[this, oid](auto &t, auto &onode) {
return _stat(t, onode, oid);
}).handle_error(
@@ -1364,9 +1377,10 @@ seastar::future<struct stat> SeaStore::Shard::stat(
SeaStore::Shard::get_attr_errorator::future<ceph::bufferlist>
SeaStore::Shard::omap_get_header(
CollectionRef ch,
- const ghobject_t& oid)
+ const ghobject_t& oid,
+ uint32_t op_flags)
{
- return get_attr(ch, oid, OMAP_HEADER_XATTR_KEY);
+ return get_attr(ch, oid, OMAP_HEADER_XATTR_KEY, op_flags);
}
SeaStore::base_iertr::future<SeaStore::Shard::omap_values_t>
@@ -1389,7 +1403,8 @@ SeaStore::Shard::read_errorator::future<SeaStore::Shard::omap_values_t>
SeaStore::Shard::omap_get_values(
CollectionRef ch,
const ghobject_t &oid,
- const omap_keys_t &keys)
+ const omap_keys_t &keys,
+ uint32_t op_flags)
{
++(shard_stats.read_num);
++(shard_stats.pending_read_num);
@@ -1400,6 +1415,7 @@ SeaStore::Shard::omap_get_values(
Transaction::src_t::READ,
"omap_get_values",
op_type_t::OMAP_GET_VALUES,
+ op_flags,
[this, keys](auto &t, auto &onode) {
return do_omap_get_values(t, onode, keys);
}).finally([this] {
@@ -1529,7 +1545,8 @@ SeaStore::Shard::read_errorator::future<SeaStore::Shard::omap_values_paged_t>
SeaStore::Shard::omap_get_values(
CollectionRef ch,
const ghobject_t &oid,
- const std::optional<std::string> &start)
+ const std::optional<std::string> &start,
+ uint32_t op_flags)
{
++(shard_stats.read_num);
++(shard_stats.pending_read_num);
@@ -1540,6 +1557,7 @@ SeaStore::Shard::omap_get_values(
Transaction::src_t::READ,
"omap_get_values2",
op_type_t::OMAP_GET_VALUES2,
+ op_flags,
[this, start](auto &t, auto &onode) {
return do_omap_get_values(t, onode, start);
}).finally([this] {
@@ -1589,7 +1607,8 @@ SeaStore::Shard::fiemap(
CollectionRef ch,
const ghobject_t& oid,
uint64_t off,
- uint64_t len)
+ uint64_t len,
+ uint32_t op_flags)
{
++(shard_stats.read_num);
++(shard_stats.pending_read_num);
@@ -1600,6 +1619,7 @@ SeaStore::Shard::fiemap(
Transaction::src_t::READ,
"fiemap",
op_type_t::READ,
+ op_flags,
[this, off, len](auto &t, auto &onode) {
return _fiemap(t, onode, off, len);
}).finally([this] {
@@ -2677,6 +2697,7 @@ seastar::future<> SeaStore::Shard::write_meta(
return transaction_manager->with_transaction_intr(
Transaction::src_t::MUTATE,
"write_meta",
+ CACHE_HINT_NOCACHE,
[this, &key, &value](auto& t)
{
LOG_PREFIX(SeaStoreS::write_meta);
diff --git a/src/crimson/os/seastore/seastore.h b/src/crimson/os/seastore/seastore.h
index fd7e177da63..e2a993b9e20 100644
--- a/src/crimson/os/seastore/seastore.h
+++ b/src/crimson/os/seastore/seastore.h
@@ -101,7 +101,8 @@ public:
seastar::future<struct stat> stat(
CollectionRef c,
- const ghobject_t& oid) final;
+ const ghobject_t& oid,
+ uint32_t op_flags = 0) final;
read_errorator::future<ceph::bufferlist> read(
CollectionRef c,
@@ -118,32 +119,38 @@ public:
base_errorator::future<bool> exists(
CollectionRef c,
- const ghobject_t& oid) final;
+ const ghobject_t& oid,
+ uint32_t op_flags = 0) final;
get_attr_errorator::future<ceph::bufferlist> get_attr(
CollectionRef c,
const ghobject_t& oid,
- std::string_view name) const final;
+ std::string_view name,
+ uint32_t op_flags = 0) const final;
get_attrs_ertr::future<attrs_t> get_attrs(
CollectionRef c,
- const ghobject_t& oid) final;
+ const ghobject_t& oid,
+ uint32_t op_flags = 0) final;
read_errorator::future<omap_values_t> omap_get_values(
CollectionRef c,
const ghobject_t& oid,
- const omap_keys_t& keys) final;
+ const omap_keys_t& keys,
+ uint32_t op_flags = 0) final;
/// Retrieves paged set of values > start (if present)
read_errorator::future<omap_values_paged_t> omap_get_values(
CollectionRef c, ///< [in] collection
const ghobject_t &oid, ///< [in] oid
- const std::optional<std::string> &start ///< [in] start, empty for begin
+ const std::optional<std::string> &start, ///< [in] start, empty for begin
+ uint32_t op_flags = 0
) final; ///< @return <done, values> values.empty() iff done
get_attr_errorator::future<bufferlist> omap_get_header(
CollectionRef c,
- const ghobject_t& oid) final;
+ const ghobject_t& oid,
+ uint32_t op_flags = 0) final;
/// std::get<1>(ret) returns end if and only if the listing has listed all
/// the items within the range, otherwise it returns the next key to be listed.
@@ -151,7 +158,8 @@ public:
CollectionRef c,
const ghobject_t& start,
const ghobject_t& end,
- uint64_t limit) const final;
+ uint64_t limit,
+ uint32_t op_flags = 0) const final;
seastar::future<CollectionRef> create_new_collection(const coll_t& cid) final;
seastar::future<CollectionRef> open_collection(const coll_t& cid) final;
@@ -170,7 +178,8 @@ public:
CollectionRef ch,
const ghobject_t& oid,
uint64_t off,
- uint64_t len) final;
+ uint64_t len,
+ uint32_t op_flags = 0) final;
unsigned get_max_attr_name_length() const final {
return 256;
@@ -251,7 +260,8 @@ public:
return seastar::do_with(
internal_context_t(
ch, std::move(t),
- transaction_manager->create_transaction(src, tname)),
+ transaction_manager->create_transaction(
+ src, tname, t.get_fadvise_flags())),
std::forward<F>(f),
[this, op_type](auto &ctx, auto &f) {
assert(shard_stats.starting_io_num);
@@ -298,20 +308,22 @@ public:
Transaction::src_t src,
const char* tname,
op_type_t op_type,
+ cache_hint_t cache_hint_flags,
F &&f) const {
auto begin_time = std::chrono::steady_clock::now();
return seastar::do_with(
oid, Ret{}, std::forward<F>(f),
- [this, ch, src, op_type, begin_time, tname
+ [this, ch, src, op_type, begin_time, tname, cache_hint_flags
](auto &oid, auto &ret, auto &f)
{
- return repeat_eagain([&, this, ch, src, tname] {
+ return repeat_eagain([&, this, ch, src, tname, cache_hint_flags] {
assert(src == Transaction::src_t::READ);
++(shard_stats.repeat_read_num);
return transaction_manager->with_transaction_intr(
src,
tname,
+ cache_hint_flags,
[&, this, ch, tname](auto& t)
{
LOG_PREFIX(SeaStoreS::repeat_with_onode);
diff --git a/src/crimson/os/seastore/seastore_types.h b/src/crimson/os/seastore/seastore_types.h
index 335a439dcb5..5930469ca07 100644
--- a/src/crimson/os/seastore/seastore_types.h
+++ b/src/crimson/os/seastore/seastore_types.h
@@ -20,9 +20,42 @@
#include "include/intarith.h"
#include "include/interval_set.h"
#include "include/uuid.h"
+#include "include/rados.h"
namespace crimson::os::seastore {
+class cache_hint_t {
+ enum hint_t {
+ TOUCH,
+ NOCACHE
+ };
+public:
+ static constexpr cache_hint_t get_touch() {
+ return hint_t::TOUCH;
+ }
+ static constexpr cache_hint_t get_nocache() {
+ return hint_t::NOCACHE;
+ }
+ cache_hint_t(uint32_t flags) {
+ if (unlikely(flags & CEPH_OSD_OP_FLAG_FADVISE_DONTNEED) ||
+ unlikely(flags & CEPH_OSD_OP_FLAG_FADVISE_NOCACHE)) {
+ hint = NOCACHE;
+ }
+ }
+ bool operator==(const cache_hint_t &other) const {
+ return hint == other.hint;
+ }
+ bool operator!=(const cache_hint_t &other) const {
+ return hint != other.hint;
+ }
+private:
+ constexpr cache_hint_t(hint_t hint) : hint(hint) {}
+ hint_t hint = hint_t::TOUCH;
+};
+
+inline constexpr cache_hint_t CACHE_HINT_TOUCH = cache_hint_t::get_touch();
+inline constexpr cache_hint_t CACHE_HINT_NOCACHE = cache_hint_t::get_nocache();
+
/* using a special xattr key "omap_header" to store omap header */
const std::string OMAP_HEADER_XATTR_KEY = "omap_header";
diff --git a/src/crimson/os/seastore/transaction.h b/src/crimson/os/seastore/transaction.h
index 66a9f896520..cd8c333c69f 100644
--- a/src/crimson/os/seastore/transaction.h
+++ b/src/crimson/os/seastore/transaction.h
@@ -409,12 +409,14 @@ public:
src_t src,
journal_seq_t initiated_after,
on_destruct_func_t&& f,
- transaction_id_t trans_id
+ transaction_id_t trans_id,
+ cache_hint_t cache_hint
) : weak(weak),
handle(std::move(handle)),
on_destruct(std::move(f)),
src(src),
- trans_id(trans_id)
+ trans_id(trans_id),
+ cache_hint(cache_hint)
{}
void invalidate_clear_write_set() {
@@ -573,6 +575,10 @@ public:
return pre_alloc_list;
}
+ cache_hint_t get_cache_hint() const {
+ return cache_hint;
+ }
+
private:
friend class Cache;
friend Ref make_test_transaction();
@@ -682,6 +688,8 @@ private:
seastar::lw_shared_ptr<rbm_pending_ool_t> pending_ool;
backref_entry_refs_t backref_entries;
+
+ cache_hint_t cache_hint = CACHE_HINT_TOUCH;
};
using TransactionRef = Transaction::Ref;
@@ -694,7 +702,8 @@ inline TransactionRef make_test_transaction() {
Transaction::src_t::MUTATE,
JOURNAL_SEQ_NULL,
[](Transaction&) {},
- ++next_id
+ ++next_id,
+ CACHE_HINT_TOUCH
);
}
diff --git a/src/crimson/os/seastore/transaction_manager.cc b/src/crimson/os/seastore/transaction_manager.cc
index 753bd5d6ff6..807d88b2cbc 100644
--- a/src/crimson/os/seastore/transaction_manager.cc
+++ b/src/crimson/os/seastore/transaction_manager.cc
@@ -66,6 +66,7 @@ TransactionManager::mkfs_ertr::future<> TransactionManager::mkfs()
return with_transaction_intr(
Transaction::src_t::MUTATE,
"mkfs_tm",
+ CACHE_HINT_TOUCH,
[this, FNAME](auto& t)
{
cache->init();
@@ -131,6 +132,7 @@ TransactionManager::mount()
journal->get_trimmer().set_journal_head(start_seq);
return with_transaction_weak(
"mount",
+ CACHE_HINT_TOUCH,
[this](auto &t)
{
return cache->init_cached_extents(t, [this](auto &t, auto &e) {
diff --git a/src/crimson/os/seastore/transaction_manager.h b/src/crimson/os/seastore/transaction_manager.h
index dc6cc20cf59..e574460894a 100644
--- a/src/crimson/os/seastore/transaction_manager.h
+++ b/src/crimson/os/seastore/transaction_manager.h
@@ -741,8 +741,9 @@ public:
TransactionRef create_transaction(
Transaction::src_t src,
const char* name,
+ cache_hint_t cache_hint = CACHE_HINT_TOUCH,
bool is_weak=false) final {
- return cache->create_transaction(src, name, is_weak);
+ return cache->create_transaction(src, name, cache_hint, is_weak);
}
using ExtentCallbackInterface::submit_transaction_direct_ret;
diff --git a/src/crimson/osd/backfill_facades.h b/src/crimson/osd/backfill_facades.h
index 64544d4c870..ce649303d4f 100644
--- a/src/crimson/osd/backfill_facades.h
+++ b/src/crimson/osd/backfill_facades.h
@@ -82,6 +82,9 @@ struct PGFacade final : BackfillState::PGFacade {
}
PGFacade(PG& pg) : pg(pg) {}
+ std::ostream &print(std::ostream &out) const override {
+ return out << pg;
+ }
};
} // namespace crimson::osd
diff --git a/src/crimson/osd/backfill_state.cc b/src/crimson/osd/backfill_state.cc
index 837fd2eb2af..f957f072c93 100644
--- a/src/crimson/osd/backfill_state.cc
+++ b/src/crimson/osd/backfill_state.cc
@@ -8,11 +8,7 @@
#include "crimson/osd/backfill_state.h"
#include "osd/osd_types_fmt.h"
-namespace {
- seastar::logger& logger() {
- return crimson::get_logger(ceph_subsys_osd);
- }
-}
+SET_SUBSYS(osd);
namespace crimson::osd {
@@ -27,22 +23,23 @@ BackfillState::BackfillState(
progress_tracker(
std::make_unique<BackfillState::ProgressTracker>(backfill_machine))
{
- logger().debug("{}:{}", __func__, __LINE__);
+ LOG_PREFIX(BackfillState::BackfillState);
+ DEBUGDPP("", *backfill_machine.pg);
backfill_machine.initiate();
}
template <class S>
BackfillState::StateHelper<S>::StateHelper()
{
- logger().debug("enter {}",
- boost::typeindex::type_id<S>().pretty_name());
+ LOG_PREFIX(BackfillState::StateHelper);
+ DEBUGDPP("enter {}", pg(), boost::typeindex::type_id<S>().pretty_name());
}
template <class S>
BackfillState::StateHelper<S>::~StateHelper()
{
- logger().debug("exit {}",
- boost::typeindex::type_id<S>().pretty_name());
+ LOG_PREFIX(BackfillState::StateHelper);
+ DEBUG("exit {}", boost::typeindex::type_id<S>().pretty_name());
}
BackfillState::~BackfillState() = default;
@@ -63,13 +60,16 @@ BackfillState::BackfillMachine::~BackfillMachine() = default;
BackfillState::Initial::Initial(my_context ctx)
: my_base(ctx)
{
+ LOG_PREFIX(BackfillState::Initial::Initial);
backfill_state().last_backfill_started = peering_state().earliest_backfill();
- logger().debug("{}: bft={} from {}",
- __func__, peering_state().get_backfill_targets(),
- backfill_state().last_backfill_started);
+ DEBUGDPP("{}: bft={} from {}",
+ pg(),
+ __func__,
+ peering_state().get_backfill_targets(),
+ backfill_state().last_backfill_started);
for (const auto& bt : peering_state().get_backfill_targets()) {
- logger().debug("{}: target shard {} from {}",
- __func__, bt, peering_state().get_peer_last_backfill(bt));
+ DEBUGDPP("{}: target shard {} from {}",
+ pg(), __func__, bt, peering_state().get_peer_last_backfill(bt));
}
ceph_assert(peering_state().get_backfill_targets().size());
ceph_assert(!backfill_state().last_backfill_started.is_max());
@@ -80,7 +80,8 @@ BackfillState::Initial::Initial(my_context ctx)
boost::statechart::result
BackfillState::Initial::react(const BackfillState::Triggered& evt)
{
- logger().debug("{}: backfill triggered", __func__);
+ LOG_PREFIX(BackfillState::Initial::react::Triggered);
+ DEBUGDPP("", pg());
ceph_assert(backfill_state().last_backfill_started == \
peering_state().earliest_backfill());
ceph_assert(peering_state().is_backfilling());
@@ -93,26 +94,10 @@ BackfillState::Initial::react(const BackfillState::Triggered& evt)
if (Enqueuing::all_enqueued(peering_state(),
backfill_state().backfill_info,
backfill_state().peer_backfill_info)) {
- logger().debug("{}: switching to Done state", __func__);
+ DEBUGDPP("switching to Done state", pg());
return transit<BackfillState::Done>();
} else {
- logger().debug("{}: switching to Enqueuing state", __func__);
- return transit<BackfillState::Enqueuing>();
- }
-}
-
-boost::statechart::result
-BackfillState::Cancelled::react(const BackfillState::Triggered& evt)
-{
- logger().debug("{}: backfill re-triggered", __func__);
- ceph_assert(peering_state().is_backfilling());
- if (Enqueuing::all_enqueued(peering_state(),
- backfill_state().backfill_info,
- backfill_state().peer_backfill_info)) {
- logger().debug("{}: switching to Done state", __func__);
- return transit<BackfillState::Done>();
- } else {
- logger().debug("{}: switching to Enqueuing state", __func__);
+ DEBUGDPP("switching to Enqueuing state", pg());
return transit<BackfillState::Enqueuing>();
}
}
@@ -120,9 +105,10 @@ BackfillState::Cancelled::react(const BackfillState::Triggered& evt)
// -- Enqueuing
void BackfillState::Enqueuing::maybe_update_range()
{
+ LOG_PREFIX(BackfillState::Enqueuing::maybe_update_range);
if (auto& primary_bi = backfill_state().backfill_info;
primary_bi.version >= pg().get_projected_last_update()) {
- logger().info("{}: bi is current", __func__);
+ INFODPP("bi is current", pg());
ceph_assert(primary_bi.version == pg().get_projected_last_update());
} else if (primary_bi.version >= peering_state().get_log_tail()) {
if (peering_state().get_pg_log().get_log().empty() &&
@@ -136,31 +122,31 @@ void BackfillState::Enqueuing::maybe_update_range()
ceph_assert(primary_bi.version == eversion_t());
return;
}
- logger().debug("{}: bi is old, ({}) can be updated with log to {}",
- __func__,
- primary_bi.version,
- pg().get_projected_last_update());
+ DEBUGDPP("bi is old, ({}) can be updated with log to {}",
+ pg(),
+ primary_bi.version,
+ pg().get_projected_last_update());
auto func =
[&](const pg_log_entry_t& e) {
- logger().debug("maybe_update_range(lambda): updating from version {}",
- e.version);
+ DEBUGDPP("maybe_update_range(lambda): updating from version {}",
+ pg(), e.version);
if (e.soid >= primary_bi.begin && e.soid < primary_bi.end) {
if (e.is_update()) {
- logger().debug("maybe_update_range(lambda): {} updated to ver {}",
- e.soid, e.version);
+ DEBUGDPP("maybe_update_range(lambda): {} updated to ver {}",
+ pg(), e.soid, e.version);
primary_bi.objects.erase(e.soid);
primary_bi.objects.insert(std::make_pair(e.soid,
e.version));
} else if (e.is_delete()) {
- logger().debug("maybe_update_range(lambda): {} removed",
- e.soid);
+ DEBUGDPP("maybe_update_range(lambda): {} removed",
+ pg(), e.soid);
primary_bi.objects.erase(e.soid);
}
}
};
- logger().debug("{}: scanning pg log first", __func__);
+ DEBUGDPP("scanning pg log first", pg());
peering_state().scan_log_after(primary_bi.version, func);
- logger().debug("{}: scanning projected log", __func__);
+ DEBUGDPP("scanning projected log", pg());
pg().get_projected_log().scan_log_after(primary_bi.version, func);
primary_bi.version = pg().get_projected_last_update();
} else {
@@ -244,6 +230,7 @@ void BackfillState::Enqueuing::trim_backfilled_object_from_intervals(
BackfillState::Enqueuing::result_t
BackfillState::Enqueuing::remove_on_peers(const hobject_t& check)
{
+ LOG_PREFIX(BackfillState::Enqueuing::remove_on_peers);
// set `new_last_backfill_started` to `check`
result_t result { {}, check };
for (const auto& bt : peering_state().get_backfill_targets()) {
@@ -255,8 +242,8 @@ BackfillState::Enqueuing::remove_on_peers(const hobject_t& check)
backfill_listener().enqueue_drop(bt, pbi.begin, version);
}
}
- logger().debug("{}: BACKFILL removing {} from peers {}",
- __func__, check, result.pbi_targets);
+ DEBUGDPP("BACKFILL removing {} from peers {}",
+ pg(), check, result.pbi_targets);
ceph_assert(!result.pbi_targets.empty());
return result;
}
@@ -264,7 +251,8 @@ BackfillState::Enqueuing::remove_on_peers(const hobject_t& check)
BackfillState::Enqueuing::result_t
BackfillState::Enqueuing::update_on_peers(const hobject_t& check)
{
- logger().debug("{}: check={}", __func__, check);
+ LOG_PREFIX(BackfillState::Enqueuing::update_on_peers);
+ DEBUGDPP("check={}", pg(), check);
const auto& primary_bi = backfill_state().backfill_info;
result_t result { {}, primary_bi.begin };
std::map<hobject_t, std::pair<eversion_t, std::vector<pg_shard_t>>> backfills;
@@ -325,6 +313,7 @@ bool BackfillState::Enqueuing::Enqueuing::all_emptied(
BackfillState::Enqueuing::Enqueuing(my_context ctx)
: my_base(ctx)
{
+ LOG_PREFIX(BackfillState::Enqueuing::Enqueuing);
auto& primary_bi = backfill_state().backfill_info;
// update our local interval to cope with recent changes
@@ -334,8 +323,7 @@ BackfillState::Enqueuing::Enqueuing(my_context ctx)
// that backfill will be spinning here over and over. For the sake
// of performance and complexity we don't synchronize with entire PG.
// similar can happen in classical OSD.
- logger().warn("{}: bi is old, rescanning of local backfill_info",
- __func__);
+ WARNDPP("bi is old, rescanning of local backfill_info", pg());
post_event(RequestPrimaryScanning{});
return;
} else {
@@ -347,13 +335,14 @@ BackfillState::Enqueuing::Enqueuing(my_context ctx)
primary_bi)) {
// need to grab one another chunk of the object namespace and restart
// the queueing.
- logger().debug("{}: reached end for current local chunk", __func__);
+ DEBUGDPP("reached end for current local chunk", pg());
post_event(RequestPrimaryScanning{});
return;
}
do {
if (!backfill_listener().budget_available()) {
+ DEBUGDPP("throttle failed, turning to Waiting", pg());
post_event(RequestWaiting{});
return;
} else if (should_rescan_replicas(backfill_state().peer_backfill_info,
@@ -392,16 +381,25 @@ BackfillState::Enqueuing::Enqueuing(my_context ctx)
}
} while (!all_emptied(primary_bi, backfill_state().peer_backfill_info));
- if (backfill_state().progress_tracker->tracked_objects_completed()
- && Enqueuing::all_enqueued(peering_state(),
- backfill_state().backfill_info,
- backfill_state().peer_backfill_info)) {
- backfill_state().last_backfill_started = hobject_t::get_max();
- backfill_listener().update_peers_last_backfill(hobject_t::get_max());
+ if (should_rescan_primary(backfill_state().peer_backfill_info,
+ primary_bi)) {
+ // need to grab another chunk of the object namespace and restart
+ // the queueing.
+ DEBUGDPP("reached end for current local chunk", pg());
+ post_event(RequestPrimaryScanning{});
+ return;
+ } else {
+ if (backfill_state().progress_tracker->tracked_objects_completed()
+ && Enqueuing::all_enqueued(peering_state(),
+ backfill_state().backfill_info,
+ backfill_state().peer_backfill_info)) {
+ backfill_state().last_backfill_started = hobject_t::get_max();
+ backfill_listener().update_peers_last_backfill(hobject_t::get_max());
+ }
+ DEBUGDPP("reached end for both local and all peers "
+ "but still has in-flight operations", pg());
+ post_event(RequestWaiting{});
}
- logger().debug("{}: reached end for both local and all peers "
- "but still has in-flight operations", __func__);
- post_event(RequestWaiting{});
}
// -- PrimaryScanning
@@ -416,16 +414,45 @@ BackfillState::PrimaryScanning::PrimaryScanning(my_context ctx)
boost::statechart::result
BackfillState::PrimaryScanning::react(PrimaryScanned evt)
{
- logger().debug("{}", __func__);
+ LOG_PREFIX(BackfillState::PrimaryScanning::react::PrimaryScanned);
+ DEBUGDPP("", pg());
backfill_state().backfill_info = std::move(evt.result);
- return transit<Enqueuing>();
+ if (!backfill_state().is_suspended()) {
+ return transit<Enqueuing>();
+ } else {
+ DEBUGDPP("backfill suspended, not going Enqueuing", pg());
+ backfill_state().go_enqueuing_on_resume();
+ }
+ return discard_event();
+}
+
+boost::statechart::result
+BackfillState::PrimaryScanning::react(CancelBackfill evt)
+{
+ LOG_PREFIX(BackfillState::PrimaryScanning::react::SuspendBackfill);
+ DEBUGDPP("suspended within PrimaryScanning", pg());
+ backfill_state().on_suspended();
+ return discard_event();
+}
+
+boost::statechart::result
+BackfillState::PrimaryScanning::react(Triggered evt)
+{
+ LOG_PREFIX(BackfillState::PrimaryScanning::react::Triggered);
+ ceph_assert(backfill_state().is_suspended());
+ if (backfill_state().on_resumed()) {
+ DEBUGDPP("Backfill resumed, going Enqueuing", pg());
+ return transit<Enqueuing>();
+ }
+ return discard_event();
}
boost::statechart::result
BackfillState::PrimaryScanning::react(ObjectPushed evt)
{
- logger().debug("PrimaryScanning::react() on ObjectPushed; evt.object={}",
- evt.object);
+ LOG_PREFIX(BackfillState::PrimaryScanning::react::ObjectPushed);
+ DEBUGDPP("PrimaryScanning::react() on ObjectPushed; evt.object={}",
+ pg(), evt.object);
backfill_state().progress_tracker->complete_to(evt.object, evt.stat, true);
return discard_event();
}
@@ -443,11 +470,11 @@ bool BackfillState::ReplicasScanning::replica_needs_scan(
BackfillState::ReplicasScanning::ReplicasScanning(my_context ctx)
: my_base(ctx)
{
+ LOG_PREFIX(BackfillState::ReplicasScanning::ReplicasScanning);
for (const auto& bt : peering_state().get_backfill_targets()) {
if (const auto& pbi = backfill_state().peer_backfill_info.at(bt);
replica_needs_scan(pbi, backfill_state().backfill_info)) {
- logger().debug("{}: scanning peer osd.{} from {}",
- __func__, bt, pbi.end);
+ DEBUGDPP("scanning peer osd.{} from {}", pg(), bt, pbi.end);
backfill_listener().request_replica_scan(bt, pbi.end, hobject_t{});
ceph_assert(waiting_on_backfill.find(bt) == \
@@ -469,8 +496,9 @@ BackfillState::ReplicasScanning::~ReplicasScanning()
boost::statechart::result
BackfillState::ReplicasScanning::react(ReplicaScanned evt)
{
- logger().debug("{}: got scan result from osd={}, result={}",
- __func__, evt.from, evt.result);
+ LOG_PREFIX(BackfillState::ReplicasScanning::react::ReplicaScanned);
+ DEBUGDPP("got scan result from osd={}, result={}",
+ pg(), evt.from, evt.result);
// TODO: maybe we'll be able to move waiting_on_backfill from
// the machine to the state.
ceph_assert(peering_state().is_backfill_target(evt.from));
@@ -479,12 +507,17 @@ BackfillState::ReplicasScanning::react(ReplicaScanned evt)
if (waiting_on_backfill.empty()) {
ceph_assert(backfill_state().peer_backfill_info.size() == \
peering_state().get_backfill_targets().size());
- return transit<Enqueuing>();
+ if (!backfill_state().is_suspended()) {
+ return transit<Enqueuing>();
+ } else {
+ DEBUGDPP("backfill suspended, not going Enqueuing", pg());
+ backfill_state().go_enqueuing_on_resume();
+ }
}
} else {
- // we canceled backfill for a while due to a too full, and this
+ // we suspended backfill for a while due to a too-full peer, and this
// is an extra response from a non-too-full peer
- logger().debug("{}: canceled backfill (too full?)", __func__);
+ DEBUGDPP("suspended backfill (too full?)", pg());
}
return discard_event();
}
@@ -492,17 +525,30 @@ BackfillState::ReplicasScanning::react(ReplicaScanned evt)
boost::statechart::result
BackfillState::ReplicasScanning::react(CancelBackfill evt)
{
- logger().debug("{}: cancelled within ReplicasScanning",
- __func__);
- waiting_on_backfill.clear();
- return transit<Cancelled>();
+ LOG_PREFIX(BackfillState::ReplicasScanning::react::SuspendBackfill);
+ DEBUGDPP("suspended within ReplicasScanning", pg());
+ backfill_state().on_suspended();
+ return discard_event();
+}
+
+boost::statechart::result
+BackfillState::ReplicasScanning::react(Triggered evt)
+{
+ LOG_PREFIX(BackfillState::ReplicasScanning::react::Triggered);
+ ceph_assert(backfill_state().is_suspended());
+ if (backfill_state().on_resumed()) {
+ DEBUGDPP("Backfill resumed, going Enqueuing", pg());
+ return transit<Enqueuing>();
+ }
+ return discard_event();
}
boost::statechart::result
BackfillState::ReplicasScanning::react(ObjectPushed evt)
{
- logger().debug("ReplicasScanning::react() on ObjectPushed; evt.object={}",
- evt.object);
+ LOG_PREFIX(BackfillState::ReplicasScanning::react::ObjectPushed);
+ DEBUGDPP("ReplicasScanning::react() on ObjectPushed; evt.object={}",
+ pg(), evt.object);
backfill_state().progress_tracker->complete_to(evt.object, evt.stat, true);
return discard_event();
}
@@ -517,17 +563,45 @@ BackfillState::Waiting::Waiting(my_context ctx)
boost::statechart::result
BackfillState::Waiting::react(ObjectPushed evt)
{
- logger().debug("Waiting::react() on ObjectPushed; evt.object={}",
- evt.object);
+ LOG_PREFIX(BackfillState::Waiting::react::ObjectPushed);
+ DEBUGDPP("Waiting::react() on ObjectPushed; evt.object={}", pg(), evt.object);
backfill_state().progress_tracker->complete_to(evt.object, evt.stat, false);
- return transit<Enqueuing>();;
+ if (!backfill_state().is_suspended()) {
+ return transit<Enqueuing>();
+ } else {
+ DEBUGDPP("backfill suspended, not going Enqueuing", pg());
+ backfill_state().go_enqueuing_on_resume();
+ }
+ return discard_event();
+}
+
+boost::statechart::result
+BackfillState::Waiting::react(CancelBackfill evt)
+{
+ LOG_PREFIX(BackfillState::Waiting::react::SuspendBackfill);
+ DEBUGDPP("suspended within Waiting", pg());
+ backfill_state().on_suspended();
+ return discard_event();
+}
+
+boost::statechart::result
+BackfillState::Waiting::react(Triggered evt)
+{
+ LOG_PREFIX(BackfillState::Waiting::react::Triggered);
+ ceph_assert(backfill_state().is_suspended());
+ if (backfill_state().on_resumed()) {
+ DEBUGDPP("Backfill resumed, going Enqueuing", pg());
+ return transit<Enqueuing>();
+ }
+ return discard_event();
}
// -- Done
BackfillState::Done::Done(my_context ctx)
: my_base(ctx)
{
- logger().info("{}: backfill is done", __func__);
+ LOG_PREFIX(BackfillState::Done::Done);
+ INFODPP("backfill is done", pg());
backfill_listener().backfilled();
}
@@ -537,13 +611,6 @@ BackfillState::Crashed::Crashed()
ceph_abort_msg("{}: this should not happen");
}
-// -- Cancelled
-BackfillState::Cancelled::Cancelled(my_context ctx)
- : my_base(ctx)
-{
- ceph_assert(peering_state().get_backfill_targets().size());
-}
-
// ProgressTracker is an intermediary between the BackfillListener and
// BackfillMachine + its states. All requests to push or drop an object
// are directed through it. The same happens with notifications about
@@ -577,8 +644,8 @@ void BackfillState::ProgressTracker::complete_to(
const pg_stat_t& stats,
bool may_push_to_max)
{
- logger().debug("{}: obj={}",
- __func__, obj);
+ LOG_PREFIX(BackfillState::ProgressTracker::complete_to);
+ DEBUGDPP("obj={}", pg(), obj);
if (auto completion_iter = registry.find(obj);
completion_iter != std::end(registry)) {
completion_iter->second = \
@@ -619,4 +686,19 @@ void BackfillState::enqueue_standalone_push(
backfill_machine.backfill_listener.enqueue_push(obj, v, peers);
}
+void BackfillState::enqueue_standalone_delete(
+ const hobject_t &obj,
+ const eversion_t &v,
+ const std::vector<pg_shard_t> &peers)
+{
+ progress_tracker->enqueue_drop(obj);
+ for (auto bt : peers) {
+ backfill_machine.backfill_listener.enqueue_drop(bt, obj, v);
+ }
+}
+
+std::ostream &operator<<(std::ostream &out, const BackfillState::PGFacade &pg) {
+ return pg.print(out);
+}
+
} // namespace crimson::osd
diff --git a/src/crimson/osd/backfill_state.h b/src/crimson/osd/backfill_state.h
index 072c91e079d..517a02ea4df 100644
--- a/src/crimson/osd/backfill_state.h
+++ b/src/crimson/osd/backfill_state.h
@@ -62,6 +62,8 @@ struct BackfillState {
struct CancelBackfill : sc::event<CancelBackfill> {
};
+ struct ThrottleAcquired : sc::event<ThrottleAcquired> {
+ };
private:
// internal events
struct RequestPrimaryScanning : sc::event<RequestPrimaryScanning> {
@@ -136,34 +138,10 @@ public:
explicit Crashed();
};
- struct Cancelled : sc::state<Cancelled, BackfillMachine>,
- StateHelper<Cancelled> {
- using reactions = boost::mpl::list<
- sc::custom_reaction<Triggered>,
- sc::custom_reaction<PrimaryScanned>,
- sc::custom_reaction<ReplicaScanned>,
- sc::custom_reaction<ObjectPushed>,
- sc::transition<sc::event_base, Crashed>>;
- explicit Cancelled(my_context);
- // resume after triggering backfill by on_activate_complete().
- // transit to Enqueuing.
- sc::result react(const Triggered&);
- sc::result react(const PrimaryScanned&) {
- return discard_event();
- }
- sc::result react(const ReplicaScanned&) {
- return discard_event();
- }
- sc::result react(const ObjectPushed&) {
- return discard_event();
- }
- };
-
struct Initial : sc::state<Initial, BackfillMachine>,
StateHelper<Initial> {
using reactions = boost::mpl::list<
sc::custom_reaction<Triggered>,
- sc::transition<CancelBackfill, Cancelled>,
sc::transition<sc::event_base, Crashed>>;
explicit Initial(my_context);
// initialize after triggering backfill by on_activate_complete().
@@ -174,12 +152,9 @@ public:
struct Enqueuing : sc::state<Enqueuing, BackfillMachine>,
StateHelper<Enqueuing> {
using reactions = boost::mpl::list<
- sc::transition<CancelBackfill, Cancelled>,
sc::transition<RequestPrimaryScanning, PrimaryScanning>,
sc::transition<RequestReplicasScanning, ReplicasScanning>,
sc::transition<RequestWaiting, Waiting>,
- sc::transition<RequestDone, Done>,
- sc::transition<CancelBackfill, Cancelled>,
sc::transition<sc::event_base, Crashed>>;
explicit Enqueuing(my_context);
@@ -237,12 +212,15 @@ public:
sc::custom_reaction<ObjectPushed>,
sc::custom_reaction<PrimaryScanned>,
sc::transition<RequestDone, Done>,
- sc::transition<CancelBackfill, Cancelled>,
+ sc::custom_reaction<CancelBackfill>,
+ sc::custom_reaction<Triggered>,
sc::transition<sc::event_base, Crashed>>;
explicit PrimaryScanning(my_context);
sc::result react(ObjectPushed);
// collect scanning result and transit to Enqueuing.
sc::result react(PrimaryScanned);
+ sc::result react(CancelBackfill);
+ sc::result react(Triggered);
};
struct ReplicasScanning : sc::state<ReplicasScanning, BackfillMachine>,
@@ -251,6 +229,7 @@ public:
sc::custom_reaction<ObjectPushed>,
sc::custom_reaction<ReplicaScanned>,
sc::custom_reaction<CancelBackfill>,
+ sc::custom_reaction<Triggered>,
sc::transition<RequestDone, Done>,
sc::transition<sc::event_base, Crashed>>;
explicit ReplicasScanning(my_context);
@@ -259,6 +238,7 @@ public:
sc::result react(ObjectPushed);
sc::result react(ReplicaScanned);
sc::result react(CancelBackfill);
+ sc::result react(Triggered);
// indicate whether a particular peer should be scanned to retrieve
// BackfillInterval for new range of hobject_t namespace.
@@ -277,10 +257,14 @@ public:
using reactions = boost::mpl::list<
sc::custom_reaction<ObjectPushed>,
sc::transition<RequestDone, Done>,
- sc::transition<CancelBackfill, Cancelled>,
+ sc::custom_reaction<CancelBackfill>,
+ sc::custom_reaction<Triggered>,
+ sc::transition<ThrottleAcquired, Enqueuing>,
sc::transition<sc::event_base, Crashed>>;
explicit Waiting(my_context);
sc::result react(ObjectPushed);
+ sc::result react(CancelBackfill);
+ sc::result react(Triggered);
};
struct Done : sc::state<Done, BackfillMachine>,
@@ -308,6 +292,11 @@ public:
const hobject_t &obj,
const eversion_t &v,
const std::vector<pg_shard_t> &peers);
+ void enqueue_standalone_delete(
+ const hobject_t &obj,
+ const eversion_t &v,
+ const std::vector<pg_shard_t> &peers);
+
bool is_triggered() const {
return backfill_machine.triggering_event() != nullptr;
@@ -325,6 +314,26 @@ public:
}
}
private:
+ struct backfill_suspend_state_t {
+ bool suspended = false;
+ bool should_go_enqueuing = false;
+ } backfill_suspend_state;
+ bool is_suspended() const {
+ return backfill_suspend_state.suspended;
+ }
+ void on_suspended() {
+ ceph_assert(!is_suspended());
+ backfill_suspend_state = {true, false};
+ }
+ bool on_resumed() {
+ auto go_enqueuing = backfill_suspend_state.should_go_enqueuing;
+ backfill_suspend_state = {false, false};
+ return go_enqueuing;
+ }
+ void go_enqueuing_on_resume() {
+ ceph_assert(is_suspended());
+ backfill_suspend_state.should_go_enqueuing = true;
+ }
hobject_t last_backfill_started;
BackfillInterval backfill_info;
std::map<pg_shard_t, BackfillInterval> peer_backfill_info;
@@ -405,8 +414,10 @@ struct BackfillState::PGFacade {
virtual const eversion_t& get_projected_last_update() const = 0;
virtual const PGLog::IndexedLog& get_projected_log() const = 0;
+ virtual std::ostream &print(std::ostream &out) const = 0;
virtual ~PGFacade() {}
};
+std::ostream &operator<<(std::ostream &out, const BackfillState::PGFacade &pg);
class BackfillState::ProgressTracker {
// TODO: apply_stat,
@@ -433,6 +444,9 @@ class BackfillState::ProgressTracker {
BackfillListener& backfill_listener() {
return backfill_machine.backfill_listener;
}
+ PGFacade& pg() {
+ return *backfill_machine.pg;
+ }
public:
ProgressTracker(BackfillMachine& backfill_machine)
@@ -447,3 +461,9 @@ public:
};
} // namespace crimson::osd
+
+#if FMT_VERSION >= 90000
+template <> struct fmt::formatter<crimson::osd::BackfillState::PGFacade>
+ : fmt::ostream_formatter {};
+#endif
+
diff --git a/src/crimson/osd/osd_operation.h b/src/crimson/osd/osd_operation.h
index f8815a316d1..394375c1129 100644
--- a/src/crimson/osd/osd_operation.h
+++ b/src/crimson/osd/osd_operation.h
@@ -217,6 +217,9 @@ protected:
public:
static constexpr bool is_trackable = true;
+ virtual bool requires_pg() const {
+ return true;
+ }
};
template <class T>
@@ -338,6 +341,18 @@ public:
with_throttle_while(std::forward<Args>(args)...), *this);
}
+ // Returns std::nullopt if the throttle is acquired immediately;
+ // otherwise returns the future from acquire_throttle().
+ std::optional<seastar::future<>>
+ try_acquire_throttle_now(crimson::osd::scheduler::params_t params) {
+ if (!max_in_progress || in_progress < max_in_progress) {
+ ++in_progress;
+ --pending;
+ return std::nullopt;
+ }
+ return acquire_throttle(params);
+ }
+
private:
void dump_detail(Formatter *f) const final;
diff --git a/src/crimson/osd/osd_operations/client_request.h b/src/crimson/osd/osd_operations/client_request.h
index 98443bdfc0f..91a6728fd4b 100644
--- a/src/crimson/osd/osd_operations/client_request.h
+++ b/src/crimson/osd/osd_operations/client_request.h
@@ -42,6 +42,10 @@ class ClientRequest final : public PhasedOperationT<ClientRequest>,
unsigned instance_id = 0;
public:
+ epoch_t get_epoch_sent_at() const {
+ return m->get_map_epoch();
+ }
+
/**
* instance_handle_t
*
diff --git a/src/crimson/osd/osd_operations/logmissing_request.h b/src/crimson/osd/osd_operations/logmissing_request.h
index e12243ce430..fe4761c4ab4 100644
--- a/src/crimson/osd/osd_operations/logmissing_request.h
+++ b/src/crimson/osd/osd_operations/logmissing_request.h
@@ -36,6 +36,9 @@ public:
}
PipelineHandle &get_handle() { return handle; }
epoch_t get_epoch() const { return req->get_min_epoch(); }
+ epoch_t get_epoch_sent_at() const {
+ return req->get_map_epoch();
+ }
ConnectionPipeline &get_connection_pipeline();
diff --git a/src/crimson/osd/osd_operations/logmissing_request_reply.h b/src/crimson/osd/osd_operations/logmissing_request_reply.h
index 71651d16789..bdb6c2ac6ac 100644
--- a/src/crimson/osd/osd_operations/logmissing_request_reply.h
+++ b/src/crimson/osd/osd_operations/logmissing_request_reply.h
@@ -36,6 +36,9 @@ public:
}
PipelineHandle &get_handle() { return handle; }
epoch_t get_epoch() const { return req->get_min_epoch(); }
+ epoch_t get_epoch_sent_at() const {
+ return req->get_map_epoch();
+ }
ConnectionPipeline &get_connection_pipeline();
diff --git a/src/crimson/osd/osd_operations/peering_event.h b/src/crimson/osd/osd_operations/peering_event.h
index 85de5c711d6..aa6b8a95a94 100644
--- a/src/crimson/osd/osd_operations/peering_event.h
+++ b/src/crimson/osd/osd_operations/peering_event.h
@@ -44,6 +44,10 @@ protected:
float delay = 0;
PGPeeringEvent evt;
+ epoch_t get_epoch_sent_at() const {
+ return evt.get_epoch_sent();
+ }
+
const pg_shard_t get_from() const {
return from;
}
@@ -84,6 +88,10 @@ public:
evt(std::forward<Args>(args)...)
{}
+ bool requires_pg() const final {
+ return evt.requires_pg;
+ }
+
void print(std::ostream &) const final;
void dump_detail(ceph::Formatter* f) const final;
seastar::future<> with_pg(
diff --git a/src/crimson/osd/osd_operations/pg_advance_map.h b/src/crimson/osd/osd_operations/pg_advance_map.h
index 43be7319545..21702f6ff4f 100644
--- a/src/crimson/osd/osd_operations/pg_advance_map.h
+++ b/src/crimson/osd/osd_operations/pg_advance_map.h
@@ -50,6 +50,10 @@ public:
PGPeeringPipeline::Process::BlockingEvent
> tracking_events;
+ epoch_t get_epoch_sent_at() const {
+ return to;
+ }
+
private:
PGPeeringPipeline &peering_pp(PG &pg);
};
diff --git a/src/crimson/osd/osd_operations/recovery_subrequest.h b/src/crimson/osd/osd_operations/recovery_subrequest.h
index 17c2faf97ea..2fe8ff372b3 100644
--- a/src/crimson/osd/osd_operations/recovery_subrequest.h
+++ b/src/crimson/osd/osd_operations/recovery_subrequest.h
@@ -39,6 +39,9 @@ public:
}
PipelineHandle &get_handle() { return handle; }
epoch_t get_epoch() const { return m->get_min_epoch(); }
+ epoch_t get_epoch_sent_at() const {
+ return m->get_map_epoch();
+ }
ConnectionPipeline &get_connection_pipeline();
diff --git a/src/crimson/osd/osd_operations/replicated_request.h b/src/crimson/osd/osd_operations/replicated_request.h
index 37d2771a33b..c2494b3715f 100644
--- a/src/crimson/osd/osd_operations/replicated_request.h
+++ b/src/crimson/osd/osd_operations/replicated_request.h
@@ -36,6 +36,9 @@ public:
}
PipelineHandle &get_handle() { return handle; }
epoch_t get_epoch() const { return req->get_min_epoch(); }
+ epoch_t get_epoch_sent_at() const {
+ return req->get_map_epoch();
+ }
ConnectionPipeline &get_connection_pipeline();
diff --git a/src/crimson/osd/osd_operations/scrub_events.h b/src/crimson/osd/osd_operations/scrub_events.h
index 02a5d852bb7..8bed90e4c14 100644
--- a/src/crimson/osd/osd_operations/scrub_events.h
+++ b/src/crimson/osd/osd_operations/scrub_events.h
@@ -27,11 +27,11 @@ class RemoteScrubEventBaseT : public PhasedOperationT<T> {
crimson::net::ConnectionRef l_conn;
crimson::net::ConnectionXcoreRef r_conn;
- epoch_t epoch;
spg_t pgid;
protected:
using interruptor = InterruptibleOperation::interruptor;
+ epoch_t epoch;
template <typename U=void>
using ifut = InterruptibleOperation::interruptible_future<U>;
@@ -40,7 +40,7 @@ protected:
public:
RemoteScrubEventBaseT(
crimson::net::ConnectionRef conn, epoch_t epoch, spg_t pgid)
- : l_conn(std::move(conn)), epoch(epoch), pgid(pgid) {}
+ : l_conn(std::move(conn)), pgid(pgid), epoch(epoch) {}
PGPeeringPipeline &get_peering_pipeline(PG &pg);
@@ -117,6 +117,10 @@ public:
: RemoteScrubEventBaseT<ScrubRequested>(std::forward<Args>(base_args)...),
deep(deep) {}
+ epoch_t get_epoch_sent_at() const {
+ return epoch;
+ }
+
void print(std::ostream &out) const final {
out << "(deep=" << deep << ")";
}
@@ -141,6 +145,10 @@ public:
ceph_assert(scrub::PGScrubber::is_scrub_message(*m));
}
+ epoch_t get_epoch_sent_at() const {
+ return epoch;
+ }
+
void print(std::ostream &out) const final {
out << "(m=" << *m << ")";
}
diff --git a/src/crimson/osd/pg.cc b/src/crimson/osd/pg.cc
index bf521498abf..2746e730f2b 100644
--- a/src/crimson/osd/pg.cc
+++ b/src/crimson/osd/pg.cc
@@ -879,6 +879,17 @@ void PG::enqueue_push_for_backfill(
backfill_state->enqueue_standalone_push(obj, v, peers);
}
+void PG::enqueue_delete_for_backfill(
+ const hobject_t &obj,
+ const eversion_t &v,
+ const std::vector<pg_shard_t> &peers)
+{
+ assert(recovery_handler);
+ assert(recovery_handler->backfill_state);
+ auto backfill_state = recovery_handler->backfill_state.get();
+ backfill_state->enqueue_standalone_delete(obj, v, peers);
+}
+
PG::interruptible_future<
std::tuple<PG::interruptible_future<>,
PG::interruptible_future<>>>
diff --git a/src/crimson/osd/pg.h b/src/crimson/osd/pg.h
index 6db73ee835b..06038c0aa00 100644
--- a/src/crimson/osd/pg.h
+++ b/src/crimson/osd/pg.h
@@ -904,6 +904,11 @@ private:
const hobject_t &obj,
const eversion_t &v,
const std::vector<pg_shard_t> &peers);
+ void enqueue_delete_for_backfill(
+ const hobject_t &obj,
+ const eversion_t &v,
+ const std::vector<pg_shard_t> &peers);
+
bool can_discard_replica_op(const Message& m, epoch_t m_map_epoch) const;
bool can_discard_op(const MOSDOp& m) const;
void context_registry_on_change();
diff --git a/src/crimson/osd/pg_backend.cc b/src/crimson/osd/pg_backend.cc
index a40b28caa8b..79895de06de 100644
--- a/src/crimson/osd/pg_backend.cc
+++ b/src/crimson/osd/pg_backend.cc
@@ -1325,9 +1325,10 @@ maybe_get_omap_vals(
PGBackend::ll_read_ierrorator::future<ceph::bufferlist>
PGBackend::omap_get_header(
const crimson::os::CollectionRef& c,
- const ghobject_t& oid) const
+ const ghobject_t& oid,
+ uint32_t op_flags) const
{
- return store->omap_get_header(c, oid)
+ return store->omap_get_header(c, oid, op_flags)
.handle_error(
crimson::ct_error::enodata::handle([] {
return seastar::make_ready_future<bufferlist>();
@@ -1340,10 +1341,13 @@ PGBackend::ll_read_ierrorator::future<>
PGBackend::omap_get_header(
const ObjectState& os,
OSDOp& osd_op,
- object_stat_sum_t& delta_stats) const
+ object_stat_sum_t& delta_stats,
+ uint32_t op_flags) const
{
if (os.oi.is_omap()) {
- return omap_get_header(coll, ghobject_t{os.oi.soid}).safe_then_interruptible(
+ return omap_get_header(
+ coll, ghobject_t{os.oi.soid}, CEPH_OSD_OP_FLAG_FADVISE_DONTNEED
+ ).safe_then_interruptible(
[&delta_stats, &osd_op] (ceph::bufferlist&& header) {
osd_op.outdata = std::move(header);
delta_stats.num_rd_kb += shift_round_up(osd_op.outdata.length(), 10);
@@ -1707,7 +1711,8 @@ PGBackend::fiemap(
CollectionRef c,
const ghobject_t& oid,
uint64_t off,
- uint64_t len)
+ uint64_t len,
+ uint32_t op_flags)
{
return store->fiemap(c, oid, off, len);
}
diff --git a/src/crimson/osd/pg_backend.h b/src/crimson/osd/pg_backend.h
index c24176a10e7..9c2230375b0 100644
--- a/src/crimson/osd/pg_backend.h
+++ b/src/crimson/osd/pg_backend.h
@@ -315,7 +315,8 @@ public:
CollectionRef c,
const ghobject_t& oid,
uint64_t off,
- uint64_t len);
+ uint64_t len,
+ uint32_t op_flags = 0);
write_iertr::future<> tmapput(
ObjectState& os,
@@ -375,11 +376,13 @@ public:
object_stat_sum_t& delta_stats);
ll_read_ierrorator::future<ceph::bufferlist> omap_get_header(
const crimson::os::CollectionRef& c,
- const ghobject_t& oid) const;
+ const ghobject_t& oid,
+ uint32_t op_flags = 0) const;
ll_read_ierrorator::future<> omap_get_header(
const ObjectState& os,
OSDOp& osd_op,
- object_stat_sum_t& delta_stats) const;
+ object_stat_sum_t& delta_stats,
+ uint32_t op_flags = 0) const;
interruptible_future<> omap_set_header(
ObjectState& os,
const OSDOp& osd_op,
diff --git a/src/crimson/osd/pg_recovery.cc b/src/crimson/osd/pg_recovery.cc
index ec3af0d2b00..5eef584c776 100644
--- a/src/crimson/osd/pg_recovery.cc
+++ b/src/crimson/osd/pg_recovery.cc
@@ -67,8 +67,6 @@ PGRecovery::start_recovery_ops(
if (max_to_start > 0) {
max_to_start -= start_replica_recovery_ops(trigger, max_to_start, &started);
}
- using interruptor =
- crimson::interruptible::interruptor<crimson::osd::IOInterruptCondition>;
return interruptor::parallel_for_each(started,
[] (auto&& ifut) {
return std::move(ifut);
@@ -609,8 +607,21 @@ void PGRecovery::update_peers_last_backfill(
bool PGRecovery::budget_available() const
{
- // TODO: the limits!
- return true;
+ crimson::osd::scheduler::params_t params =
+ {1, 0, crimson::osd::scheduler::scheduler_class_t::background_best_effort};
+ auto &ss = pg->get_shard_services();
+ auto futopt = ss.try_acquire_throttle_now(std::move(params));
+ if (!futopt) {
+ return true;
+ }
+ std::ignore = interruptor::make_interruptible(std::move(*futopt)
+ ).then_interruptible([this] {
+ assert(!backfill_state->is_triggered());
+ using BackfillState = crimson::osd::BackfillState;
+ backfill_state->process_event(
+ BackfillState::ThrottleAcquired{}.intrusive_from_this());
+ });
+ return false;
}
void PGRecovery::on_pg_clean()
diff --git a/src/crimson/osd/pg_recovery.h b/src/crimson/osd/pg_recovery.h
index 657e6d3e888..5c7b5c5ef2b 100644
--- a/src/crimson/osd/pg_recovery.h
+++ b/src/crimson/osd/pg_recovery.h
@@ -25,6 +25,8 @@ class PGBackend;
class PGRecovery : public crimson::osd::BackfillState::BackfillListener {
public:
+ using interruptor =
+ crimson::interruptible::interruptor<crimson::osd::IOInterruptCondition>;
template <typename T = void>
using interruptible_future = RecoveryBackend::interruptible_future<T>;
PGRecovery(PGRecoveryListener* pg) : pg(pg) {}
diff --git a/src/crimson/osd/pg_shard_manager.h b/src/crimson/osd/pg_shard_manager.h
index b9879c8c9dd..f7bd7a6c08e 100644
--- a/src/crimson/osd/pg_shard_manager.h
+++ b/src/crimson/osd/pg_shard_manager.h
@@ -256,18 +256,40 @@ public:
auto &opref = *op;
return opref.template with_blocking_event<
PGMap::PGCreationBlockingEvent
- >([&target_shard_services, &opref](auto &&trigger) {
- return target_shard_services.wait_for_pg(
- std::move(trigger), opref.get_pgid());
- }).safe_then([&logger, &target_shard_services, &opref](Ref<PG> pgref) {
- logger.debug("{}: have_pg", opref);
- return opref.with_pg(target_shard_services, pgref);
- }).handle_error(
- crimson::ct_error::ecanceled::handle([&logger, &opref](auto) {
- logger.debug("{}: pg creation canceled, dropping", opref);
- return seastar::now();
- })
- ).then([op=std::move(op)] {});
+ >([&target_shard_services, &opref, &logger](auto &&trigger) mutable {
+ auto pg = target_shard_services.get_pg(opref.get_pgid());
+ auto fut = ShardServices::wait_for_pg_ertr::make_ready_future<Ref<PG>>(pg);
+ if (!pg) {
+ if (opref.requires_pg()) {
+ auto osdmap = target_shard_services.get_map();
+ if (!osdmap->is_up_acting_osd_shard(
+ opref.get_pgid(), target_shard_services.local_state.whoami)) {
+ logger.debug(
+ "pg {} for {} is no longer here, discarding",
+ opref.get_pgid(), opref);
+ opref.get_handle().exit();
+ auto _fut = seastar::now();
+ if (osdmap->get_epoch() > opref.get_epoch_sent_at()) {
+ _fut = target_shard_services.send_incremental_map(
+ std::ref(opref.get_foreign_connection()),
+ opref.get_epoch_sent_at() + 1);
+ }
+ return _fut;
+ }
+ }
+ fut = target_shard_services.wait_for_pg(
+ std::move(trigger), opref.get_pgid());
+ }
+ return fut.safe_then([&logger, &target_shard_services, &opref](Ref<PG> pgref) {
+ logger.debug("{}: have_pg", opref);
+ return opref.with_pg(target_shard_services, pgref);
+ }).handle_error(
+ crimson::ct_error::ecanceled::handle([&logger, &opref](auto) {
+ logger.debug("{}: pg creation canceled, dropping", opref);
+ return seastar::now();
+ })
+ );
+ }).then([op=std::move(op)] {});
}
seastar::future<> load_pgs(crimson::os::FuturizedStore& store);
diff --git a/src/crimson/osd/replicated_backend.cc b/src/crimson/osd/replicated_backend.cc
index f09cd147ea9..6c8abecffaf 100644
--- a/src/crimson/osd/replicated_backend.cc
+++ b/src/crimson/osd/replicated_backend.cc
@@ -96,11 +96,18 @@ ReplicatedBackend::submit_transaction(
bufferlist encoded_txn;
encode(txn, encoded_txn);
+ bool is_delete = false;
for (auto &le : log_entries) {
le.mark_unrollbackable();
+ if (le.is_delete()) {
+ is_delete = true;
+ }
}
+ co_await pg.update_snap_map(log_entries, txn);
+
std::vector<pg_shard_t> to_push_clone;
+ std::vector<pg_shard_t> to_push_delete;
auto sends = std::make_unique<std::vector<seastar::future<>>>();
for (auto &pg_shard : pg_shards) {
if (pg_shard == whoami) {
@@ -115,12 +122,17 @@ ReplicatedBackend::submit_transaction(
m = new_repop_msg(
pg_shard, hoid, encoded_txn, osd_op_p,
min_epoch, map_epoch, log_entries, false, tid);
- if (_new_clone && pg.is_missing_on_peer(pg_shard, hoid)) {
- // The head is in the push queue but hasn't been pushed yet.
- // We need to ensure that the newly created clone will be
- // pushed as well, otherwise we might skip it.
- // See: https://tracker.ceph.com/issues/68808
- to_push_clone.push_back(pg_shard);
+ if (pg.is_missing_on_peer(pg_shard, hoid)) {
+ if (_new_clone) {
+ // The head is in the push queue but hasn't been pushed yet.
+ // We need to ensure that the newly created clone will be
+ // pushed as well, otherwise we might skip it.
+ // See: https://tracker.ceph.com/issues/68808
+ to_push_clone.push_back(pg_shard);
+ }
+ if (is_delete) {
+ to_push_delete.push_back(pg_shard);
+ }
}
}
pending_txn->second.acked_peers.push_back({pg_shard, eversion_t{}});
@@ -130,8 +142,6 @@ ReplicatedBackend::submit_transaction(
pg_shard.osd, std::move(m), map_epoch));
}
- co_await pg.update_snap_map(log_entries, txn);
-
pg.log_operation(
std::move(log_entries),
osd_op_p.pg_trim_to,
@@ -157,7 +167,8 @@ ReplicatedBackend::submit_transaction(
return seastar::now();
}
return peers->all_committed.get_shared_future();
- }).then_interruptible([pending_txn, this, _new_clone,
+ }).then_interruptible([pending_txn, this, _new_clone, &hoid,
+ to_push_delete=std::move(to_push_delete),
to_push_clone=std::move(to_push_clone)] {
auto acked_peers = std::move(pending_txn->second.acked_peers);
pending_trans.erase(pending_txn);
@@ -167,6 +178,9 @@ ReplicatedBackend::submit_transaction(
_new_clone->obs.oi.version,
to_push_clone);
}
+ if (!to_push_delete.empty()) {
+ pg.enqueue_delete_for_backfill(hoid, {}, to_push_delete);
+ }
return seastar::make_ready_future<
crimson::osd::acked_peers_t>(std::move(acked_peers));
});
diff --git a/src/crimson/osd/replicated_recovery_backend.cc b/src/crimson/osd/replicated_recovery_backend.cc
index 76f24196b51..0d6c9d38236 100644
--- a/src/crimson/osd/replicated_recovery_backend.cc
+++ b/src/crimson/osd/replicated_recovery_backend.cc
@@ -35,6 +35,15 @@ ReplicatedRecoveryBackend::recover_object(
logger().debug("recover_object: loading obc: {}", soid);
return pg.obc_loader.with_obc<RWState::RWREAD>(soid,
[this, soid, need](auto head, auto obc) {
+ if (!obc->obs.exists) {
+ // XXX: this recovery must be triggered by backfills and the corresponding
+ // object must have been deleted by some client request after the object
+ // is enqueued for push but before the lock is acquired by the recovery.
+ //
+ // Abort the recovery in this case, a "recover_delete" must have been
+ // added for this object by the client request that deleted it.
+ return interruptor::now();
+ }
logger().debug("recover_object: loaded obc: {}", obc->obs.oi.soid);
auto& recovery_waiter = get_recovering(soid);
recovery_waiter.obc = obc;
@@ -306,7 +315,10 @@ ReplicatedRecoveryBackend::recover_delete(
}
return seastar::make_ready_future<>();
}).then_interruptible([this, soid, &stat_diff] {
- pg.get_recovery_handler()->on_global_recover(soid, stat_diff, true);
+ const auto &missing = pg.get_peering_state().get_pg_log().get_missing();
+ if (!missing.is_missing(soid)) {
+ pg.get_recovery_handler()->on_global_recover(soid, stat_diff, true);
+ }
return seastar::make_ready_future<>();
});
});
@@ -568,14 +580,17 @@ ReplicatedRecoveryBackend::read_metadata_for_push_op(
return seastar::make_ready_future<eversion_t>(ver);
}
return interruptor::make_interruptible(interruptor::when_all_succeed(
- backend->omap_get_header(coll, ghobject_t(oid)).handle_error_interruptible<false>(
+ backend->omap_get_header(
+ coll, ghobject_t(oid), CEPH_OSD_OP_FLAG_FADVISE_DONTNEED
+ ).handle_error_interruptible<false>(
crimson::os::FuturizedStore::Shard::read_errorator::all_same_way(
[oid] (const std::error_code& e) {
logger().debug("read_metadata_for_push_op, error {} when getting omap header: {}", e, oid);
return seastar::make_ready_future<bufferlist>();
})),
- interruptor::make_interruptible(store->get_attrs(coll, ghobject_t(oid)))
- .handle_error_interruptible<false>(
+ interruptor::make_interruptible(
+ store->get_attrs(coll, ghobject_t(oid), CEPH_OSD_OP_FLAG_FADVISE_DONTNEED)
+ ).handle_error_interruptible<false>(
crimson::os::FuturizedStore::Shard::get_attrs_ertr::all_same_way(
[oid] (const std::error_code& e) {
logger().debug("read_metadata_for_push_op, error {} when getting attrs: {}", e, oid);
@@ -613,8 +628,14 @@ ReplicatedRecoveryBackend::read_object_for_push_op(
return seastar::make_ready_future<uint64_t>(offset);
}
// 1. get the extents in the interested range
- return interruptor::make_interruptible(backend->fiemap(coll, ghobject_t{oid},
- 0, copy_subset.range_end())).safe_then_interruptible(
+ return interruptor::make_interruptible(
+ backend->fiemap(
+ coll,
+ ghobject_t{oid},
+ 0,
+ copy_subset.range_end(),
+ CEPH_OSD_OP_FLAG_FADVISE_DONTNEED)
+ ).safe_then_interruptible(
[=, this](auto&& fiemap_included) mutable {
interval_set<uint64_t> extents;
try {
@@ -630,8 +651,12 @@ ReplicatedRecoveryBackend::read_object_for_push_op(
push_op->data_included.span_of(extents, offset, max_len);
// 3. read the truncated extents
// TODO: check if the returned extents are pruned
- return interruptor::make_interruptible(store->readv(coll, ghobject_t{oid},
- push_op->data_included, 0));
+ return interruptor::make_interruptible(
+ store->readv(
+ coll,
+ ghobject_t{oid},
+ push_op->data_included,
+ CEPH_OSD_OP_FLAG_FADVISE_DONTNEED));
}).safe_then_interruptible([push_op, range_end=copy_subset.range_end()](auto &&bl) {
push_op->data.claim_append(std::move(bl));
uint64_t recovered_to = 0;
diff --git a/src/crimson/osd/shard_services.cc b/src/crimson/osd/shard_services.cc
index c2340898929..e1acb34636f 100644
--- a/src/crimson/osd/shard_services.cc
+++ b/src/crimson/osd/shard_services.cc
@@ -783,6 +783,11 @@ seastar::future<> ShardServices::dispatch_context_transaction(
co_return;
}
+Ref<PG> ShardServices::get_pg(spg_t pgid)
+{
+ return local_state.get_pg(pgid);
+}
+
seastar::future<> ShardServices::dispatch_context_messages(
BufferedRecoveryMessages &&ctx)
{
diff --git a/src/crimson/osd/shard_services.h b/src/crimson/osd/shard_services.h
index 56ac4963fff..f1ed9b8d911 100644
--- a/src/crimson/osd/shard_services.h
+++ b/src/crimson/osd/shard_services.h
@@ -483,6 +483,8 @@ public:
return pg_to_shard_mapping.remove_pg_mapping(pgid);
}
+ Ref<PG> get_pg(spg_t pgid);
+
crimson::common::CephContext *get_cct() {
return &(local_state.cct);
}
@@ -589,6 +591,7 @@ public:
FORWARD_TO_OSD_SINGLETON(get_pool_info)
FORWARD(with_throttle_while, with_throttle_while, local_state.throttler)
+ FORWARD(try_acquire_throttle_now, try_acquire_throttle_now, local_state.throttler)
FORWARD_TO_OSD_SINGLETON(build_incremental_map_msg)
FORWARD_TO_OSD_SINGLETON(send_incremental_map)
diff --git a/src/crimson/tools/store_nbd/tm_driver.cc b/src/crimson/tools/store_nbd/tm_driver.cc
index 389ecd78afc..870809c5153 100644
--- a/src/crimson/tools/store_nbd/tm_driver.cc
+++ b/src/crimson/tools/store_nbd/tm_driver.cc
@@ -25,6 +25,7 @@ seastar::future<> TMDriver::write(
return tm->with_transaction_intr(
Transaction::src_t::MUTATE,
"write",
+ CACHE_HINT_TOUCH,
[this, offset, &ptr](auto& t)
{
return tm->remove(t, laddr_t::from_byte_offset(offset)
@@ -112,6 +113,7 @@ seastar::future<bufferlist> TMDriver::read(
return tm->with_transaction_intr(
Transaction::src_t::READ,
"read",
+ CACHE_HINT_TOUCH,
[=, &blret, this](auto& t)
{
return read_extents(t, laddr_t::from_byte_offset(offset), size
diff --git a/src/exporter/ceph_exporter.cc b/src/exporter/ceph_exporter.cc
index 44b67c7e615..2232851c094 100644
--- a/src/exporter/ceph_exporter.cc
+++ b/src/exporter/ceph_exporter.cc
@@ -30,13 +30,13 @@ static void handle_signal(int signum)
static void usage() {
std::cout << "usage: ceph-exporter [options]\n"
<< "options:\n"
- " --sock-dir: The path to ceph daemons socket files dir\n"
- " --addrs: Host ip address where exporter is deployed\n"
- " --port: Port to deploy exporter on. Default is 9926\n"
- " --cert-file: Path to the certificate file to use https\n"
- " --key-file: Path to the certificate key file to use https\n"
+ " --sock-dir: The path to Ceph daemon sockets (*.asok)\n"
+ " --addrs: Host IP address on which the exporter is to listen\n"
+ " --port: TCP Port on which the exporter is to listen. Default is 9926\n"
+ " --cert-file: Path to the certificate file when using HTTPS\n"
+ " --key-file: Path to the certificate key file when using HTTPS\n"
" --prio-limit: Only perf counters greater than or equal to prio-limit are fetched. Default: 5\n"
- " --stats-period: Time to wait before sending requests again to exporter server (seconds). Default: 5s"
+ " --stats-period: Interval between daemon scrapes (seconds). Default: 5s"
<< std::endl;
generic_server_usage();
}
diff --git a/src/include/rados/librados.hpp b/src/include/rados/librados.hpp
index 4a7ac3ea6e0..0f5a9036eff 100644
--- a/src/include/rados/librados.hpp
+++ b/src/include/rados/librados.hpp
@@ -202,6 +202,8 @@ inline namespace v14_2_0 {
int set_complete_callback(void *cb_arg, callback_t cb);
int set_safe_callback(void *cb_arg, callback_t cb)
__attribute__ ((deprecated));
+ /// Request immediate cancellation as if by IoCtx::aio_cancel().
+ int cancel();
int wait_for_complete();
int wait_for_safe() __attribute__ ((deprecated));
int wait_for_complete_and_cb();
@@ -772,17 +774,30 @@ inline namespace v14_2_0 {
void tier_evict();
};
- /* IoCtx : This is a context in which we can perform I/O.
- * It includes a Pool,
+ /**
+ * @brief A handle to a RADOS pool used to perform I/O operations.
*
* Typical use (error checking omitted):
- *
+ * @code
* IoCtx p;
* rados.ioctx_create("my_pool", p);
- * p->stat(&stats);
- * ... etc ...
+ * p.stat("my_object", &size, &mtime);
+ * @endcode
+ *
+ * IoCtx holds a pointer to its underlying implementation. The dup()
+ * method performs a deep copy of this implementation, but the copy
+ * construction and assignment operations perform shallow copies by
+ * sharing that pointer.
+ *
+ * Function names starting with aio_ are asynchronous operations that
+ * return immediately after submitting a request, and whose completions
+ * are managed by the given AioCompletion pointer. The IoCtx's underlying
+ * implementation is involved in the delivery of these completions, so
+ * the caller must guarantee that its lifetime is preserved until then -
+ * if not by preserving the IoCtx instance that submitted the request,
+ * then by a copied/moved instance that shares the same implementation.
*
- * NOTE: be sure to call watch_flush() prior to destroying any IoCtx
+ * @note Be sure to call watch_flush() prior to destroying any IoCtx
* that is used for watch events to ensure that racing callbacks
* have completed.
*/
@@ -791,9 +806,13 @@ inline namespace v14_2_0 {
public:
IoCtx();
static void from_rados_ioctx_t(rados_ioctx_t p, IoCtx &pool);
+ /// Construct a shallow copy of rhs, sharing its underlying implementation.
IoCtx(const IoCtx& rhs);
+ /// Assign a shallow copy of rhs, sharing its underlying implementation.
IoCtx& operator=(const IoCtx& rhs);
+ /// Move construct from rhs, transferring its underlying implementation.
IoCtx(IoCtx&& rhs) noexcept;
+ /// Move assign from rhs, transferring its underlying implementation.
IoCtx& operator=(IoCtx&& rhs) noexcept;
~IoCtx();
@@ -1150,7 +1169,8 @@ inline namespace v14_2_0 {
int aio_stat2(const std::string& oid, AioCompletion *c, uint64_t *psize, struct timespec *pts);
/**
- * Cancel aio operation
+ * Request immediate cancellation with error code -ECANCELED
+ * if the operation hasn't already completed.
*
* @param c completion handle
* @returns 0 on success, negative error code on failure
diff --git a/src/include/random.h b/src/include/random.h
index f2e3e37bcd7..6b7c9405efd 100644
--- a/src/include/random.h
+++ b/src/include/random.h
@@ -16,9 +16,9 @@
#define CEPH_RANDOM_H 1
#include <mutex>
+#include <optional>
#include <random>
#include <type_traits>
-#include <boost/optional.hpp>
// Workaround for https://gcc.gnu.org/bugzilla/show_bug.cgi?id=85494
#ifdef __MINGW32__
@@ -123,7 +123,7 @@ void randomize_rng()
template <typename EngineT>
EngineT& engine()
{
- thread_local boost::optional<EngineT> rng_engine;
+ thread_local std::optional<EngineT> rng_engine;
if (!rng_engine) {
rng_engine.emplace(EngineT());
diff --git a/src/kv/KeyValueDB.h b/src/kv/KeyValueDB.h
index 858742d511e..d926840180e 100644
--- a/src/kv/KeyValueDB.h
+++ b/src/kv/KeyValueDB.h
@@ -9,6 +9,7 @@
#include <map>
#include <optional>
#include <string>
+#include <string_view>
#include <boost/scoped_ptr.hpp>
#include "include/encoding.h"
#include "common/Formatter.h"
@@ -211,6 +212,10 @@ public:
return "";
}
virtual ceph::buffer::list value() = 0;
+ // When valid() returns true, the value returned as a string_view
+ // is guaranteed to remain valid until the iterator is moved to another
+ // position, i.e. until a call to next() / seek_to_first() / etc.
+ virtual std::string_view value_as_sv() = 0;
virtual int status() = 0;
virtual ~SimplestIteratorImpl() {}
};
@@ -220,7 +225,12 @@ public:
virtual ~IteratorImpl() {}
virtual int seek_to_last() = 0;
virtual int prev() = 0;
+ // When valid() returns true, the key returned as a string_view
+ // is guaranteed to remain valid until the iterator is moved to another
+ // position, i.e. until a call to next() / seek_to_first() / etc.
+ virtual std::string_view key_as_sv() = 0;
virtual std::pair<std::string, std::string> raw_key() = 0;
+ virtual std::pair<std::string_view, std::string_view> raw_key_as_sv() = 0;
virtual ceph::buffer::ptr value_as_ptr() {
ceph::buffer::list bl = value();
if (bl.length() == 1) {
@@ -247,7 +257,9 @@ public:
virtual int next() = 0;
virtual int prev() = 0;
virtual std::string key() = 0;
+ virtual std::string_view key_as_sv() = 0;
virtual std::pair<std::string,std::string> raw_key() = 0;
+ virtual std::pair<std::string_view, std::string_view> raw_key_as_sv() = 0;
virtual bool raw_key_is_prefixed(const std::string &prefix) = 0;
virtual ceph::buffer::list value() = 0;
virtual ceph::buffer::ptr value_as_ptr() {
@@ -258,6 +270,7 @@ public:
return ceph::buffer::ptr();
}
}
+ virtual std::string_view value_as_sv() = 0;
virtual int status() = 0;
virtual size_t key_size() {
return 0;
@@ -315,15 +328,24 @@ private:
std::string key() override {
return generic_iter->key();
}
+ std::string_view key_as_sv() override {
+ return generic_iter->key_as_sv();
+ }
std::pair<std::string, std::string> raw_key() override {
return generic_iter->raw_key();
}
+ std::pair<std::string_view, std::string_view> raw_key_as_sv() override {
+ return generic_iter->raw_key_as_sv();
+ }
ceph::buffer::list value() override {
return generic_iter->value();
}
ceph::buffer::ptr value_as_ptr() override {
return generic_iter->value_as_ptr();
}
+ std::string_view value_as_sv() override {
+ return generic_iter->value_as_sv();
+ }
int status() override {
return generic_iter->status();
}
diff --git a/src/kv/RocksDBStore.cc b/src/kv/RocksDBStore.cc
index ca63ea06484..51d224b67c0 100644
--- a/src/kv/RocksDBStore.cc
+++ b/src/kv/RocksDBStore.cc
@@ -6,6 +6,7 @@
#include <memory>
#include <set>
#include <string>
+#include <string_view>
#include <errno.h>
#include <unistd.h>
#include <sys/types.h>
@@ -47,6 +48,7 @@ using std::ostream;
using std::pair;
using std::set;
using std::string;
+using std::string_view;
using std::unique_ptr;
using std::vector;
@@ -1992,7 +1994,7 @@ int RocksDBStore::split_key(rocksdb::Slice in, string *prefix, string *key)
// Find separator inside Slice
char* separator = (char*) memchr(in.data(), 0, in.size());
- if (separator == NULL)
+ if (separator == nullptr)
return -EINVAL;
prefix_len = size_t(separator - in.data());
if (prefix_len >= in.size())
@@ -2006,6 +2008,27 @@ int RocksDBStore::split_key(rocksdb::Slice in, string *prefix, string *key)
return 0;
}
+// TODO: deduplicate the code, preferably by removing the string variant
+int RocksDBStore::split_key(rocksdb::Slice in, string_view *prefix, string_view *key)
+{
+ size_t prefix_len = 0;
+
+ // Find separator inside Slice
+ char* separator = (char*) memchr(in.data(), 0, in.size());
+ if (separator == nullptr)
+ return -EINVAL;
+ prefix_len = size_t(separator - in.data());
+ if (prefix_len >= in.size())
+ return -EINVAL;
+
+ // Fetch prefix and/or key directly from Slice
+ if (prefix)
+ *prefix = string_view(in.data(), prefix_len);
+ if (key)
+ *key = string_view(separator + 1, in.size() - prefix_len - 1);
+ return 0;
+}
+
void RocksDBStore::compact()
{
dout(2) << __func__ << " starting" << dendl;
@@ -2226,7 +2249,13 @@ int RocksDBStore::RocksDBWholeSpaceIteratorImpl::prev()
string RocksDBStore::RocksDBWholeSpaceIteratorImpl::key()
{
string out_key;
- split_key(dbiter->key(), 0, &out_key);
+ split_key(dbiter->key(), nullptr, &out_key);
+ return out_key;
+}
+string_view RocksDBStore::RocksDBWholeSpaceIteratorImpl::key_as_sv()
+{
+ string_view out_key;
+ split_key(dbiter->key(), nullptr, &out_key);
return out_key;
}
pair<string,string> RocksDBStore::RocksDBWholeSpaceIteratorImpl::raw_key()
@@ -2235,6 +2264,12 @@ pair<string,string> RocksDBStore::RocksDBWholeSpaceIteratorImpl::raw_key()
split_key(dbiter->key(), &prefix, &key);
return make_pair(prefix, key);
}
+pair<string_view,string_view> RocksDBStore::RocksDBWholeSpaceIteratorImpl::raw_key_as_sv()
+{
+ string_view prefix, key;
+ split_key(dbiter->key(), &prefix, &key);
+ return make_pair(prefix, key);
+}
bool RocksDBStore::RocksDBWholeSpaceIteratorImpl::raw_key_is_prefixed(const string &prefix) {
// Look for "prefix\0" right in rocksb::Slice
@@ -2267,6 +2302,12 @@ bufferptr RocksDBStore::RocksDBWholeSpaceIteratorImpl::value_as_ptr()
return bufferptr(val.data(), val.size());
}
+std::string_view RocksDBStore::RocksDBWholeSpaceIteratorImpl::value_as_sv()
+{
+ rocksdb::Slice val = dbiter->value();
+ return std::string_view{val.data(), val.size()};
+}
+
int RocksDBStore::RocksDBWholeSpaceIteratorImpl::status()
{
return dbiter->status().ok() ? 0 : -1;
@@ -2348,9 +2389,15 @@ public:
string key() override {
return dbiter->key().ToString();
}
+ string_view key_as_sv() override {
+ return dbiter->key().ToStringView();
+ }
std::pair<std::string, std::string> raw_key() override {
return make_pair(prefix, key());
}
+ std::pair<std::string_view, std::string_view> raw_key_as_sv() override {
+ return {prefix, dbiter->key().ToStringView()}; // not make_pair(): it would copy prefix into a temporary pair and return a dangling view
+ }
bufferlist value() override {
return to_bufferlist(dbiter->value());
}
@@ -2358,6 +2405,10 @@ public:
rocksdb::Slice val = dbiter->value();
return bufferptr(val.data(), val.size());
}
+ std::string_view value_as_sv() override {
+ rocksdb::Slice val = dbiter->value();
+ return std::string_view{val.data(), val.size()};
+ }
int status() override {
return dbiter->status().ok() ? 0 : -1;
}
@@ -2668,6 +2719,15 @@ public:
}
}
+ std::string_view key_as_sv() override
+ {
+ if (smaller == on_main) {
+ return main->key_as_sv();
+ } else {
+ return current_shard->second->key_as_sv();
+ }
+ }
+
std::pair<std::string,std::string> raw_key() override
{
if (smaller == on_main) {
@@ -2677,6 +2737,15 @@ public:
}
}
+ std::pair<std::string_view,std::string_view> raw_key_as_sv() override
+ {
+ if (smaller == on_main) {
+ return main->raw_key_as_sv();
+ } else {
+ return { current_shard->first, current_shard->second->key_as_sv() };
+ }
+ }
+
bool raw_key_is_prefixed(const std::string &prefix) override
{
if (smaller == on_main) {
@@ -2695,6 +2764,15 @@ public:
}
}
+ std::string_view value_as_sv() override
+ {
+ if (smaller == on_main) {
+ return main->value_as_sv();
+ } else {
+ return current_shard->second->value_as_sv();
+ }
+ }
+
int status() override
{
//because we already had to inspect key, it must be ok
@@ -3017,9 +3095,15 @@ public:
string key() override {
return iters[0]->key().ToString();
}
+ string_view key_as_sv() override {
+ return iters[0]->key().ToStringView();
+ }
std::pair<std::string, std::string> raw_key() override {
return make_pair(prefix, key());
}
+ std::pair<std::string_view, std::string_view> raw_key_as_sv() override {
+ return {prefix, iters[0]->key().ToStringView()}; // not make_pair(): it would copy prefix into a temporary pair and return a dangling view
+ }
bufferlist value() override {
return to_bufferlist(iters[0]->value());
}
@@ -3027,6 +3111,10 @@ public:
rocksdb::Slice val = iters[0]->value();
return bufferptr(val.data(), val.size());
}
+ std::string_view value_as_sv() override {
+ rocksdb::Slice val = iters[0]->value();
+ return std::string_view{val.data(), val.size()};
+ }
int status() override {
return iters[0]->status().ok() ? 0 : -1;
}
diff --git a/src/kv/RocksDBStore.h b/src/kv/RocksDBStore.h
index 477b209854c..50b91be2bf6 100644
--- a/src/kv/RocksDBStore.h
+++ b/src/kv/RocksDBStore.h
@@ -386,10 +386,13 @@ public:
int next() override;
int prev() override;
std::string key() override;
+ std::string_view key_as_sv() override;
std::pair<std::string,std::string> raw_key() override;
+ std::pair<std::string_view,std::string_view> raw_key_as_sv() override;
bool raw_key_is_prefixed(const std::string &prefix) override;
ceph::bufferlist value() override;
ceph::bufferptr value_as_ptr() override;
+ std::string_view value_as_sv() override;
int status() override;
size_t key_size() override;
size_t value_size() override;
@@ -419,6 +422,7 @@ public:
}
static int split_key(rocksdb::Slice in, std::string *prefix, std::string *key);
+ static int split_key(rocksdb::Slice in, std::string_view *prefix, std::string_view *key);
static std::string past_prefix(const std::string &prefix);
diff --git a/src/librados/librados_asio.h b/src/librados/librados_asio.h
index 0aedc376575..3e5b7c57c6f 100644
--- a/src/librados/librados_asio.h
+++ b/src/librados/librados_asio.h
@@ -14,6 +14,9 @@
#ifndef LIBRADOS_ASIO_H
#define LIBRADOS_ASIO_H
+#include <boost/asio/associated_cancellation_slot.hpp>
+#include <boost/asio/cancellation_type.hpp>
+
#include "include/rados/librados.hpp"
#include "common/async/completion.h"
#include "librados/AioCompletionImpl.h"
@@ -74,6 +77,7 @@ struct Invoker<void> {
template <typename Result>
struct AsyncOp : Invoker<Result> {
unique_aio_completion_ptr aio_completion;
+ boost::asio::cancellation_slot slot;
using Signature = typename Invoker<Result>::Signature;
using Completion = ceph::async::Completion<Signature, AsyncOp<Result>>;
@@ -83,6 +87,7 @@ struct AsyncOp : Invoker<Result> {
auto p = std::unique_ptr<Completion>{static_cast<Completion*>(arg)};
// move result out of Completion memory being freed
auto op = std::move(p->user_data);
+ op.slot.clear(); // clear our cancellation handler
// access AioCompletionImpl directly to avoid locking
const librados::AioCompletionImpl* pc = op.aio_completion->pc;
const int ret = pc->rval;
@@ -94,11 +99,46 @@ struct AsyncOp : Invoker<Result> {
op.dispatch(std::move(p), ec, ver);
}
+ struct op_cancellation {
+ AioCompletion* completion = nullptr;
+ bool is_read = false;
+
+ void operator()(boost::asio::cancellation_type type) {
+ if (completion == nullptr) {
+ return; // no AioCompletion attached
+ } else if (type == boost::asio::cancellation_type::none) {
+ return; // no cancellation requested
+ } else if (is_read) {
+ // read operations produce no side effects, so can satisfy the
+ // requirements of 'total' cancellation. the weaker requirements
+ // of 'partial' and 'terminal' are also satisfied
+ completion->cancel();
+ } else if (!!(type & boost::asio::cancellation_type::terminal)) {
+ // writes only support 'terminal' cancellation: we can't guarantee that
+ // no osd has applied (or will apply) the write. 'type' is a bitmask, so
+ // test the bit; emitting 'all' or 'terminal|partial' must cancel too
+ completion->cancel();
+ }
+ }
+ };
+
template <typename Executor1, typename CompletionHandler>
- static auto create(const Executor1& ex1, CompletionHandler&& handler) {
+ static auto create(const Executor1& ex1, bool is_read,
+ CompletionHandler&& handler) {
+ op_cancellation* cancel_handler = nullptr;
+ auto slot = boost::asio::get_associated_cancellation_slot(handler);
+ if (slot.is_connected()) {
+ cancel_handler = &slot.template emplace<op_cancellation>();
+ }
+
auto p = Completion::create(ex1, std::move(handler));
p->user_data.aio_completion.reset(
Rados::aio_create_completion(p.get(), aio_dispatch));
+ if (cancel_handler) {
+ cancel_handler->completion = p->user_data.aio_completion.get();
+ cancel_handler->is_read = is_read;
+ p->user_data.slot = std::move(slot);
+ }
return p;
}
};
@@ -108,6 +148,9 @@ struct AsyncOp : Invoker<Result> {
/// Calls IoCtx::aio_read() and arranges for the AioCompletion to call a
/// given handler with signature (error_code, version_t, bufferlist).
+///
+/// The given IoCtx reference is not required to remain valid, but some IoCtx
+/// instance must preserve its underlying implementation until completion.
template <typename ExecutionContext, typename CompletionToken>
auto async_read(ExecutionContext& ctx, IoCtx& io, const std::string& oid,
size_t len, uint64_t off, CompletionToken&& token)
@@ -117,7 +160,8 @@ auto async_read(ExecutionContext& ctx, IoCtx& io, const std::string& oid,
return boost::asio::async_initiate<CompletionToken, Signature>(
[] (auto handler, auto ex, IoCtx& io, const std::string& oid,
size_t len, uint64_t off) {
- auto p = Op::create(ex, std::move(handler));
+ constexpr bool is_read = true;
+ auto p = Op::create(ex, is_read, std::move(handler));
auto& op = p->user_data;
int ret = io.aio_read(oid, op.aio_completion.get(), &op.result, len, off);
@@ -132,6 +176,9 @@ auto async_read(ExecutionContext& ctx, IoCtx& io, const std::string& oid,
/// Calls IoCtx::aio_write() and arranges for the AioCompletion to call a
/// given handler with signature (error_code, version_t).
+///
+/// The given IoCtx reference is not required to remain valid, but some IoCtx
+/// instance must preserve its underlying implementation until completion.
template <typename ExecutionContext, typename CompletionToken>
auto async_write(ExecutionContext& ctx, IoCtx& io, const std::string& oid,
const bufferlist &bl, size_t len, uint64_t off,
@@ -142,7 +189,8 @@ auto async_write(ExecutionContext& ctx, IoCtx& io, const std::string& oid,
return boost::asio::async_initiate<CompletionToken, Signature>(
[] (auto handler, auto ex, IoCtx& io, const std::string& oid,
const bufferlist &bl, size_t len, uint64_t off) {
- auto p = Op::create(ex, std::move(handler));
+ constexpr bool is_read = false;
+ auto p = Op::create(ex, is_read, std::move(handler));
auto& op = p->user_data;
int ret = io.aio_write(oid, op.aio_completion.get(), bl, len, off);
@@ -157,6 +205,9 @@ auto async_write(ExecutionContext& ctx, IoCtx& io, const std::string& oid,
/// Calls IoCtx::aio_operate() and arranges for the AioCompletion to call a
/// given handler with signature (error_code, version_t, bufferlist).
+///
+/// The given IoCtx reference is not required to remain valid, but some IoCtx
+/// instance must preserve its underlying implementation until completion.
template <typename ExecutionContext, typename CompletionToken>
auto async_operate(ExecutionContext& ctx, IoCtx& io, const std::string& oid,
ObjectReadOperation *read_op, int flags,
@@ -167,7 +218,8 @@ auto async_operate(ExecutionContext& ctx, IoCtx& io, const std::string& oid,
return boost::asio::async_initiate<CompletionToken, Signature>(
[] (auto handler, auto ex, IoCtx& io, const std::string& oid,
ObjectReadOperation *read_op, int flags) {
- auto p = Op::create(ex, std::move(handler));
+ constexpr bool is_read = true;
+ auto p = Op::create(ex, is_read, std::move(handler));
auto& op = p->user_data;
int ret = io.aio_operate(oid, op.aio_completion.get(), read_op,
@@ -183,6 +235,9 @@ auto async_operate(ExecutionContext& ctx, IoCtx& io, const std::string& oid,
/// Calls IoCtx::aio_operate() and arranges for the AioCompletion to call a
/// given handler with signature (error_code, version_t).
+///
+/// The given IoCtx reference is not required to remain valid, but some IoCtx
+/// instance must preserve its underlying implementation until completion.
template <typename ExecutionContext, typename CompletionToken>
auto async_operate(ExecutionContext& ctx, IoCtx& io, const std::string& oid,
ObjectWriteOperation *write_op, int flags,
@@ -194,7 +249,8 @@ auto async_operate(ExecutionContext& ctx, IoCtx& io, const std::string& oid,
[] (auto handler, auto ex, IoCtx& io, const std::string& oid,
ObjectWriteOperation *write_op, int flags,
const jspan_context* trace_ctx) {
- auto p = Op::create(ex, std::move(handler));
+ constexpr bool is_read = false;
+ auto p = Op::create(ex, is_read, std::move(handler));
auto& op = p->user_data;
int ret = io.aio_operate(oid, op.aio_completion.get(), write_op, flags, trace_ctx);
@@ -209,6 +265,9 @@ auto async_operate(ExecutionContext& ctx, IoCtx& io, const std::string& oid,
/// Calls IoCtx::aio_notify() and arranges for the AioCompletion to call a
/// given handler with signature (error_code, version_t, bufferlist).
+///
+/// The given IoCtx reference is not required to remain valid, but some IoCtx
+/// instance must preserve its underlying implementation until completion.
template <typename ExecutionContext, typename CompletionToken>
auto async_notify(ExecutionContext& ctx, IoCtx& io, const std::string& oid,
bufferlist& bl, uint64_t timeout_ms, CompletionToken &&token)
@@ -218,7 +277,8 @@ auto async_notify(ExecutionContext& ctx, IoCtx& io, const std::string& oid,
return boost::asio::async_initiate<CompletionToken, Signature>(
[] (auto handler, auto ex, IoCtx& io, const std::string& oid,
bufferlist& bl, uint64_t timeout_ms) {
- auto p = Op::create(ex, std::move(handler));
+ constexpr bool is_read = false;
+ auto p = Op::create(ex, is_read, std::move(handler));
auto& op = p->user_data;
int ret = io.aio_notify(oid, op.aio_completion.get(),
diff --git a/src/librados/librados_cxx.cc b/src/librados/librados_cxx.cc
index 2167eeade3c..60217b99b41 100644
--- a/src/librados/librados_cxx.cc
+++ b/src/librados/librados_cxx.cc
@@ -1103,6 +1103,14 @@ void librados::AioCompletion::release()
delete this;
}
+int librados::AioCompletion::cancel()
+{
+ if (!pc->io) {
+ return 0; // no operation was started
+ }
+ return pc->io->aio_cancel(pc);
+}
+
///////////////////////////// IoCtx //////////////////////////////
librados::IoCtx::IoCtx() : io_ctx_impl(NULL)
{
diff --git a/src/mgr/PyModule.h b/src/mgr/PyModule.h
index 177447c2cb3..a47db3a47ef 100644
--- a/src/mgr/PyModule.h
+++ b/src/mgr/PyModule.h
@@ -161,9 +161,9 @@ public:
}
const std::string &get_name() const {
- std::lock_guard l(lock) ; return module_name;
+ return module_name;
}
- const std::string &get_error_string() const {
+ std::string get_error_string() const {
std::lock_guard l(lock) ; return error_string;
}
bool get_can_run() const {
diff --git a/src/mon/NVMeofGwMap.cc b/src/mon/NVMeofGwMap.cc
index 719403925ad..2d2735f1e7c 100755
--- a/src/mon/NVMeofGwMap.cc
+++ b/src/mon/NVMeofGwMap.cc
@@ -171,6 +171,8 @@ int NVMeofGwMap::cfg_delete_gw(
<< state.availability << " Resulting GW availability: "
<< state.availability << dendl;
state.subsystems.clear();//ignore subsystems of this GW
+ utime_t now = ceph_clock_now();
+ mon->nvmegwmon()->gws_deleting_time[group_key][gw_id] = now;
return 0;
}
}
@@ -895,10 +897,12 @@ struct CMonRequestProposal : public Context {
}
};
-void NVMeofGwMap::get_health_checks(health_check_map_t *checks) const
+void NVMeofGwMap::get_health_checks(health_check_map_t *checks)
{
list<string> singleGatewayDetail;
list<string> gatewayDownDetail;
+ list<string> gatewayInDeletingDetail;
+ int deleting_gateways = 0;
for (const auto& created_map_pair: created_gws) {
const auto& group_key = created_map_pair.first;
auto& group = group_key.second;
@@ -915,9 +919,37 @@ void NVMeofGwMap::get_health_checks(health_check_map_t *checks) const
ostringstream ss;
ss << "NVMeoF Gateway '" << gw_id << "' is unavailable." ;
gatewayDownDetail.push_back(ss.str());
+ } else if (gw_created.availability == gw_availability_t::GW_DELETING) {
+ deleting_gateways++;
+ utime_t now = ceph_clock_now();
+ bool found_deleting_time = false;
+ auto gws_deleting_time = mon->nvmegwmon()->gws_deleting_time;
+ auto group_it = gws_deleting_time.find(group_key);
+ if (group_it != gws_deleting_time.end()) {
+ auto& gw_map = group_it->second;
+ auto gw_it = gw_map.find(gw_id);
+ if (gw_it != gw_map.end()) {
+ found_deleting_time = true;
+ utime_t delete_time = gw_it->second;
+ if ((now - delete_time) > g_conf().get_val<std::chrono::seconds>("mon_nvmeofgw_delete_grace").count()) {
+ ostringstream ss;
+ ss << "NVMeoF Gateway '" << gw_id << "' is in deleting state.";
+ gatewayInDeletingDetail.push_back(ss.str());
+ }
+ }
+ }
+ if (!found_deleting_time) {
+ // DELETING gateway not found in gws_deleting_time, set timeout now
+ mon->nvmegwmon()->gws_deleting_time[group_key][gw_id] = now;
+ }
}
}
}
+ if (deleting_gateways == 0) {
+ // no gateway in GW_DELETING state currently, flush old gws_deleting_time
+ mon->nvmegwmon()->gws_deleting_time.clear();
+ }
+
if (!singleGatewayDetail.empty()) {
ostringstream ss;
ss << singleGatewayDetail.size() << " group(s) have only 1 nvmeof gateway"
@@ -934,6 +966,15 @@ void NVMeofGwMap::get_health_checks(health_check_map_t *checks) const
ss.str(), gatewayDownDetail.size());
d.detail.swap(gatewayDownDetail);
}
+ if (!gatewayInDeletingDetail.empty()) {
+ ostringstream ss;
+ ss << gatewayInDeletingDetail.size() << " gateway(s) are in deleting state"
+ << "; namespaces are automatically balanced across remaining gateways, "
+ << "this should take a few minutes.";
+ auto& d = checks->add("NVMEOF_GATEWAY_DELETING", HEALTH_WARN,
+ ss.str(), gatewayInDeletingDetail.size());
+ d.detail.swap(gatewayInDeletingDetail);
+ }
}
int NVMeofGwMap::blocklist_gw(
diff --git a/src/mon/NVMeofGwMap.h b/src/mon/NVMeofGwMap.h
index 5f657733012..85fd62b3a07 100755
--- a/src/mon/NVMeofGwMap.h
+++ b/src/mon/NVMeofGwMap.h
@@ -144,7 +144,7 @@ public:
DECODE_FINISH(bl);
}
- void get_health_checks(health_check_map_t *checks) const;
+ void get_health_checks(health_check_map_t *checks);
};
#include "NVMeofGwSerialize.h"
diff --git a/src/mon/NVMeofGwMon.h b/src/mon/NVMeofGwMon.h
index 7fae8b766a5..d7f5fd89cde 100644
--- a/src/mon/NVMeofGwMon.h
+++ b/src/mon/NVMeofGwMon.h
@@ -82,6 +82,8 @@ public:
void check_subs(bool type);
void check_sub(Subscription *sub);
+ std::map<NvmeGroupKey, std::map<NvmeGwId, utime_t>> gws_deleting_time;
+
private:
void synchronize_last_beacon();
void process_gw_down(const NvmeGwId &gw_id,
diff --git a/src/os/DBObjectMap.cc b/src/os/DBObjectMap.cc
index 7da9a67be62..65627b5f818 100644
--- a/src/os/DBObjectMap.cc
+++ b/src/os/DBObjectMap.cc
@@ -519,6 +519,11 @@ bufferlist DBObjectMap::DBObjectMapIteratorImpl::value()
return cur_iter->value();
}
+// Return the current entry's value as a string_view by forwarding to the
+// iterator currently selected in cur_iter.
+// NOTE(review): the view presumably stays valid only until the underlying
+// iterator is advanced or destroyed -- confirm against the KeyValueDB iterator.
+std::string_view DBObjectMap::DBObjectMapIteratorImpl::value_as_sv()
+{
+ return cur_iter->value_as_sv();
+}
+
int DBObjectMap::DBObjectMapIteratorImpl::status()
{
return r;
diff --git a/src/os/DBObjectMap.h b/src/os/DBObjectMap.h
index 444f21eb815..1e1452010e7 100644
--- a/src/os/DBObjectMap.h
+++ b/src/os/DBObjectMap.h
@@ -393,6 +393,7 @@ private:
int next() override { ceph_abort(); return 0; }
std::string key() override { ceph_abort(); return ""; }
ceph::buffer::list value() override { ceph_abort(); return ceph::buffer::list(); }
+ std::string_view value_as_sv() override { ceph_abort(); return std::string_view(); }
int status() override { return 0; }
};
@@ -431,6 +432,7 @@ private:
int next() override;
std::string key() override;
ceph::buffer::list value() override;
+ std::string_view value_as_sv() override;
int status() override;
bool on_parent() {
diff --git a/src/os/ObjectStore.h b/src/os/ObjectStore.h
index 521435b6c31..df3ae920a2f 100644
--- a/src/os/ObjectStore.h
+++ b/src/os/ObjectStore.h
@@ -29,6 +29,7 @@
#include <errno.h>
#include <sys/stat.h>
+#include <functional>
#include <map>
#include <memory>
#include <vector>
@@ -735,15 +736,6 @@ public:
std::map<std::string, ceph::buffer::list> *out ///< [out] Returned keys and values
) = 0;
-#ifdef WITH_SEASTAR
- virtual int omap_get_values(
- CollectionHandle &c, ///< [in] Collection containing oid
- const ghobject_t &oid, ///< [in] Object containing omap
- const std::optional<std::string> &start_after, ///< [in] Keys to get
- std::map<std::string, ceph::buffer::list> *out ///< [out] Returned keys and values
- ) = 0;
-#endif
-
/// Filters keys into out which are defined on oid
virtual int omap_check_keys(
CollectionHandle &c, ///< [in] Collection containing oid
@@ -766,6 +758,48 @@ public:
const ghobject_t &oid ///< [in] object
) = 0;
+ // Starting position for omap_iterate(): a key plus the rule for how that
+ // key itself is treated.
+ struct omap_iter_seek_t {
+ // key the iteration is positioned relative to
+ std::string seek_position;
+ enum {
+ // start with provided key (seek_position), if it exists
+ LOWER_BOUND,
+ // skip provided key (seek_position) even if it exists
+ UPPER_BOUND
+ } seek_type = LOWER_BOUND;
+ // Default-constructed value: empty seek_position with LOWER_BOUND.
+ static omap_iter_seek_t min_lower_bound() { return {}; }
+ };
+ // Value returned by the omap_iterate() visitor to control the iteration.
+ enum class omap_iter_ret_t {
+ // end the iteration after this entry
+ STOP,
+ // advance to the following entry
+ NEXT
+ };
+ /**
+ * Iterate over object map with user-provided callable
+ *
+ * Warning! The callable is executed under lock on bluestore
+ * operations in c. Do not use bluestore methods on c while
+ * iterating. (Filling in a transaction is no problem).
+ *
+ * @param c collection
+ * @param oid object
+ * @param start_from where the iterator should point to at
+ * the beginning
+ * @param visitor callable that takes OMAP key and corresponding
+ * value as string_views and controls iteration
+ * by the return. It is executed for every object's
+ * OMAP entry from `start_from` till end of the
+ * object's OMAP or till the iteration is stopped
+ * by `STOP`. Please note that if there is no such
+ * entry, `visitor` will be called 0 times.
+ * @return error code, zero on success
+ */
+ virtual int omap_iterate(
+ CollectionHandle &c,
+ const ghobject_t &oid,
+ omap_iter_seek_t start_from,
+ std::function<omap_iter_ret_t(std::string_view,
+ std::string_view)> visitor
+ ) = 0;
+
virtual int flush_journal() { return -EOPNOTSUPP; }
virtual int dump_journal(std::ostream& out) { return -EOPNOTSUPP; }
diff --git a/src/os/bluestore/BlueStore.cc b/src/os/bluestore/BlueStore.cc
index a024a0c2105..25e6c4fe596 100644
--- a/src/os/bluestore/BlueStore.cc
+++ b/src/os/bluestore/BlueStore.cc
@@ -4830,7 +4830,7 @@ void BlueStore::Onode::rewrite_omap_key(const string& old, string *out)
out->append(old.c_str() + out->length(), old.size() - out->length());
}
-void BlueStore::Onode::decode_omap_key(const string& key, string *user_key)
+size_t BlueStore::Onode::calc_userkey_offset_in_omap_key() const
{
size_t pos = sizeof(uint64_t) + 1;
if (!onode.is_pgmeta_omap()) {
@@ -4840,9 +4840,15 @@ void BlueStore::Onode::decode_omap_key(const string& key, string *user_key)
pos += sizeof(uint64_t);
}
}
- *user_key = key.substr(pos);
+ return pos;
}
+// Extract the user-visible part of an OMAP database key: *user_key receives
+// the suffix of `key` starting at calc_userkey_offset_in_omap_key().
+void BlueStore::Onode::decode_omap_key(const string& key, string *user_key)
+{
+ *user_key = key.substr(calc_userkey_offset_in_omap_key());
+}
+
+
void BlueStore::Onode::finish_write(TransContext* txc, uint32_t offset, uint32_t length)
{
while (true) {
@@ -5519,7 +5525,13 @@ BlueStore::OmapIteratorImpl::OmapIteratorImpl(
if (o->onode.has_omap()) {
o->get_omap_key(string(), &head);
o->get_omap_tail(&tail);
+ auto start1 = mono_clock::now();
it->lower_bound(head);
+ c->store->log_latency(
+ __func__,
+ l_bluestore_omap_seek_to_first_lat,
+ mono_clock::now() - start1,
+ c->store->cct->_conf->bluestore_log_omap_iterator_age);
}
}
BlueStore::OmapIteratorImpl::~OmapIteratorImpl()
@@ -5654,6 +5666,13 @@ bufferlist BlueStore::OmapIteratorImpl::value()
return it->value();
}
+// Return the value of the current entry as a string_view.
+// Takes the collection lock in shared mode and asserts that the iterator is
+// valid before forwarding to the underlying iterator.
+// NOTE(review): the lock is released when this function returns while the
+// returned view is still in use by the caller -- confirm the view's lifetime
+// does not depend on the lock.
+std::string_view BlueStore::OmapIteratorImpl::value_as_sv()
+{
+ std::shared_lock l(c->lock);
+ ceph_assert(it->valid());
+ return it->value_as_sv();
+}
+
// =====================================
@@ -13601,52 +13620,6 @@ int BlueStore::omap_get_values(
return r;
}
-#ifdef WITH_SEASTAR
-int BlueStore::omap_get_values(
- CollectionHandle &c_, ///< [in] Collection containing oid
- const ghobject_t &oid, ///< [in] Object containing omap
- const std::optional<string> &start_after, ///< [in] Keys to get
- map<string, bufferlist> *output ///< [out] Returned keys and values
- )
-{
- Collection *c = static_cast<Collection *>(c_.get());
- dout(15) << __func__ << " " << c->get_cid() << " oid " << oid << dendl;
- if (!c->exists)
- return -ENOENT;
- std::shared_lock l(c->lock);
- int r = 0;
- OnodeRef o = c->get_onode(oid, false);
- if (!o || !o->exists) {
- r = -ENOENT;
- goto out;
- }
- if (!o->onode.has_omap()) {
- goto out;
- }
- o->flush();
- {
- ObjectMap::ObjectMapIterator iter = get_omap_iterator(c_, oid);
- if (!iter) {
- r = -ENOENT;
- goto out;
- }
- if (start_after) {
- iter->upper_bound(*start_after);
- } else {
- iter->seek_to_first();
- }
- for (; iter->valid(); iter->next()) {
- output->insert(make_pair(iter->key(), iter->value()));
- }
- }
-
-out:
- dout(10) << __func__ << " " << c->get_cid() << " oid " << oid << " = " << r
- << dendl;
- return r;
-}
-#endif
-
int BlueStore::omap_check_keys(
CollectionHandle &c_, ///< [in] Collection containing oid
const ghobject_t &oid, ///< [in] Object containing omap
@@ -13724,6 +13697,94 @@ ObjectMap::ObjectMapIterator BlueStore::get_omap_iterator(
return ObjectMap::ObjectMapIterator(new OmapIteratorImpl(logger,c, o, it));
}
+// Walk the OMAP entries of `oid`, handing each (user key, value) pair to `f`.
+//
+// The collection lock is held in shared mode for the whole call, including
+// while `f` runs. Iteration starts at the position described by `start_from`
+// and ends when the OMAP tail is reached, the iterator becomes invalid, or
+// `f` returns STOP.
+//
+// Returns -ENOENT when the collection or the object does not exist, and 0
+// otherwise (including when the object has no OMAP at all).
+int BlueStore::omap_iterate(
+  CollectionHandle &c_,   ///< [in] collection
+  const ghobject_t &oid, ///< [in] object
+  ObjectStore::omap_iter_seek_t start_from, ///< [in] where the iterator should point to at the beginning
+  std::function<omap_iter_ret_t(std::string_view, std::string_view)> f
+  )
+{
+  Collection *c = static_cast<Collection *>(c_.get());
+  dout(10) << __func__ << " " << c->get_cid() << " " << oid << dendl;
+  if (!c->exists) {
+    return -ENOENT;
+  }
+  std::shared_lock l(c->lock);
+  OnodeRef o = c->get_onode(oid, false);
+  if (!o || !o->exists) {
+    dout(10) << __func__ << " " << oid << "doesn't exist" <<dendl;
+    return -ENOENT;
+  }
+  o->flush();
+  dout(10) << __func__ << " has_omap = " << (int)o->onode.has_omap() <<dendl;
+  if (!o->onode.has_omap()) {
+    // nothing to do
+    return 0;
+  }
+
+  // acquire an iterator bounded to this object's OMAP key range
+  KeyValueDB::Iterator it;
+  {
+    auto bounds = KeyValueDB::IteratorBounds();
+    std::string lower_bound, upper_bound;
+    o->get_omap_key(string(), &lower_bound);
+    o->get_omap_tail(&upper_bound);
+    bounds.lower_bound = std::move(lower_bound);
+    bounds.upper_bound = std::move(upper_bound);
+    it = db->get_iterator(o->get_omap_prefix(), 0, std::move(bounds));
+  }
+
+  // seek the iterator
+  {
+    std::string key;
+    o->get_omap_key(start_from.seek_position, &key);
+    auto start = ceph::mono_clock::now();
+    if (start_from.seek_type == omap_iter_seek_t::LOWER_BOUND) {
+      it->lower_bound(key);
+      c->store->log_latency(
+        __func__,
+        l_bluestore_omap_lower_bound_lat,
+        ceph::mono_clock::now() - start,
+        c->store->cct->_conf->bluestore_log_omap_iterator_age);
+    } else {
+      it->upper_bound(key);
+      c->store->log_latency(
+        __func__,
+        l_bluestore_omap_upper_bound_lat,
+        ceph::mono_clock::now() - start,
+        c->store->cct->_conf->bluestore_log_omap_iterator_age);
+    }
+  }
+
+  // iterate!
+  std::string tail;
+  o->get_omap_tail(&tail);
+  const std::string_view::size_type userkey_offset_in_dbkey =
+    o->calc_userkey_offset_in_omap_key();
+  ceph::timespan next_lat_acc{0};
+  while (it->valid()) {
+    const auto& db_key = it->raw_key_as_sv().second;
+    if (db_key >= tail) {
+      break;
+    }
+    std::string_view user_key = db_key.substr(userkey_offset_in_dbkey);
+    omap_iter_ret_t ret = f(user_key, it->value_as_sv());
+    if (ret == omap_iter_ret_t::STOP) {
+      break;
+    } else if (ret == omap_iter_ret_t::NEXT) {
+      // The guard has to be a named object so that it lives until the end of
+      // this scope; an unnamed temporary would be destroyed before next()
+      // runs and nothing would be accumulated into next_lat_acc.
+      // (time_guard is expected to add the time between its construction and
+      // destruction to the accumulator -- see ceph_time.h.)
+      ceph::time_guard<ceph::mono_clock> next_timer{next_lat_acc};
+      it->next();
+    } else {
+      ceph_abort();
+    }
+  }
+  c->store->log_latency(
+    __func__,
+    l_bluestore_omap_next_lat,
+    next_lat_acc,
+    c->store->cct->_conf->bluestore_log_omap_iterator_age);
+  return 0;
+}
+
// -----------------
// write helpers
diff --git a/src/os/bluestore/BlueStore.h b/src/os/bluestore/BlueStore.h
index 99f8d057cf0..5549f97ffea 100644
--- a/src/os/bluestore/BlueStore.h
+++ b/src/os/bluestore/BlueStore.h
@@ -1457,6 +1457,7 @@ public:
}
void rewrite_omap_key(const std::string& old, std::string *out);
+ size_t calc_userkey_offset_in_omap_key() const;
void decode_omap_key(const std::string& key, std::string *user_key);
void finish_write(TransContext* txc, uint32_t offset, uint32_t length);
@@ -1753,6 +1754,7 @@ public:
int next() override;
std::string key() override;
ceph::buffer::list value() override;
+ std::string_view value_as_sv() override;
std::string tail_key() override {
return tail;
}
@@ -3416,15 +3418,6 @@ public:
std::map<std::string, ceph::buffer::list> *out ///< [out] Returned keys and values
) override;
-#ifdef WITH_SEASTAR
- int omap_get_values(
- CollectionHandle &c, ///< [in] Collection containing oid
- const ghobject_t &oid, ///< [in] Object containing omap
- const std::optional<std::string> &start_after, ///< [in] Keys to get
- std::map<std::string, ceph::buffer::list> *out ///< [out] Returned keys and values
- ) override;
-#endif
-
/// Filters keys into out which are defined on oid
int omap_check_keys(
CollectionHandle &c, ///< [in] Collection containing oid
@@ -3438,6 +3431,13 @@ public:
const ghobject_t &oid ///< [in] object
) override;
+ int omap_iterate(
+ CollectionHandle &c, ///< [in] collection
+ const ghobject_t &oid, ///< [in] object
+ omap_iter_seek_t start_from, ///< [in] where the iterator should point to at the beginning
+ std::function<omap_iter_ret_t(std::string_view, std::string_view)> f
+ ) override;
+
void set_fsid(uuid_d u) override {
fsid = u;
}
diff --git a/src/os/kstore/KStore.cc b/src/os/kstore/KStore.cc
index 7158486ca38..a069d429155 100644
--- a/src/os/kstore/KStore.cc
+++ b/src/os/kstore/KStore.cc
@@ -1651,6 +1651,13 @@ bufferlist KStore::OmapIteratorImpl::value()
return it->value();
}
+// Return the value of the current entry as a string_view.
+// Takes the collection lock in shared mode and asserts that the iterator is
+// valid before forwarding to the underlying iterator.
+// NOTE(review): the returned view outlives the lock taken here -- confirm its
+// lifetime does not depend on the lock.
+std::string_view KStore::OmapIteratorImpl::value_as_sv()
+{
+ std::shared_lock l{c->lock};
+ ceph_assert(it->valid());
+ return it->value_as_sv();
+}
+
int KStore::omap_get(
CollectionHandle& ch, ///< [in] Collection containing oid
const ghobject_t &oid, ///< [in] Object containing omap
@@ -1866,6 +1873,71 @@ ObjectMap::ObjectMapIterator KStore::get_omap_iterator(
return ObjectMap::ObjectMapIterator(new OmapIteratorImpl(c, o, it));
}
+// Walk the OMAP entries of `oid`, handing each (user key, value) pair to `f`.
+//
+// The collection lock is held in shared mode for the whole iteration,
+// including while `f` runs. Iteration starts at the position described by
+// `start_from` and ends when the OMAP tail is reached, the iterator becomes
+// invalid, or `f` returns STOP.
+//
+// Returns -ENOENT when the object does not exist, and 0 otherwise (including
+// when the object has no OMAP).
+int KStore::omap_iterate(
+  CollectionHandle &ch,   ///< [in] collection
+  const ghobject_t &oid, ///< [in] object
+  ObjectStore::omap_iter_seek_t start_from, ///< [in] where the iterator should point to at the beginning
+  std::function<omap_iter_ret_t(std::string_view, std::string_view)> f)
+{
+  dout(10) << __func__ << " " << ch->cid << " " << oid << dendl;
+  Collection *c = static_cast<Collection*>(ch.get());
+  {
+    std::shared_lock l{c->lock};
+
+    OnodeRef o = c->get_onode(oid, false);
+    if (!o || !o->exists) {
+      dout(10) << __func__ << " " << oid << "doesn't exist" <<dendl;
+      return -ENOENT;
+    }
+    o->flush();
+    dout(10) << __func__ << " header = " << o->onode.omap_head <<dendl;
+
+    // A zero omap_head means the object has no OMAP; the keys built below
+    // are derived from omap_head, so there is nothing to visit without one.
+    if (!o->onode.omap_head) {
+      return 0; // nothing to do
+    }
+
+    // acquire data dependencies for seek & iterate
+    std::string tail;
+    std::string seek_key;
+    get_omap_key(o->onode.omap_head, start_from.seek_position, &seek_key);
+    get_omap_tail(o->onode.omap_head, &tail);
+
+    // acquire the iterator (once, and only when it is going to be used)
+    KeyValueDB::Iterator it = db->get_iterator(PREFIX_OMAP);
+
+    // seek the iterator
+    if (start_from.seek_type == omap_iter_seek_t::LOWER_BOUND) {
+      it->lower_bound(seek_key);
+    } else {
+      it->upper_bound(seek_key);
+    }
+
+    // iterate!
+    while (it->valid()) {
+      std::string user_key;
+      if (const auto& db_key = it->raw_key().second; db_key >= tail) {
+        // ran past the last key belonging to this object
+        break;
+      } else {
+        decode_omap_key(db_key, &user_key);
+      }
+      omap_iter_ret_t ret = f(user_key, it->value_as_sv());
+      if (ret == omap_iter_ret_t::STOP) {
+        break;
+      } else if (ret == omap_iter_ret_t::NEXT) {
+        it->next();
+      } else {
+        ceph_abort();
+      }
+    }
+  }
+  return 0;
+}
+
// -----------------
// write helpers
diff --git a/src/os/kstore/KStore.h b/src/os/kstore/KStore.h
index 9a9d413c66a..06115d3cab7 100644
--- a/src/os/kstore/KStore.h
+++ b/src/os/kstore/KStore.h
@@ -180,6 +180,7 @@ public:
int next() override;
std::string key() override;
ceph::buffer::list value() override;
+ std::string_view value_as_sv() override;
int status() override {
return 0;
}
@@ -553,6 +554,13 @@ public:
const ghobject_t &oid ///< [in] object
) override;
+ int omap_iterate(
+ CollectionHandle &c, ///< [in] collection
+ const ghobject_t &oid, ///< [in] object
+ omap_iter_seek_t start_from, ///< [in] where the iterator should point to at the beginning
+ std::function<omap_iter_ret_t(std::string_view, std::string_view)> f
+ ) override;
+
void set_fsid(uuid_d u) override {
fsid = u;
}
diff --git a/src/os/memstore/MemStore.cc b/src/os/memstore/MemStore.cc
index 89cb09361cf..f9d3bf0d8a2 100644
--- a/src/os/memstore/MemStore.cc
+++ b/src/os/memstore/MemStore.cc
@@ -537,30 +537,6 @@ int MemStore::omap_get_values(
return 0;
}
-#ifdef WITH_SEASTAR
-int MemStore::omap_get_values(
- CollectionHandle& ch, ///< [in] Collection containing oid
- const ghobject_t &oid, ///< [in] Object containing omap
- const std::optional<std::string> &start_after, ///< [in] Keys to get
- std::map<std::string, ceph::buffer::list> *out ///< [out] Returned keys and values
- )
-{
- dout(10) << __func__ << " " << ch->cid << " " << oid << dendl;
- Collection *c = static_cast<Collection*>(ch.get());
- ObjectRef o = c->get_object(oid);
- if (!o)
- return -ENOENT;
- assert(start_after);
- std::lock_guard lock{o->omap_mutex};
- for (auto it = o->omap.upper_bound(*start_after);
- it != std::end(o->omap);
- ++it) {
- out->insert(*it);
- }
- return 0;
-}
-#endif
-
int MemStore::omap_check_keys(
CollectionHandle& ch, ///< [in] Collection containing oid
const ghobject_t &oid, ///< [in] Object containing omap
@@ -622,6 +598,10 @@ public:
std::lock_guard lock{o->omap_mutex};
return it->second;
}
+ // Return the current entry's value as a string_view over the buffer that
+ // it->second.c_str() exposes, taken while holding the object's omap mutex.
+ // NOTE(review): the view outlives the mutex taken here -- confirm callers
+ // do not use it across a concurrent omap modification.
+ std::string_view value_as_sv() override {
+ std::lock_guard lock{o->omap_mutex};
+ return std::string_view{it->second.c_str(), it->second.length()};
+ }
int status() override {
return 0;
}
@@ -639,6 +619,48 @@ ObjectMap::ObjectMapIterator MemStore::get_omap_iterator(
return ObjectMap::ObjectMapIterator(new OmapIteratorImpl(c, o));
}
+// Walk the in-memory OMAP of `oid`, handing each (key, value) pair to `f`.
+//
+// The object's omap mutex is held for the whole iteration, including while
+// `f` runs. Iteration starts at the position described by `start_from` and
+// ends at the end of the map or when `f` returns STOP.
+//
+// Returns -ENOENT when the object does not exist, 0 otherwise.
+int MemStore::omap_iterate(
+  CollectionHandle &ch,   ///< [in] collection
+  const ghobject_t &oid, ///< [in] object
+  ObjectStore::omap_iter_seek_t start_from, ///< [in] where the iterator should point to at the beginning
+  std::function<omap_iter_ret_t(std::string_view, std::string_view)> f)
+{
+  Collection *coll = static_cast<Collection*>(ch.get());
+  ObjectRef obj = coll->get_object(oid);
+  if (!obj) {
+    return -ENOENT;
+  }
+
+  std::lock_guard lock{obj->omap_mutex};
+
+  // obtain and seek the iterator
+  const bool inclusive =
+    start_from.seek_type == omap_iter_seek_t::LOWER_BOUND;
+  auto cursor = inclusive
+    ? obj->omap.lower_bound(start_from.seek_position)
+    : obj->omap.upper_bound(start_from.seek_position);
+
+  // iterate!
+  while (cursor != obj->omap.end()) {
+    auto& [key, value] = *cursor;
+    // c_str() may have to flatten the buffer first, but who cares for memstore?
+    switch (f(key, std::string_view{value.c_str(), value.length()})) {
+    case omap_iter_ret_t::STOP:
+      return 0;
+    case omap_iter_ret_t::NEXT:
+      ++cursor;
+      break;
+    default:
+      ceph_abort();
+    }
+  }
+  return 0;
+}
+
// ---------------
// write operations
diff --git a/src/os/memstore/MemStore.h b/src/os/memstore/MemStore.h
index 2abe552891f..9621773598f 100644
--- a/src/os/memstore/MemStore.h
+++ b/src/os/memstore/MemStore.h
@@ -363,14 +363,6 @@ public:
const std::set<std::string> &keys, ///< [in] Keys to get
std::map<std::string, ceph::buffer::list> *out ///< [out] Returned keys and values
) override;
-#ifdef WITH_SEASTAR
- int omap_get_values(
- CollectionHandle &c, ///< [in] Collection containing oid
- const ghobject_t &oid, ///< [in] Object containing omap
- const std::optional<std::string> &start_after, ///< [in] Keys to get
- std::map<std::string, ceph::buffer::list> *out ///< [out] Returned keys and values
- ) override;
-#endif
using ObjectStore::omap_check_keys;
/// Filters keys into out which are defined on oid
@@ -387,6 +379,13 @@ public:
const ghobject_t &oid ///< [in] object
) override;
+ int omap_iterate(
+ CollectionHandle &c, ///< [in] collection
+ const ghobject_t &oid, ///< [in] object
+ omap_iter_seek_t start_from, ///< [in] where the iterator should point to at the beginning
+ std::function<omap_iter_ret_t(std::string_view, std::string_view)> f
+ ) override;
+
void set_fsid(uuid_d u) override;
uuid_d get_fsid() override;
diff --git a/src/osd/ECBackend.cc b/src/osd/ECBackend.cc
index fa2570aba42..8630b038812 100644
--- a/src/osd/ECBackend.cc
+++ b/src/osd/ECBackend.cc
@@ -945,6 +945,10 @@ void ECBackend::handle_sub_write(
}
trace.event("handle_sub_write");
+ if (cct->_conf->bluestore_debug_inject_read_err &&
+ ec_inject_test_write_error3(op.soid)) {
+ ceph_abort_msg("Error inject - OSD down");
+ }
if (!get_parent()->pgb_is_primary())
get_parent()->update_stats(op.stats);
ObjectStore::Transaction localt;
@@ -1191,6 +1195,15 @@ void ECBackend::handle_sub_write_reply(
i->second->on_all_commit = 0;
i->second->trace.event("ec write all committed");
}
+ if (cct->_conf->bluestore_debug_inject_read_err &&
+ (i->second->pending_commit.size() == 1) &&
+ ec_inject_test_write_error2(i->second->hoid)) {
+ std::string cmd =
+ "{ \"prefix\": \"osd down\", \"ids\": [\"" + std::to_string( get_parent()->whoami() ) + "\"] }";
+ vector<std::string> vcmd{cmd};
+ dout(0) << __func__ << " Error inject - marking OSD down" << dendl;
+ get_parent()->start_mon_command(vcmd, {}, nullptr, nullptr, nullptr);
+ }
rmw_pipeline.check_ops();
}
@@ -1208,6 +1221,19 @@ void ECBackend::handle_sub_read_reply(
return;
}
ReadOp &rop = iter->second;
+  if (cct->_conf->bluestore_debug_inject_read_err) {
+    // Error inject: turn selected successful shard reads into -EIO results.
+    // Entries are removed while walking the container, so the loop advances
+    // through the iterator returned by erase() and works on a copy of the
+    // key; touching the erased element afterwards would be undefined
+    // behaviour.
+    for (auto i = op.buffers_read.begin();
+         i != op.buffers_read.end();
+         /* advanced in the body */) {
+      if (ec_inject_test_read_error0(ghobject_t(i->first, ghobject_t::NO_GEN, op.from.shard))) {
+        dout(0) << __func__ << " Error inject - EIO error for shard " << op.from.shard << dendl;
+        const auto hoid = i->first; // keep the key alive past the erase
+        i = op.buffers_read.erase(i);
+        op.attrs_read.erase(hoid);
+        op.errors[hoid] = -EIO;
+      } else {
+        ++i;
+      }
+    }
+  }
for (auto i = op.buffers_read.begin();
i != op.buffers_read.end();
++i) {
diff --git a/src/osd/ECCommon.cc b/src/osd/ECCommon.cc
index 609ac3141ae..59077547fcb 100644
--- a/src/osd/ECCommon.cc
+++ b/src/osd/ECCommon.cc
@@ -226,8 +226,14 @@ void ECCommon::ReadPipeline::get_all_avail_shards(
++i) {
dout(10) << __func__ << ": checking acting " << *i << dendl;
const pg_missing_t &missing = get_parent()->get_shard_missing(*i);
- if (error_shards.find(*i) != error_shards.end())
+ if (error_shards.contains(*i)) {
continue;
+ }
+ if (cct->_conf->bluestore_debug_inject_read_err &&
+ ec_inject_test_read_error1(ghobject_t(hoid, ghobject_t::NO_GEN, i->shard))) {
+ dout(0) << __func__ << " Error inject - Missing shard " << i->shard << dendl;
+ continue;
+ }
if (!missing.is_missing(hoid)) {
ceph_assert(!have.count(i->shard));
have.insert(i->shard);
@@ -912,6 +918,11 @@ bool ECCommon::RMWPipeline::try_reads_to_commit()
if (*i == get_parent()->whoami_shard()) {
should_write_local = true;
local_write_op.claim(sop);
+ } else if (cct->_conf->bluestore_debug_inject_read_err &&
+ ec_inject_test_write_error1(ghobject_t(op->hoid,
+ ghobject_t::NO_GEN, i->shard))) {
+ dout(0) << " Error inject - Dropping write message to shard " <<
+ i->shard << dendl;
} else {
MOSDECSubOpWrite *r = new MOSDECSubOpWrite(sop);
r->pgid = spg_t(get_parent()->primary_spg_t().pgid, i->shard);
@@ -1090,3 +1101,305 @@ ECUtil::HashInfoRef ECCommon::UnstableHashInfoRegistry::get_hash_info(
}
return ref;
}
+
+// Error inject interfaces
+// Guards every table below. It is recursive because some of the test
+// functions call ec_inject_write_error() while already holding it.
+static ceph::recursive_mutex ec_inject_lock =
+ ceph::make_recursive_mutex("ECCommon::ec_inject_lock");
+// One table per inject type; each entry maps the target object to a
+// (when, duration) pair as stored by ec_inject_read_error() /
+// ec_inject_write_error().
+static std::map<ghobject_t,std::pair<int64_t,int64_t>> ec_inject_read_failures0;
+static std::map<ghobject_t,std::pair<int64_t,int64_t>> ec_inject_read_failures1;
+static std::map<ghobject_t,std::pair<int64_t,int64_t>> ec_inject_write_failures0;
+static std::map<ghobject_t,std::pair<int64_t,int64_t>> ec_inject_write_failures1;
+static std::map<ghobject_t,std::pair<int64_t,int64_t>> ec_inject_write_failures2;
+static std::map<ghobject_t,std::pair<int64_t,int64_t>> ec_inject_write_failures3;
+// Shard id originally requested for a type 0 write inject (the key in
+// ec_inject_write_failures0 has its shard reset to NO_SHARD).
+static std::map<ghobject_t,shard_id_t> ec_inject_write_failures0_shard;
+// Request ids for which a type 0 write inject has fired; a later call to
+// ec_inject_test_write_error0() with the same reqid reports a failure.
+static std::set<osd_reqid_t> ec_inject_write_failures0_reqid;
+
+/**
+ * Configure a read error inject that typically forces additional reads of
+ * shards in an EC pool to recover data using the redundancy. With multiple
+ * errors it is possible to force client reads to fail.
+ *
+ * Type 0 - Simulate a medium error. Fail a read with -EIO to force
+ * additional reads and a decode.
+ *
+ * Type 1 - Simulate a missing OSD. Don't even try to read a shard.
+ *
+ * An object named "*" acts as a wildcard and is stored with hash 0.
+ *
+ * @brief Set up a read error inject for an object in an EC pool.
+ * @param o Target object for the error inject.
+ * @param type Type of error inject 0 = EIO, 1 = missing shard.
+ * @param when Error inject starts after this many matching reads.
+ * @param duration Error inject affects this many matching reads.
+ * @return string Result of configuring the error inject.
+ */
+std::string ec_inject_read_error(const ghobject_t& o,
+                                 const int64_t type,
+                                 const int64_t when,
+                                 const int64_t duration) {
+  std::lock_guard<ceph::recursive_mutex> l(ec_inject_lock);
+  ghobject_t key = o;
+  if (key.hobj.oid.name == "*") {
+    key.hobj.set_hash(0);
+  }
+  if (type == 0) {
+    ec_inject_read_failures0[key] = std::pair(when, duration);
+    return "ok - read returns EIO";
+  }
+  if (type == 1) {
+    ec_inject_read_failures1[key] = std::pair(when, duration);
+    return "ok - read pretends shard is missing";
+  }
+  return "unrecognized error inject type";
+}
+
+/**
+ * Configure a write error inject that either fails an OSD or causes a
+ * client write operation to be rolled back.
+ *
+ * Type 0 - Tests rollback. Drop a write I/O to a shard, then simulate an OSD
+ * down to force rollback to occur, lastly fail the retried write from the
+ * client so the results of the rollback can be inspected.
+ *
+ * Type 1 - Drop a write I/O to a shard. Used on its own this will hang a
+ * write I/O.
+ *
+ * Type 2 - Simulate an OSD down (ceph osd down) to force a new epoch. Usually
+ * used together with type 1 to force a rollback.
+ *
+ * Type 3 - Abort when an OSD processes a write I/O to a shard. Typically the
+ * client write will be committed while the OSD is absent which will result in
+ * recovery or backfill later when the OSD returns. Requires duration == 1.
+ *
+ * For every type except 1 the entry is stored with the shard reset to
+ * NO_SHARD. An object named "*" acts as a wildcard and is stored with hash 0.
+ *
+ * @brief Set up a write error inject for an object in an EC pool.
+ * @param o Target object for the error inject.
+ * @param type Type of error inject, 0 to 3 as described above.
+ * @param when Error inject starts after this many matching writes.
+ * @param duration Error inject affects this many matching writes.
+ * @return string Result of configuring the error inject.
+ */
+std::string ec_inject_write_error(const ghobject_t& o,
+                                  const int64_t type,
+                                  const int64_t when,
+                                  const int64_t duration) {
+  std::lock_guard<ceph::recursive_mutex> l(ec_inject_lock);
+  std::map<ghobject_t,std::pair<int64_t,int64_t>> *table = nullptr;
+  bool keep_shard = false;
+  std::string outcome;
+  if (type == 0) {
+    table = &ec_inject_write_failures0;
+    outcome = "ok - drop write, sim OSD down and fail client retry with EINVAL";
+  } else if (type == 1) {
+    table = &ec_inject_write_failures1;
+    keep_shard = true;
+    outcome = "ok - drop write to shard";
+  } else if (type == 2) {
+    table = &ec_inject_write_failures2;
+    outcome = "ok - inject OSD down";
+  } else if (type == 3) {
+    if (duration != 1) {
+      return "duration must be 1";
+    }
+    table = &ec_inject_write_failures3;
+    outcome = "ok - write abort OSDs";
+  } else {
+    return "unrecognized error inject type";
+  }
+
+  ghobject_t key = o;
+  if (!keep_shard) {
+    key.set_shard(shard_id_t::NO_SHARD);
+  }
+  if (key.hobj.oid.name == "*") {
+    key.hobj.set_hash(0);
+  }
+  (*table)[key] = std::pair(when, duration);
+  if (type == 0) {
+    // remember which shard the caller asked for; the key no longer has it
+    ec_inject_write_failures0_shard[key] = o.shard_id;
+  }
+  return outcome;
+}
+
+/**
+ * @brief Clear a previously configured read error inject.
+ * @param o Target object for the error inject ("*" selects the wildcard entry).
+ * @param type Type of error inject 0 = EIO, 1 = missing shard.
+ * @return string Indication of how many errors were cleared, based on the
+ *         duration still left on the removed entry.
+ */
+std::string ec_inject_clear_read_error(const ghobject_t& o,
+                                       const int64_t type) {
+  std::lock_guard<ceph::recursive_mutex> l(ec_inject_lock);
+  std::map<ghobject_t,std::pair<int64_t,int64_t>> *table;
+  if (type == 0) {
+    table = &ec_inject_read_failures0;
+  } else if (type == 1) {
+    table = &ec_inject_read_failures1;
+  } else {
+    return "unrecognized error inject type";
+  }
+
+  ghobject_t key = o;
+  if (key.hobj.oid.name == "*") {
+    key.hobj.set_hash(0);
+  }
+
+  int64_t cleared = 0;
+  if (auto found = table->find(key); found != table->end()) {
+    cleared = found->second.second;
+    table->erase(found);
+  }
+
+  switch (cleared) {
+  case 0:
+    return "no outstanding error injects";
+  case 1:
+    return "ok - 1 inject cleared";
+  default:
+    return "ok - " + std::to_string(cleared) + " injects cleared";
+  }
+}
+
+/**
+ * @brief Clear a previously configured write error inject.
+ * @param o Target object for the error inject ("*" selects the wildcard entry).
+ * @param type Type of error inject, 0 to 3 (same meaning as for
+ *        ec_inject_write_error()).
+ * @return string Indication of how many errors were cleared, based on the
+ *         duration still left on the removed entry.
+ */
+std::string ec_inject_clear_write_error(const ghobject_t& o,
+                                        const int64_t type) {
+  std::lock_guard<ceph::recursive_mutex> l(ec_inject_lock);
+  std::map<ghobject_t,std::pair<int64_t,int64_t>> *table;
+  bool keep_shard = false;
+  if (type == 0) {
+    table = &ec_inject_write_failures0;
+  } else if (type == 1) {
+    table = &ec_inject_write_failures1;
+    keep_shard = true;
+  } else if (type == 2) {
+    table = &ec_inject_write_failures2;
+  } else if (type == 3) {
+    table = &ec_inject_write_failures3;
+  } else {
+    return "unrecognized error inject type";
+  }
+
+  // build the same key that ec_inject_write_error() stored
+  ghobject_t key = o;
+  if (!keep_shard) {
+    key.set_shard(shard_id_t::NO_SHARD);
+  }
+  if (key.hobj.oid.name == "*") {
+    key.hobj.set_hash(0);
+  }
+
+  int64_t cleared = 0;
+  if (auto found = table->find(key); found != table->end()) {
+    cleared = found->second.second;
+    table->erase(found);
+    if (type == 0) {
+      ec_inject_write_failures0_shard.erase(key);
+    }
+  }
+
+  switch (cleared) {
+  case 0:
+    return "no outstanding error injects";
+  case 1:
+    return "ok - 1 inject cleared";
+  default:
+    return "ok - " + std::to_string(cleared) + " injects cleared";
+  }
+}
+
+// Check whether an error inject in `failures` should fire for object `o`.
+//
+// Looks for an exact entry first and falls back to the wildcard entry
+// (name "*", hash 0). While the entry's `when` counter is positive it is
+// decremented and the inject does not fire. After that each call fires the
+// inject and decrements `duration`; the entry is removed once `duration`
+// drops to zero or below.
+//
+// Returns true when the inject fires, false otherwise.
+static bool ec_inject_test_error(const ghobject_t& o,
+  std::map<ghobject_t,std::pair<int64_t,int64_t>> *failures)
+{
+  std::lock_guard<ceph::recursive_mutex> l(ec_inject_lock);
+  auto entry = failures->find(o);
+  if (entry == failures->end()) {
+    ghobject_t wildcard = o;
+    wildcard.hobj.oid.name = "*";
+    wildcard.hobj.set_hash(0);
+    entry = failures->find(wildcard);
+    if (entry == failures->end()) {
+      return false;
+    }
+  }
+
+  int64_t& skips_left = entry->second.first;
+  int64_t& fires_left = entry->second.second;
+  if (skips_left > 0) {
+    --skips_left;
+    return false;
+  }
+  --fires_left;
+  if (fires_left <= 0) {
+    failures->erase(entry);
+  }
+  return true;
+}
+
+// Test (and consume) a type 0 read inject for `o`; returns true when the
+// inject fires. Entries are looked up in ec_inject_read_failures0.
+bool ec_inject_test_read_error0(const ghobject_t& o)
+{
+ return ec_inject_test_error(o, &ec_inject_read_failures0);
+}
+
+// Test (and consume) a type 1 read inject for `o`; returns true when the
+// inject fires. Entries are looked up in ec_inject_read_failures1.
+bool ec_inject_test_read_error1(const ghobject_t& o)
+{
+ return ec_inject_test_error(o, &ec_inject_read_failures1);
+}
+
+// Test a type 0 write inject for object `o` and request `reqid`.
+//
+// Returns true only when `reqid` was recorded by an earlier call in which the
+// inject fired; the recorded reqid is removed in that case. Otherwise the
+// entry for `o` (or the wildcard entry) is looked up: while its `when`
+// counter is positive it is decremented; after that the inject fires, which
+// decrements `duration` (removing the entry once exhausted), records `reqid`
+// and arms a one-shot type 1 write inject for the shard saved when the
+// type 0 inject was configured. All of these paths return false.
+bool ec_inject_test_write_error0(const hobject_t& o,
+ const osd_reqid_t& reqid) {
+ std::lock_guard<ceph::recursive_mutex> l(ec_inject_lock);
+ ghobject_t os = ghobject_t(o, ghobject_t::NO_GEN, shard_id_t::NO_SHARD);
+ if (ec_inject_write_failures0_reqid.count(reqid)) {
+ // Matched reqid of retried write - flag for failure
+ ec_inject_write_failures0_reqid.erase(reqid);
+ return true;
+ }
+ auto it = ec_inject_write_failures0.find(os);
+ if (it == ec_inject_write_failures0.end()) {
+ // no exact match: fall back to the wildcard entry
+ os.hobj.oid.name = "*";
+ os.hobj.set_hash(0);
+ it = ec_inject_write_failures0.find(os);
+ }
+ if (it != ec_inject_write_failures0.end()) {
+ auto && [when, duration] = it->second;
+ // NOTE(review): find() result is dereferenced without an end() check;
+ // this relies on the shard table always having an entry for every key in
+ // ec_inject_write_failures0.
+ auto shard = ec_inject_write_failures0_shard.find(os)->second;
+ if (when > 0) {
+ when--;
+ } else {
+ if (--duration <= 0) {
+ // `when` and `duration` dangle after this erase; they are not used again
+ ec_inject_write_failures0.erase(it);
+ ec_inject_write_failures0_shard.erase(os);
+ }
+ // Error inject triggered - save reqid
+ ec_inject_write_failures0_reqid.insert(reqid);
+ // Set up error inject to drop message to primary
+ // (re-enters ec_inject_lock, which is why the mutex is recursive)
+ ec_inject_write_error(ghobject_t(o, ghobject_t::NO_GEN, shard), 1, 0, 1);
+ }
+ }
+ return false;
+}
+
+// Test (and consume) a type 1 write inject for `o`. When it fires, a
+// one-shot type 2 inject (OSD down) is armed for the same object.
+// Returns true when the type 1 inject fired.
+bool ec_inject_test_write_error1(const ghobject_t& o) {
+  if (!ec_inject_test_error(o, &ec_inject_write_failures1)) {
+    return false;
+  }
+  // Set up error inject to generate OSD down
+  ec_inject_write_error(o, 2, 0, 1);
+  return true;
+}
+
+// Test (and consume) a type 2 write inject for `o`; returns true when the
+// inject fires. The lookup key uses NO_GEN and NO_SHARD.
+bool ec_inject_test_write_error2(const hobject_t& o) {
+ return ec_inject_test_error(
+ ghobject_t(o, ghobject_t::NO_GEN, shard_id_t::NO_SHARD),
+ &ec_inject_write_failures2);
+}
+
+// Test (and consume) a type 3 write inject for `o`; returns true when the
+// inject fires. The lookup key uses NO_GEN and NO_SHARD.
+bool ec_inject_test_write_error3(const hobject_t& o) {
+ return ec_inject_test_error(
+ ghobject_t(o, ghobject_t::NO_GEN, shard_id_t::NO_SHARD),
+ &ec_inject_write_failures3);
+}
diff --git a/src/osd/ECCommon.h b/src/osd/ECCommon.h
index 7ff9cae7646..de4c11ad50f 100644
--- a/src/osd/ECCommon.h
+++ b/src/osd/ECCommon.h
@@ -493,6 +493,7 @@ struct ECCommon {
); ///< @return error code, 0 on success
void schedule_recovery_work();
+
};
/**
@@ -843,3 +844,15 @@ void ECCommon::ReadPipeline::filter_read_op(
on_schedule_recovery(op);
}
}
+
+// Error inject interfaces
+std::string ec_inject_read_error(const ghobject_t& o, const int64_t type, const int64_t when, const int64_t duration);
+std::string ec_inject_write_error(const ghobject_t& o, const int64_t type, const int64_t when, const int64_t duration);
+std::string ec_inject_clear_read_error(const ghobject_t& o, const int64_t type);
+std::string ec_inject_clear_write_error(const ghobject_t& o, const int64_t type);
+bool ec_inject_test_read_error0(const ghobject_t& o);
+bool ec_inject_test_read_error1(const ghobject_t& o);
+bool ec_inject_test_write_error0(const hobject_t& o,const osd_reqid_t& reqid);
+bool ec_inject_test_write_error1(const ghobject_t& o);
+bool ec_inject_test_write_error2(const hobject_t& o);
+bool ec_inject_test_write_error3(const hobject_t& o);
diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc
index 5223eb283e9..9c9e540cf61 100644
--- a/src/osd/OSD.cc
+++ b/src/osd/OSD.cc
@@ -37,6 +37,7 @@
#include "osd/PG.h"
#include "osd/scrubber/scrub_machine.h"
#include "osd/scrubber/pg_scrubber.h"
+#include "osd/ECCommon.h"
#include "include/types.h"
#include "include/compat.h"
@@ -4348,6 +4349,46 @@ void OSD::final_init()
"inject metadata error to an object");
ceph_assert(r == 0);
r = admin_socket->register_command(
+ "injectecreaderr " \
+ "name=pool,type=CephString " \
+ "name=objname,type=CephObjectname " \
+ "name=shardid,type=CephInt,req=true,range=0|255 " \
+ "name=type,type=CephInt,req=false " \
+ "name=when,type=CephInt,req=false " \
+ "name=duration,type=CephInt,req=false",
+ test_ops_hook,
+ "inject error for read of object in an EC pool");
+ ceph_assert(r == 0);
+ r = admin_socket->register_command(
+ "injectecclearreaderr " \
+ "name=pool,type=CephString " \
+ "name=objname,type=CephObjectname " \
+ "name=shardid,type=CephInt,req=true,range=0|255 " \
+ "name=type,type=CephInt,req=false",
+ test_ops_hook,
+ "clear read error injects for object in an EC pool");
+ ceph_assert(r == 0);
+ r = admin_socket->register_command(
+ "injectecwriteerr " \
+ "name=pool,type=CephString " \
+ "name=objname,type=CephObjectname " \
+ "name=shardid,type=CephInt,req=true,range=0|255 " \
+ "name=type,type=CephInt,req=false " \
+ "name=when,type=CephInt,req=false " \
+ "name=duration,type=CephInt,req=false",
+ test_ops_hook,
+ "inject error for write of object in an EC pool");
+ ceph_assert(r == 0);
+ r = admin_socket->register_command(
+ "injectecclearwriteerr " \
+ "name=pool,type=CephString " \
+ "name=objname,type=CephObjectname " \
+ "name=shardid,type=CephInt,req=true,range=0|255 " \
+ "name=type,type=CephInt,req=false",
+ test_ops_hook,
+ "clear write error inject for object in an EC pool");
+ ceph_assert(r == 0);
+ r = admin_socket->register_command(
"set_recovery_delay " \
"name=utime,type=CephInt,req=false",
test_ops_hook,
@@ -6487,8 +6528,10 @@ void TestOpsSocketHook::test_ops(OSDService *service, ObjectStore *store,
//directly request the osd make a change.
if (command == "setomapval" || command == "rmomapkey" ||
command == "setomapheader" || command == "getomap" ||
- command == "truncobj" || command == "injectmdataerr" ||
- command == "injectdataerr"
+ command == "truncobj" ||
+ command == "injectmdataerr" || command == "injectdataerr" ||
+ command == "injectecreaderr" || command == "injectecclearreaderr" ||
+ command == "injectecwriteerr" || command == "injectecclearwriteerr"
) {
pg_t rawpg;
int64_t pool;
@@ -6527,8 +6570,21 @@ void TestOpsSocketHook::test_ops(OSDService *service, ObjectStore *store,
ghobject_t gobj(obj, ghobject_t::NO_GEN, shard_id_t(uint8_t(shardid)));
spg_t pgid(curmap->raw_pg_to_pg(rawpg), shard_id_t(shardid));
if (curmap->pg_is_ec(rawpg)) {
- if ((command != "injectdataerr") && (command != "injectmdataerr")) {
- ss << "Must not call on ec pool, except injectdataerr or injectmdataerr";
+ if ((command != "injectdataerr") &&
+ (command != "injectmdataerr") &&
+ (command != "injectecreaderr") &&
+ (command != "injectecclearreaderr") &&
+ (command != "injectecwriteerr") &&
+ (command != "injectecclearwriteerr")) {
+ ss << "Must not call on ec pool";
+ return;
+ }
+ } else { // not an EC pool: refuse the EC-only inject commands
+ if ((command == "injectecreaderr") ||
+ (command == "injectecclearreaderr") ||
+ (command == "injectecwriteerr") ||
+ (command == "injectecclearwriteerr")) {
+ ss << "Only supported on ec pool";
return;
}
}
@@ -6607,6 +6663,38 @@ void TestOpsSocketHook::test_ops(OSDService *service, ObjectStore *store,
} else if (command == "injectmdataerr") {
store->inject_mdata_error(gobj);
ss << "ok";
+ } else if (command == "injectecreaderr") {
+ if (service->cct->_conf->bluestore_debug_inject_read_err) {
+ int64_t type = cmd_getval_or<int64_t>(cmdmap, "type", 0);
+ int64_t when = cmd_getval_or<int64_t>(cmdmap, "when", 0);
+ int64_t duration = cmd_getval_or<int64_t>(cmdmap, "duration", 1);
+ ss << ec_inject_read_error(gobj, type, when, duration);
+ } else {
+ ss << "bluestore_debug_inject_read_err not enabled";
+ }
+ } else if (command == "injectecclearreaderr") {
+ if (service->cct->_conf->bluestore_debug_inject_read_err) {
+ int64_t type = cmd_getval_or<int64_t>(cmdmap, "type", 0);
+ ss << ec_inject_clear_read_error(gobj, type);
+ } else {
+ ss << "bluestore_debug_inject_read_err not enabled";
+ }
+ } else if (command == "injectecwriteerr") {
+ if (service->cct->_conf->bluestore_debug_inject_read_err) {
+ int64_t type = cmd_getval_or<int64_t>(cmdmap, "type", 0);
+ int64_t when = cmd_getval_or<int64_t>(cmdmap, "when", 0);
+ int64_t duration = cmd_getval_or<int64_t>(cmdmap, "duration", 1);
+ ss << ec_inject_write_error(gobj, type, when, duration);
+ } else {
+ ss << "bluestore_debug_inject_read_err not enabled";
+ }
+ } else if (command == "injectecclearwriteerr") {
+ if (service->cct->_conf->bluestore_debug_inject_read_err) {
+ int64_t type = cmd_getval_or<int64_t>(cmdmap, "type", 0);
+ ss << ec_inject_clear_write_error(gobj, type);
+ } else {
+ ss << "bluestore_debug_inject_read_err not enabled";
+ }
}
return;
}
diff --git a/src/osd/OSDMap.cc b/src/osd/OSDMap.cc
index b87484c1a9d..9b3593d54e5 100644
--- a/src/osd/OSDMap.cc
+++ b/src/osd/OSDMap.cc
@@ -1642,12 +1642,10 @@ void OSDMap::get_out_of_subnet_osd_counts(CephContext *cct,
for (int i = 0; i < max_osd; i++) {
if (exists(i) && is_up(i)) {
if (const auto& addrs = get_addrs(i).v; addrs.size() >= 2) {
- auto v1_addr = addrs[0].ip_only_to_str();
- if (!is_addr_in_subnet(cct, public_network, v1_addr)) {
+ if (!is_addr_in_subnet(cct, public_network, addrs[0])) {
unreachable->emplace(i);
}
- auto v2_addr = addrs[1].ip_only_to_str();
- if (!is_addr_in_subnet(cct, public_network, v2_addr)) {
+ if (!is_addr_in_subnet(cct, public_network, addrs[1])) {
unreachable->emplace(i);
}
}
diff --git a/src/osd/PGBackend.h b/src/osd/PGBackend.h
index b87aa1da677..f5eb9ea951e 100644
--- a/src/osd/PGBackend.h
+++ b/src/osd/PGBackend.h
@@ -290,6 +290,10 @@ typedef std::shared_ptr<const OSDMap> OSDMapRef;
MessageRef, Connection *con) = 0;
virtual void send_message_osd_cluster(
Message *m, const ConnectionRef& con) = 0;
+ virtual void start_mon_command(
+ const std::vector<std::string>& cmd, const bufferlist& inbl,
+ bufferlist *outbl, std::string *outs,
+ Context *onfinish) = 0;
virtual ConnectionRef get_con_osd_cluster(int peer, epoch_t from_epoch) = 0;
virtual entity_name_t get_cluster_msgr_name() = 0;
diff --git a/src/osd/PrimaryLogPG.cc b/src/osd/PrimaryLogPG.cc
index 44f8e85b5ef..3324ba9dc91 100644
--- a/src/osd/PrimaryLogPG.cc
+++ b/src/osd/PrimaryLogPG.cc
@@ -2286,6 +2286,16 @@ void PrimaryLogPG::do_op(OpRequestRef& op)
}
}
+ if (cct->_conf->bluestore_debug_inject_read_err &&
+ op->may_write() &&
+ pool.info.is_erasure() &&
+ ec_inject_test_write_error0(m->get_hobj(), m->get_reqid())) {
+ // Fail retried write with error
+ dout(0) << __func__ << " Error inject - Fail retried write with EINVAL" << dendl;
+ osd->reply_op_error(op, -EINVAL);
+ return;
+ }
+
ObjectContextRef obc;
bool can_create = op->may_write();
hobject_t missing_oid;
@@ -5798,10 +5808,19 @@ int PrimaryLogPG::do_extent_cmp(OpContext *ctx, OSDOp& osd_op)
int PrimaryLogPG::finish_extent_cmp(OSDOp& osd_op, const bufferlist &read_bl)
{
- for (uint64_t idx = 0; idx < osd_op.indata.length(); ++idx) {
- char read_byte = (idx < read_bl.length() ? read_bl[idx] : 0);
- if (osd_op.indata[idx] != read_byte) {
- return (-MAX_ERRNO - idx);
+ auto input_iter = osd_op.indata.begin();
+ auto read_iter = read_bl.begin();
+ uint64_t idx = 0;
+
+ while (input_iter != osd_op.indata.end()) {
+ char read_byte = (read_iter != read_bl.end() ? *read_iter : 0);
+ if (*input_iter != read_byte) {
+ return (-MAX_ERRNO - idx);
+ }
+ ++idx;
+ ++input_iter;
+ if (read_iter != read_bl.end()) {
+ ++read_iter;
}
}
@@ -7767,27 +7786,34 @@ int PrimaryLogPG::do_osd_ops(OpContext *ctx, vector<OSDOp>& ops)
bool truncated = false;
bufferlist bl;
if (oi.is_omap()) {
- ObjectMap::ObjectMapIterator iter = osd->store->get_omap_iterator(
- ch, ghobject_t(soid)
- );
- if (!iter) {
- result = -ENOENT;
- goto fail;
- }
- iter->upper_bound(start_after);
- if (filter_prefix > start_after) iter->lower_bound(filter_prefix);
- for (num = 0;
- iter->valid() &&
- iter->key().substr(0, filter_prefix.size()) == filter_prefix;
- ++num, iter->next()) {
- dout(20) << "Found key " << iter->key() << dendl;
- if (num >= max_return ||
- bl.length() >= cct->_conf->osd_max_omap_bytes_per_request) {
- truncated = true;
- break;
- }
- encode(iter->key(), bl);
- encode(iter->value(), bl);
+ using omap_iter_seek_t = ObjectStore::omap_iter_seek_t;
+ result = osd->store->omap_iterate(
+ ch, ghobject_t(soid),
+ // try to seek as many keys-at-once as possible for the sake of performance.
+ // note complexity should be logarithmic, so seek(n/2) + seek(n/2) is worse
+ // than just seek(n).
+ ObjectStore::omap_iter_seek_t{
+ .seek_position = std::max(start_after, filter_prefix),
+ .seek_type = filter_prefix > start_after ? omap_iter_seek_t::LOWER_BOUND
+ : omap_iter_seek_t::UPPER_BOUND
+ },
+ [&bl, &truncated, &filter_prefix, &num, max_return,
+ max_bytes=cct->_conf->osd_max_omap_bytes_per_request]
+ (std::string_view key, std::string_view value) mutable {
+ if (key.substr(0, filter_prefix.size()) != filter_prefix) {
+ return ObjectStore::omap_iter_ret_t::STOP;
+ }
+ if (num >= max_return || bl.length() >= max_bytes) {
+ truncated = true;
+ return ObjectStore::omap_iter_ret_t::STOP;
+ }
+ encode(key, bl);
+ encode(value, bl);
+ ++num;
+ return ObjectStore::omap_iter_ret_t::NEXT;
+ });
+ if (result < 0) {
+ goto fail;
}
} // else return empty out_set
encode(num, osd_op.outdata);
diff --git a/src/osd/PrimaryLogPG.h b/src/osd/PrimaryLogPG.h
index f66b5c6e16a..bf55d539821 100644
--- a/src/osd/PrimaryLogPG.h
+++ b/src/osd/PrimaryLogPG.h
@@ -622,6 +622,12 @@ public:
Message *m, const ConnectionRef& con) override {
osd->send_message_osd_cluster(m, con);
}
+ void start_mon_command(
+ const std::vector<std::string>& cmd, const bufferlist& inbl,
+ bufferlist *outbl, std::string *outs,
+ Context *onfinish) override {
+ osd->monc->start_mon_command(cmd, inbl, outbl, outs, onfinish);
+ }
ConnectionRef get_con_osd_cluster(int peer, epoch_t from_epoch) override;
entity_name_t get_cluster_msgr_name() override {
return osd->get_cluster_msgr_name();
@@ -1993,6 +1999,7 @@ public:
private:
DynamicPerfStats m_dynamic_perf_stats;
+
};
inline ostream& operator<<(ostream& out, const PrimaryLogPG::RepGather& repop)
@@ -2021,5 +2028,4 @@ inline ostream& operator<<(ostream& out,
void intrusive_ptr_add_ref(PrimaryLogPG::RepGather *repop);
void intrusive_ptr_release(PrimaryLogPG::RepGather *repop);
-
#endif
diff --git a/src/pybind/mgr/cephadm/inventory.py b/src/pybind/mgr/cephadm/inventory.py
index f1c56d75378..550604fc55b 100644
--- a/src/pybind/mgr/cephadm/inventory.py
+++ b/src/pybind/mgr/cephadm/inventory.py
@@ -2036,8 +2036,8 @@ class CertKeyStore():
var = service_name if entity in self.service_name_cert else host
j = {}
self.known_certs[entity][var] = cert_obj
- for service_name in self.known_certs[entity].keys():
- j[var] = Cert.to_json(self.known_certs[entity][var])
+ for cert_key in self.known_certs[entity]:
+ j[cert_key] = Cert.to_json(self.known_certs[entity][cert_key])
else:
self.known_certs[entity] = cert_obj
j = Cert.to_json(cert_obj)
diff --git a/src/pybind/mgr/cephadm/module.py b/src/pybind/mgr/cephadm/module.py
index bf14f8d1715..6690153d435 100644
--- a/src/pybind/mgr/cephadm/module.py
+++ b/src/pybind/mgr/cephadm/module.py
@@ -2460,7 +2460,7 @@ Then run the following:
@handle_orch_error
def service_action(self, action: str, service_name: str) -> List[str]:
- if service_name not in self.spec_store.all_specs.keys():
+ if service_name not in self.spec_store.all_specs.keys() and service_name != 'osd':
raise OrchestratorError(f'Invalid service name "{service_name}".'
+ ' View currently running services using "ceph orch ls"')
dds: List[DaemonDescription] = self.cache.get_daemons_by_service(service_name)
@@ -3925,6 +3925,50 @@ Then run the following:
return self.to_remove_osds.all_osds()
@handle_orch_error
+ def set_osd_spec(self, service_name: str, osd_ids: List[str]) -> str:
+ """
+ Update unit.meta file for osd with service name
+ """
+ if service_name not in self.spec_store:
+ raise OrchestratorError(f"Cannot find service '{service_name}' in the inventory. "
+ "Please try again after applying an OSD service that matches "
+ "the service name to which you want to attach OSDs.")
+
+ daemons: List[orchestrator.DaemonDescription] = self.cache.get_daemons_by_type('osd')
+ update_osd = defaultdict(list)
+ for daemon in daemons:
+ if daemon.daemon_id in osd_ids and daemon.hostname:
+ update_osd[daemon.hostname].append(daemon.daemon_id)
+
+ if not update_osd:
+ raise OrchestratorError(f"Unable to find OSDs: {osd_ids}")
+
+ failed_osds = []
+ success_osds = []
+ for host in update_osd:
+ osds = ",".join(update_osd[host])
+ # run cephadm command with all host osds on specific host,
+ # if it fails, continue with other hosts
+ try:
+ with self.async_timeout_handler(host):
+ outs, errs, _code = self.wait_async(
+ CephadmServe(self)._run_cephadm(host,
+ cephadmNoImage,
+ 'update-osd-service',
+ ['--service-name', service_name, '--osd-ids', osds]))
+ if _code:
+ self.log.error(f"Failed to update service for {osds} osd. Cephadm error: {errs}")
+ failed_osds.extend(update_osd[host])
+ else:
+ success_osds.extend(update_osd[host])
+ except Exception:
+ self.log.exception(f"Failed to set service name for {osds}")
+ failed_osds.extend(update_osd[host])
+ self.cache.invalidate_host_daemons(host)
+ self._kick_serve_loop()
+ return f"Updated service for osd {','.join(success_osds)}" + (f" and failed for {','.join(failed_osds)}" if failed_osds else "")
+
+ @handle_orch_error
@host_exists()
def drain_host(self, hostname: str, force: bool = False, keep_conf_keyring: bool = False, zap_osd_devices: bool = False) -> str:
"""
diff --git a/src/pybind/mgr/cephadm/schedule.py b/src/pybind/mgr/cephadm/schedule.py
index 98d2fe99897..04d3712c50a 100644
--- a/src/pybind/mgr/cephadm/schedule.py
+++ b/src/pybind/mgr/cephadm/schedule.py
@@ -385,6 +385,8 @@ class HostAssignment(object):
def find_ip_on_host(self, hostname: str, subnets: List[str]) -> Optional[str]:
for subnet in subnets:
+ # to normalize subnet
+ subnet = str(ipaddress.ip_network(subnet))
ips: List[str] = []
# following is to allow loopback interfaces for both ipv4 and ipv6. Since we
# only have the subnet (and no IP) we assume default loopback IP address.
diff --git a/src/pybind/mgr/cephadm/services/cephadmservice.py b/src/pybind/mgr/cephadm/services/cephadmservice.py
index 04f5af28a9b..4f83d7bb0fb 100644
--- a/src/pybind/mgr/cephadm/services/cephadmservice.py
+++ b/src/pybind/mgr/cephadm/services/cephadmservice.py
@@ -1157,6 +1157,14 @@ class RgwService(CephService):
'value': str(spec.rgw_bucket_counters_cache_size),
})
+ if getattr(spec, 'disable_multisite_sync_traffic', None) is not None:
+ ret, out, err = self.mgr.check_mon_command({
+ 'prefix': 'config set',
+ 'who': daemon_name,
+ 'name': 'rgw_run_sync_thread',
+ 'value': 'false' if spec.disable_multisite_sync_traffic else 'true',
+ })
+
daemon_spec.keyring = keyring
daemon_spec.final_config, daemon_spec.deps = self.generate_config(daemon_spec)
diff --git a/src/pybind/mgr/cephadm/services/monitoring.py b/src/pybind/mgr/cephadm/services/monitoring.py
index 1b9cf618570..9c5b5a112f3 100644
--- a/src/pybind/mgr/cephadm/services/monitoring.py
+++ b/src/pybind/mgr/cephadm/services/monitoring.py
@@ -3,6 +3,7 @@ import logging
import os
import socket
from typing import List, Any, Tuple, Dict, Optional, cast
+import ipaddress
from mgr_module import HandleCommandResult
@@ -57,6 +58,8 @@ class GrafanaService(CephadmService):
if ip_to_bind_to:
daemon_spec.port_ips = {str(grafana_port): ip_to_bind_to}
grafana_ip = ip_to_bind_to
+ if ipaddress.ip_network(grafana_ip).version == 6:
+ grafana_ip = f"[{grafana_ip}]"
domain = self.mgr.get_fqdn(daemon_spec.host)
mgmt_gw_ips = []
@@ -354,6 +357,13 @@ class AlertmanagerService(CephadmService):
addr = self.mgr.get_fqdn(dd.hostname)
peers.append(build_url(host=addr, port=port).lstrip('/'))
+ ip_to_bind_to = ''
+ if spec.only_bind_port_on_networks and spec.networks:
+ assert daemon_spec.host is not None
+ ip_to_bind_to = self.mgr.get_first_matching_network_ip(daemon_spec.host, spec) or ''
+ if ip_to_bind_to:
+ daemon_spec.port_ips = {str(port): ip_to_bind_to}
+
deps.append(f'secure_monitoring_stack:{self.mgr.secure_monitoring_stack}')
if security_enabled:
alertmanager_user, alertmanager_password = self.mgr._get_alertmanager_credentials()
@@ -376,7 +386,8 @@ class AlertmanagerService(CephadmService):
},
'peers': peers,
'web_config': '/etc/alertmanager/web.yml',
- 'use_url_prefix': mgmt_gw_enabled
+ 'use_url_prefix': mgmt_gw_enabled,
+ 'ip_to_bind_to': ip_to_bind_to
}, sorted(deps)
else:
return {
@@ -384,7 +395,8 @@ class AlertmanagerService(CephadmService):
"alertmanager.yml": yml
},
"peers": peers,
- 'use_url_prefix': mgmt_gw_enabled
+ 'use_url_prefix': mgmt_gw_enabled,
+ 'ip_to_bind_to': ip_to_bind_to
}, sorted(deps)
def get_active_daemon(self, daemon_descrs: List[DaemonDescription]) -> DaemonDescription:
diff --git a/src/pybind/mgr/cephadm/templates/services/alertmanager/alertmanager.yml.j2 b/src/pybind/mgr/cephadm/templates/services/alertmanager/alertmanager.yml.j2
index de993cb6ce3..b6955caf616 100644
--- a/src/pybind/mgr/cephadm/templates/services/alertmanager/alertmanager.yml.j2
+++ b/src/pybind/mgr/cephadm/templates/services/alertmanager/alertmanager.yml.j2
@@ -8,6 +8,8 @@ global:
tls_config:
{% if security_enabled %}
ca_file: root_cert.pem
+ cert_file: alertmanager.crt
+ key_file: alertmanager.key
{% else %}
insecure_skip_verify: true
{% endif %}
diff --git a/src/pybind/mgr/cephadm/templates/services/mgmt-gateway/nginx.conf.j2 b/src/pybind/mgr/cephadm/templates/services/mgmt-gateway/nginx.conf.j2
index b9773ceeeb3..14af0fd48ca 100644
--- a/src/pybind/mgr/cephadm/templates/services/mgmt-gateway/nginx.conf.j2
+++ b/src/pybind/mgr/cephadm/templates/services/mgmt-gateway/nginx.conf.j2
@@ -9,6 +9,7 @@ events {
http {
#access_log /dev/stdout;
+ error_log /dev/stderr info;
client_header_buffer_size 32K;
large_client_header_buffers 4 32k;
proxy_busy_buffers_size 512k;
diff --git a/src/pybind/mgr/cephadm/templates/services/prometheus/prometheus.yml.j2 b/src/pybind/mgr/cephadm/templates/services/prometheus/prometheus.yml.j2
index ecfd899af71..961da145dac 100644
--- a/src/pybind/mgr/cephadm/templates/services/prometheus/prometheus.yml.j2
+++ b/src/pybind/mgr/cephadm/templates/services/prometheus/prometheus.yml.j2
@@ -28,6 +28,8 @@ alerting:
password: {{ service_discovery_password }}
tls_config:
ca_file: root_cert.pem
+ cert_file: prometheus.crt
+ key_file: prometheus.key
{% else %}
- scheme: http
http_sd_configs:
@@ -56,6 +58,8 @@ scrape_configs:
password: {{ service_discovery_password }}
tls_config:
ca_file: root_cert.pem
+ cert_file: prometheus.crt
+ key_file: prometheus.key
{% else %}
honor_labels: true
http_sd_configs:
@@ -81,6 +85,8 @@ scrape_configs:
password: {{ service_discovery_password }}
tls_config:
ca_file: root_cert.pem
+ cert_file: prometheus.crt
+ key_file: prometheus.key
{% else %}
http_sd_configs:
- url: {{ node_exporter_sd_url }}
@@ -104,6 +110,8 @@ scrape_configs:
password: {{ service_discovery_password }}
tls_config:
ca_file: root_cert.pem
+ cert_file: prometheus.crt
+ key_file: prometheus.key
{% else %}
http_sd_configs:
- url: {{ haproxy_sd_url }}
@@ -128,6 +136,8 @@ scrape_configs:
password: {{ service_discovery_password }}
tls_config:
ca_file: root_cert.pem
+ cert_file: prometheus.crt
+ key_file: prometheus.key
{% else %}
honor_labels: true
http_sd_configs:
@@ -149,6 +159,8 @@ scrape_configs:
password: {{ service_discovery_password }}
tls_config:
ca_file: root_cert.pem
+ cert_file: prometheus.crt
+ key_file: prometheus.key
{% else %}
http_sd_configs:
- url: {{ nvmeof_sd_url }}
@@ -169,6 +181,8 @@ scrape_configs:
password: {{ service_discovery_password }}
tls_config:
ca_file: root_cert.pem
+ cert_file: prometheus.crt
+ key_file: prometheus.key
{% else %}
http_sd_configs:
- url: {{ nfs_sd_url }}
@@ -189,6 +203,8 @@ scrape_configs:
password: {{ service_discovery_password }}
tls_config:
ca_file: root_cert.pem
+ cert_file: prometheus.crt
+ key_file: prometheus.key
{% else %}
http_sd_configs:
- url: {{ smb_sd_url }}
diff --git a/src/pybind/mgr/cephadm/tests/test_cephadm.py b/src/pybind/mgr/cephadm/tests/test_cephadm.py
index b81510504d9..22bd26def91 100644
--- a/src/pybind/mgr/cephadm/tests/test_cephadm.py
+++ b/src/pybind/mgr/cephadm/tests/test_cephadm.py
@@ -1741,16 +1741,23 @@ class TestCephadm(object):
nvmeof_client_cert = 'fake-nvmeof-client-cert'
nvmeof_server_cert = 'fake-nvmeof-server-cert'
nvmeof_root_ca_cert = 'fake-nvmeof-root-ca-cert'
+ grafana_cert_host_1 = 'grafana-cert-host-1'
+ grafana_cert_host_2 = 'grafana-cert-host-2'
cephadm_module.cert_key_store.save_cert('rgw_frontend_ssl_cert', rgw_frontend_rgw_foo_host2_cert, service_name='rgw.foo', user_made=True)
cephadm_module.cert_key_store.save_cert('nvmeof_server_cert', nvmeof_server_cert, service_name='nvmeof.foo', user_made=True)
cephadm_module.cert_key_store.save_cert('nvmeof_client_cert', nvmeof_client_cert, service_name='nvmeof.foo', user_made=True)
cephadm_module.cert_key_store.save_cert('nvmeof_root_ca_cert', nvmeof_root_ca_cert, service_name='nvmeof.foo', user_made=True)
+ cephadm_module.cert_key_store.save_cert('grafana_cert', grafana_cert_host_1, host='host-1', user_made=True)
+ cephadm_module.cert_key_store.save_cert('grafana_cert', grafana_cert_host_2, host='host-2', user_made=True)
expected_calls = [
mock.call(f'{CERT_STORE_CERT_PREFIX}rgw_frontend_ssl_cert', json.dumps({'rgw.foo': Cert(rgw_frontend_rgw_foo_host2_cert, True).to_json()})),
mock.call(f'{CERT_STORE_CERT_PREFIX}nvmeof_server_cert', json.dumps({'nvmeof.foo': Cert(nvmeof_server_cert, True).to_json()})),
mock.call(f'{CERT_STORE_CERT_PREFIX}nvmeof_client_cert', json.dumps({'nvmeof.foo': Cert(nvmeof_client_cert, True).to_json()})),
mock.call(f'{CERT_STORE_CERT_PREFIX}nvmeof_root_ca_cert', json.dumps({'nvmeof.foo': Cert(nvmeof_root_ca_cert, True).to_json()})),
+ mock.call(f'{CERT_STORE_CERT_PREFIX}grafana_cert', json.dumps({'host-1': Cert(grafana_cert_host_1, True).to_json()})),
+ mock.call(f'{CERT_STORE_CERT_PREFIX}grafana_cert', json.dumps({'host-1': Cert(grafana_cert_host_1, True).to_json(),
+ 'host-2': Cert(grafana_cert_host_2, True).to_json()}))
]
_set_store.assert_has_calls(expected_calls)
@@ -1795,17 +1802,20 @@ class TestCephadm(object):
cephadm_module.cert_key_store._init_known_cert_key_dicts()
grafana_host1_key = 'fake-grafana-host1-key'
+ grafana_host2_key = 'fake-grafana-host2-key'
nvmeof_client_key = 'nvmeof-client-key'
nvmeof_server_key = 'nvmeof-server-key'
nvmeof_encryption_key = 'nvmeof-encryption-key'
- grafana_host1_key = 'fake-grafana-host1-cert'
cephadm_module.cert_key_store.save_key('grafana_key', grafana_host1_key, host='host1')
+ cephadm_module.cert_key_store.save_key('grafana_key', grafana_host2_key, host='host2')
cephadm_module.cert_key_store.save_key('nvmeof_client_key', nvmeof_client_key, service_name='nvmeof.foo')
cephadm_module.cert_key_store.save_key('nvmeof_server_key', nvmeof_server_key, service_name='nvmeof.foo')
cephadm_module.cert_key_store.save_key('nvmeof_encryption_key', nvmeof_encryption_key, service_name='nvmeof.foo')
expected_calls = [
mock.call(f'{CERT_STORE_KEY_PREFIX}grafana_key', json.dumps({'host1': PrivKey(grafana_host1_key).to_json()})),
+ mock.call(f'{CERT_STORE_KEY_PREFIX}grafana_key', json.dumps({'host1': PrivKey(grafana_host1_key).to_json(),
+ 'host2': PrivKey(grafana_host2_key).to_json()})),
mock.call(f'{CERT_STORE_KEY_PREFIX}nvmeof_client_key', json.dumps({'nvmeof.foo': PrivKey(nvmeof_client_key).to_json()})),
mock.call(f'{CERT_STORE_KEY_PREFIX}nvmeof_server_key', json.dumps({'nvmeof.foo': PrivKey(nvmeof_server_key).to_json()})),
mock.call(f'{CERT_STORE_KEY_PREFIX}nvmeof_encryption_key', json.dumps({'nvmeof.foo': PrivKey(nvmeof_encryption_key).to_json()})),
diff --git a/src/pybind/mgr/cephadm/tests/test_services.py b/src/pybind/mgr/cephadm/tests/test_services.py
index 0d89657ac8c..d872219df80 100644
--- a/src/pybind/mgr/cephadm/tests/test_services.py
+++ b/src/pybind/mgr/cephadm/tests/test_services.py
@@ -581,7 +581,14 @@ class TestMonitoring:
mock_getfqdn.return_value = purl.hostname
with with_host(cephadm_module, "test"):
- with with_service(cephadm_module, AlertManagerSpec()):
+ cephadm_module.cache.update_host_networks('test', {
+ '1.2.3.0/24': {
+ 'if0': ['1.2.3.1']
+ },
+ })
+ with with_service(cephadm_module, AlertManagerSpec('alertmanager',
+ networks=['1.2.3.0/24'],
+ only_bind_port_on_networks=True)):
y = dedent(self._get_config(expected_yaml_url)).lstrip()
_run_cephadm.assert_called_with(
'test',
@@ -595,11 +602,12 @@ class TestMonitoring:
"deploy_arguments": [],
"params": {
'tcp_ports': [9093, 9094],
+ 'port_ips': {"9094": "1.2.3.1"},
},
"meta": {
'service_name': 'alertmanager',
'ports': [9093, 9094],
- 'ip': None,
+ 'ip': '1.2.3.1',
'deployed_by': [],
'rank': None,
'rank_generation': None,
@@ -612,6 +620,7 @@ class TestMonitoring:
},
"peers": [],
"use_url_prefix": False,
+ "ip_to_bind_to": "1.2.3.1",
}
}),
error_ok=True,
@@ -634,8 +643,16 @@ class TestMonitoring:
cephadm_module.secure_monitoring_stack = True
cephadm_module.set_store(AlertmanagerService.USER_CFG_KEY, 'alertmanager_user')
cephadm_module.set_store(AlertmanagerService.PASS_CFG_KEY, 'alertmanager_plain_password')
+
+ cephadm_module.cache.update_host_networks('test', {
+ 'fd12:3456:789a::/64': {
+ 'if0': ['fd12:3456:789a::10']
+ },
+ })
with with_service(cephadm_module, MgmtGatewaySpec("mgmt-gateway")) as _, \
- with_service(cephadm_module, AlertManagerSpec()):
+ with_service(cephadm_module, AlertManagerSpec('alertmanager',
+ networks=['fd12:3456:789a::/64'],
+ only_bind_port_on_networks=True)):
y = dedent("""
# This file is generated by cephadm.
@@ -646,6 +663,8 @@ class TestMonitoring:
http_config:
tls_config:
ca_file: root_cert.pem
+ cert_file: alertmanager.crt
+ key_file: alertmanager.key
route:
receiver: 'default'
@@ -686,11 +705,12 @@ class TestMonitoring:
"deploy_arguments": [],
"params": {
'tcp_ports': [9093, 9094],
+ 'port_ips': {"9094": "fd12:3456:789a::10"}
},
"meta": {
'service_name': 'alertmanager',
'ports': [9093, 9094],
- 'ip': None,
+ 'ip': 'fd12:3456:789a::10',
'deployed_by': [],
'rank': None,
'rank_generation': None,
@@ -708,6 +728,7 @@ class TestMonitoring:
'peers': [],
'web_config': '/etc/alertmanager/web.yml',
"use_url_prefix": True,
+ "ip_to_bind_to": "fd12:3456:789a::10",
}
}),
error_ok=True,
@@ -741,6 +762,8 @@ class TestMonitoring:
http_config:
tls_config:
ca_file: root_cert.pem
+ cert_file: alertmanager.crt
+ key_file: alertmanager.key
route:
receiver: 'default'
@@ -801,6 +824,7 @@ class TestMonitoring:
'peers': [],
'web_config': '/etc/alertmanager/web.yml',
"use_url_prefix": False,
+ "ip_to_bind_to": "",
}
}),
error_ok=True,
@@ -1170,6 +1194,8 @@ class TestMonitoring:
password: sd_password
tls_config:
ca_file: root_cert.pem
+ cert_file: prometheus.crt
+ key_file: prometheus.key
scrape_configs:
- job_name: 'ceph'
@@ -1191,6 +1217,8 @@ class TestMonitoring:
password: sd_password
tls_config:
ca_file: root_cert.pem
+ cert_file: prometheus.crt
+ key_file: prometheus.key
- job_name: 'node'
relabel_configs:
@@ -1209,6 +1237,8 @@ class TestMonitoring:
password: sd_password
tls_config:
ca_file: root_cert.pem
+ cert_file: prometheus.crt
+ key_file: prometheus.key
- job_name: 'haproxy'
relabel_configs:
@@ -1225,6 +1255,8 @@ class TestMonitoring:
password: sd_password
tls_config:
ca_file: root_cert.pem
+ cert_file: prometheus.crt
+ key_file: prometheus.key
- job_name: 'ceph-exporter'
relabel_configs:
@@ -1242,6 +1274,8 @@ class TestMonitoring:
password: sd_password
tls_config:
ca_file: root_cert.pem
+ cert_file: prometheus.crt
+ key_file: prometheus.key
- job_name: 'nvmeof'
honor_labels: true
@@ -1255,6 +1289,8 @@ class TestMonitoring:
password: sd_password
tls_config:
ca_file: root_cert.pem
+ cert_file: prometheus.crt
+ key_file: prometheus.key
- job_name: 'nfs'
honor_labels: true
@@ -1268,6 +1304,8 @@ class TestMonitoring:
password: sd_password
tls_config:
ca_file: root_cert.pem
+ cert_file: prometheus.crt
+ key_file: prometheus.key
- job_name: 'smb'
honor_labels: true
@@ -1281,6 +1319,8 @@ class TestMonitoring:
password: sd_password
tls_config:
ca_file: root_cert.pem
+ cert_file: prometheus.crt
+ key_file: prometheus.key
""").lstrip()
@@ -2071,6 +2111,26 @@ class TestRGWService:
})
assert f == expected
+ @pytest.mark.parametrize(
+ "disable_sync_traffic",
+ [
+ (True),
+ (False),
+ ]
+ )
+ @patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}'))
+ def test_rgw_disable_sync_traffic(self, disable_sync_traffic, cephadm_module: CephadmOrchestrator):
+ with with_host(cephadm_module, 'host1'):
+ s = RGWSpec(service_id="foo",
+ disable_multisite_sync_traffic=disable_sync_traffic)
+ with with_service(cephadm_module, s) as dds:
+ _, f, _ = cephadm_module.check_mon_command({
+ 'prefix': 'config get',
+ 'who': f'client.{dds[0]}',
+ 'key': 'rgw_run_sync_thread',
+ })
+ assert f == ('false' if disable_sync_traffic else 'true')
+
class TestMonService:
@@ -3874,6 +3934,7 @@ class TestMgmtGateway:
http {
#access_log /dev/stdout;
+ error_log /dev/stderr info;
client_header_buffer_size 32K;
large_client_header_buffers 4 32k;
proxy_busy_buffers_size 512k;
@@ -4121,6 +4182,7 @@ class TestMgmtGateway:
http {
#access_log /dev/stdout;
+ error_log /dev/stderr info;
client_header_buffer_size 32K;
large_client_header_buffers 4 32k;
proxy_busy_buffers_size 512k;
diff --git a/src/pybind/mgr/orchestrator/_interface.py b/src/pybind/mgr/orchestrator/_interface.py
index a505801eea5..4fbc975ae9f 100644
--- a/src/pybind/mgr/orchestrator/_interface.py
+++ b/src/pybind/mgr/orchestrator/_interface.py
@@ -747,6 +747,10 @@ class Orchestrator(object):
"""
raise NotImplementedError()
+ def set_osd_spec(self, service_name: str, osd_ids: List[str]) -> OrchResult:
+ """ set service of osd """
+ raise NotImplementedError()
+
def blink_device_light(self, ident_fault: str, on: bool, locations: List['DeviceLightLoc']) -> OrchResult[List[str]]:
"""
Instructs the orchestrator to enable or disable either the ident or the fault LED.
diff --git a/src/pybind/mgr/orchestrator/module.py b/src/pybind/mgr/orchestrator/module.py
index 332bc75d862..d5a1bb3da2b 100644
--- a/src/pybind/mgr/orchestrator/module.py
+++ b/src/pybind/mgr/orchestrator/module.py
@@ -1472,6 +1472,14 @@ Usage:
return HandleCommandResult(stdout=out)
+ @_cli_write_command('orch osd set-spec-affinity')
+ def _osd_set_spec(self, service_name: str, osd_id: List[str]) -> HandleCommandResult:
+ """Set service spec affinity for osd"""
+ completion = self.set_osd_spec(service_name, osd_id)
+ res = raise_if_exception(completion)
+
+ return HandleCommandResult(stdout=res)
+
@_cli_write_command('orch daemon add')
def daemon_add_misc(self,
daemon_type: Optional[ServiceType] = None,
@@ -1666,7 +1674,13 @@ Usage:
specs: List[Union[ServiceSpec, HostSpec]] = []
# YAML '---' document separator with no content generates
# None entries in the output. Let's skip them silently.
- content = [o for o in yaml_objs if o is not None]
+ try:
+ content = [o for o in yaml_objs if o is not None]
+ except yaml.scanner.ScannerError as e:
+ msg = f"Invalid YAML received : {str(e)}"
+ self.log.exception(msg)
+ return HandleCommandResult(-errno.EINVAL, stderr=msg)
+
for s in content:
try:
spec = json_to_generic_spec(s)
@@ -2191,7 +2205,13 @@ Usage:
specs: List[TunedProfileSpec] = []
# YAML '---' document separator with no content generates
# None entries in the output. Let's skip them silently.
- content = [o for o in yaml_objs if o is not None]
+ try:
+ content = [o for o in yaml_objs if o is not None]
+ except yaml.scanner.ScannerError as e:
+ msg = f"Invalid YAML received : {str(e)}"
+ self.log.exception(msg)
+ return HandleCommandResult(-errno.EINVAL, stderr=msg)
+
for spec in content:
specs.append(TunedProfileSpec.from_json(spec))
else:
diff --git a/src/python-common/ceph/deployment/service_spec.py b/src/python-common/ceph/deployment/service_spec.py
index 8a2a38b86ee..1ac9fa49e32 100644
--- a/src/python-common/ceph/deployment/service_spec.py
+++ b/src/python-common/ceph/deployment/service_spec.py
@@ -1231,6 +1231,7 @@ class RGWSpec(ServiceSpec):
rgw_bucket_counters_cache: Optional[bool] = False,
rgw_bucket_counters_cache_size: Optional[int] = None,
generate_cert: bool = False,
+ disable_multisite_sync_traffic: Optional[bool] = None,
):
assert service_type == 'rgw', service_type
@@ -1283,6 +1284,8 @@ class RGWSpec(ServiceSpec):
self.rgw_bucket_counters_cache_size = rgw_bucket_counters_cache_size
#: Whether we should generate a cert/key for the user if not provided
self.generate_cert = generate_cert
+ #: Used to make RGW not do multisite replication so that it can be dedicated to IO
+ self.disable_multisite_sync_traffic = disable_multisite_sync_traffic
def get_port_start(self) -> List[int]:
return [self.get_port()]
@@ -2328,6 +2331,7 @@ class AlertManagerSpec(MonitoringSpec):
user_data: Optional[Dict[str, Any]] = None,
config: Optional[Dict[str, str]] = None,
networks: Optional[List[str]] = None,
+ only_bind_port_on_networks: bool = False,
port: Optional[int] = None,
secure: bool = False,
extra_container_args: Optional[GeneralArgList] = None,
@@ -2358,6 +2362,7 @@ class AlertManagerSpec(MonitoringSpec):
# <webhook_configs> configuration.
self.user_data = user_data or {}
self.secure = secure
+ self.only_bind_port_on_networks = only_bind_port_on_networks
def get_port_start(self) -> List[int]:
return [self.get_port(), 9094]
@@ -2404,7 +2409,7 @@ class GrafanaSpec(MonitoringSpec):
self.protocol = protocol
# whether ports daemons for this service bind to should
- # bind to only hte networks listed in networks param, or
+ # bind to only the networks listed in networks param, or
# to all networks. Defaults to false which is saying to bind
# on all networks.
self.only_bind_port_on_networks = only_bind_port_on_networks
diff --git a/src/rgw/CMakeLists.txt b/src/rgw/CMakeLists.txt
index 3727c525ce7..41e473e23f0 100644
--- a/src/rgw/CMakeLists.txt
+++ b/src/rgw/CMakeLists.txt
@@ -487,9 +487,9 @@ target_link_libraries(radosgw PRIVATE
install(TARGETS radosgw DESTINATION bin)
set(radosgw_admin_srcs
- rgw_admin.cc
- rgw_sync_checkpoint.cc
- rgw_orphan.cc)
+ radosgw-admin/radosgw-admin.cc
+ radosgw-admin/sync_checkpoint.cc
+ radosgw-admin/orphan.cc)
# this is unsatisfying and hopefully temporary; ARROW should not be
# part of radosgw_admin
diff --git a/src/rgw/driver/daos/rgw_sal_daos.cc b/src/rgw/driver/daos/rgw_sal_daos.cc
index a87d88c4b85..92dd7afe2fb 100644
--- a/src/rgw/driver/daos/rgw_sal_daos.cc
+++ b/src/rgw/driver/daos/rgw_sal_daos.cc
@@ -858,8 +858,6 @@ bool DaosZone::is_writeable() { return true; }
bool DaosZone::get_redirect_endpoint(std::string* endpoint) { return false; }
-bool DaosZone::has_zonegroup_api(const std::string& api) const { return false; }
-
const std::string& DaosZone::get_current_period_id() {
return current_period->get_id();
}
diff --git a/src/rgw/driver/daos/rgw_sal_daos.h b/src/rgw/driver/daos/rgw_sal_daos.h
index e382fdb04ae..5515579a441 100644
--- a/src/rgw/driver/daos/rgw_sal_daos.h
+++ b/src/rgw/driver/daos/rgw_sal_daos.h
@@ -484,7 +484,6 @@ class DaosZone : public StoreZone {
virtual const std::string& get_name() const override;
virtual bool is_writeable() override;
virtual bool get_redirect_endpoint(std::string* endpoint) override;
- virtual bool has_zonegroup_api(const std::string& api) const override;
virtual const std::string& get_current_period_id() override;
virtual const RGWAccessKey& get_system_key() {
return zone_params->system_key;
diff --git a/src/rgw/driver/motr/rgw_sal_motr.cc b/src/rgw/driver/motr/rgw_sal_motr.cc
index b999673ac18..463ea8c5b11 100644
--- a/src/rgw/driver/motr/rgw_sal_motr.cc
+++ b/src/rgw/driver/motr/rgw_sal_motr.cc
@@ -1111,11 +1111,6 @@ bool MotrZone::get_redirect_endpoint(std::string* endpoint)
return false;
}
-bool MotrZone::has_zonegroup_api(const std::string& api) const
-{
- return (zonegroup.group.api_name == api);
-}
-
const std::string& MotrZone::get_current_period_id()
{
return current_period->get_id();
diff --git a/src/rgw/driver/motr/rgw_sal_motr.h b/src/rgw/driver/motr/rgw_sal_motr.h
index f92074b9d94..0f99ae48e86 100644
--- a/src/rgw/driver/motr/rgw_sal_motr.h
+++ b/src/rgw/driver/motr/rgw_sal_motr.h
@@ -525,7 +525,6 @@ class MotrZone : public StoreZone {
virtual const std::string& get_name() const override;
virtual bool is_writeable() override;
virtual bool get_redirect_endpoint(std::string* endpoint) override;
- virtual bool has_zonegroup_api(const std::string& api) const override;
virtual const std::string& get_current_period_id() override;
virtual const RGWAccessKey& get_system_key() { return zone_params->system_key; }
virtual const std::string& get_realm_name() { return realm->get_name(); }
diff --git a/src/rgw/driver/posix/rgw_sal_posix.cc b/src/rgw/driver/posix/rgw_sal_posix.cc
index 1345468210f..9d76462baa0 100644
--- a/src/rgw/driver/posix/rgw_sal_posix.cc
+++ b/src/rgw/driver/posix/rgw_sal_posix.cc
@@ -2893,6 +2893,14 @@ int POSIXObject::copy_object(const ACLOwner& owner,
return dobj->set_obj_attrs(dpp, &attrs, nullptr, y, rgw::sal::FLAG_LOG_OP);
}
+int POSIXObject::list_parts(const DoutPrefixProvider* dpp, CephContext* cct,
+ int max_parts, int marker, int* next_marker,
+ bool* truncated, list_parts_each_t each_func,
+ optional_yield y)
+{ /* Stub: none of the arguments are read, neither *next_marker nor
+ * *truncated is written, each_func is never invoked, and the call
+ * always fails with -EOPNOTSUPP. */
+ return -EOPNOTSUPP;
+}
+
int POSIXObject::load_obj_state(const DoutPrefixProvider* dpp, optional_yield y, bool follow_olh)
{
int ret = stat(dpp);
diff --git a/src/rgw/driver/posix/rgw_sal_posix.h b/src/rgw/driver/posix/rgw_sal_posix.h
index 8ec72bbc1bc..bf3478ad6ab 100644
--- a/src/rgw/driver/posix/rgw_sal_posix.h
+++ b/src/rgw/driver/posix/rgw_sal_posix.h
@@ -653,6 +653,13 @@ public:
const DoutPrefixProvider* dpp, optional_yield y) override;
virtual RGWAccessControlPolicy& get_acl(void) override { return acls; }
virtual int set_acl(const RGWAccessControlPolicy& acl) override { acls = acl; return 0; }
+
+ /** If multipart, enumerate (a range [marker..marker+min(max_parts, parts_count-1)] of) parts of the object */
+ virtual int list_parts(const DoutPrefixProvider* dpp, CephContext* cct,
+ int max_parts, int marker, int* next_marker,
+ bool* truncated, list_parts_each_t each_func,
+ optional_yield y) override;
+
virtual int load_obj_state(const DoutPrefixProvider* dpp, optional_yield y, bool follow_olh = true) override;
virtual int set_obj_attrs(const DoutPrefixProvider* dpp, Attrs* setattrs,
Attrs* delattrs, optional_yield y, uint32_t flags) override;
diff --git a/src/rgw/driver/rados/rgw_period.cc b/src/rgw/driver/rados/rgw_period.cc
index f18e8e46bc5..aacb9b6a09a 100644
--- a/src/rgw/driver/rados/rgw_period.cc
+++ b/src/rgw/driver/rados/rgw_period.cc
@@ -68,20 +68,6 @@ int RGWPeriod::delete_obj(const DoutPrefixProvider *dpp, optional_yield y)
return ret;
}
-int RGWPeriod::add_zonegroup(const DoutPrefixProvider *dpp, const RGWZoneGroup& zonegroup, optional_yield y)
-{
- if (zonegroup.realm_id != realm_id) {
- return 0;
- }
- int ret = period_map.update(zonegroup, cct);
- if (ret < 0) {
- ldpp_dout(dpp, 0) << "ERROR: updating period map: " << cpp_strerror(-ret) << dendl;
- return ret;
- }
-
- return store_info(dpp, false, y);
-}
-
int RGWPeriod::update(const DoutPrefixProvider *dpp, optional_yield y)
{
auto zone_svc = sysobj_svc->get_zone_svc();
diff --git a/src/rgw/driver/rados/rgw_rados.cc b/src/rgw/driver/rados/rgw_rados.cc
index 0b77bca1da7..69075c506f1 100644
--- a/src/rgw/driver/rados/rgw_rados.cc
+++ b/src/rgw/driver/rados/rgw_rados.cc
@@ -6962,13 +6962,13 @@ int RGWRados::set_attrs(const DoutPrefixProvider *dpp, RGWObjectCtx* octx, RGWBu
}
return 0;
-}
+} /* RGWRados::set_attrs() */
-static int get_part_obj_state(const DoutPrefixProvider* dpp, optional_yield y,
- RGWRados* store, RGWBucketInfo& bucket_info,
- RGWObjectCtx* rctx, RGWObjManifest* manifest,
- int part_num, int* parts_count, bool prefetch,
- RGWObjState** pstate, RGWObjManifest** pmanifest)
+int RGWRados::get_part_obj_state(const DoutPrefixProvider* dpp, optional_yield y,
+ RGWRados* store, RGWBucketInfo& bucket_info,
+ RGWObjectCtx* rctx, RGWObjManifest* manifest,
+ int part_num, int* parts_count, bool prefetch,
+ RGWObjState** pstate, RGWObjManifest** pmanifest)
{
if (!manifest) {
return -ERR_INVALID_PART;
@@ -7047,6 +7047,9 @@ static int get_part_obj_state(const DoutPrefixProvider* dpp, optional_yield y,
// update the object size
sm->state.size = part_manifest.get_obj_size();
+ if (!sm->state.attrset.count(RGW_ATTR_COMPRESSION)) {
+ sm->state.accounted_size = sm->state.size;
+ }
*pmanifest = &part_manifest;
return 0;
diff --git a/src/rgw/driver/rados/rgw_rados.h b/src/rgw/driver/rados/rgw_rados.h
index b24823b60dc..fe79916392f 100644
--- a/src/rgw/driver/rados/rgw_rados.h
+++ b/src/rgw/driver/rados/rgw_rados.h
@@ -1071,6 +1071,12 @@ public:
}; // class RGWRados::Bucket::List
}; // class RGWRados::Bucket
+ static int get_part_obj_state(const DoutPrefixProvider* dpp, optional_yield y,
+ RGWRados* store, RGWBucketInfo& bucket_info,
+ RGWObjectCtx* rctx, RGWObjManifest* manifest,
+ int part_num, int* parts_count, bool prefetch,
+ RGWObjState** pstate, RGWObjManifest** pmanifest);
+
int on_last_entry_in_listing(const DoutPrefixProvider *dpp,
RGWBucketInfo& bucket_info,
const std::string& obj_prefix,
diff --git a/src/rgw/driver/rados/rgw_sal_rados.cc b/src/rgw/driver/rados/rgw_sal_rados.cc
index 88da446c3de..4c67d0ee71a 100644
--- a/src/rgw/driver/rados/rgw_sal_rados.cc
+++ b/src/rgw/driver/rados/rgw_sal_rados.cc
@@ -2471,7 +2471,108 @@ bool RadosObject::is_sync_completed(const DoutPrefixProvider* dpp,
const rgw_bi_log_entry& earliest_marker = entries.front();
return earliest_marker.timestamp > obj_mtime;
-}
+} /* is_sync_completed */
+
+int RadosObject::list_parts(const DoutPrefixProvider* dpp, CephContext* cct,
+ int max_parts, int marker, int* next_marker,
+ bool* truncated, list_parts_each_t each_func,
+ optional_yield y)
+{ /* Enumerate the logical parts of a multipart object, calling each_func
+ * once per part with its part number, size and (when stored) checksum.
+ *
+ * max_parts: upper bound on the number of parts reported by this call
+ * marker: number of leading parts to skip; when non-zero, iteration
+ * starts at the manifest part looked up with `marker + 1`
+ * next_marker: written after every reported part with the incremented
+ * marker; left untouched when nothing is reported
+ * truncated: set to true when max_parts runs out while another part
+ * remains; never cleared here, so the caller must initialize it
+ * cct: not referenced in this body
+ * y: forwarded to RGWRados::get_part_obj_state()
+ *
+ * Returns -EINVAL when no manifest is loaded; 0 when the object is not
+ * multipart, the marker is past the last part, or the marker's part
+ * cannot be found; otherwise 0 or the negative error returned by
+ * get_part_obj_state() (parts reported before the error stay reported).
+ */
+ int ret{0};
+
+ /* require an object with a manifest, so call to get_obj_state() must precede this */
+ if (! manifest) {
+ return -EINVAL;
+ }
+
+ RGWObjManifest::obj_iterator end = manifest->obj_end(dpp);
+ if (end.get_cur_part_id() == 0) { // not multipart
+ ldpp_dout(dpp, 20) << __func__ << " object does not have a multipart manifest"
+ << dendl;
+ return 0;
+ }
+
+ auto end_part_id = end.get_cur_part_id();
+ auto parts_count = (end_part_id == 1) ? 1 : end_part_id - 1; // NOTE(review): presumably end's part id is one past the last part unless it is 1 -- confirm
+ if (marker > (parts_count - 1)) { // nothing left to list
+ return 0;
+ }
+
+ RGWObjManifest::obj_iterator part_iter = manifest->obj_begin(dpp);
+
+ if (marker != 0) {
+ ldpp_dout_fmt(dpp, 20,
+ "{} seeking to part #{} in the object manifest",
+ __func__, marker);
+
+ part_iter = manifest->obj_find_part(dpp, marker + 1); // presumably part ids are 1-based, making this the first unreported part -- confirm
+
+ if (part_iter == end) {
+ ldpp_dout_fmt(dpp, 5,
+ "{} failed to find part #{} in the object manifest",
+ __func__, marker + 1);
+ return 0;
+ }
+ }
+
+ RGWObjectCtx& obj_ctx = get_ctx();
+ RGWBucketInfo& bucket_info = get_bucket()->get_info();
+
+ Object::Part obj_part{}; // reused for every part; its part_number also remembers the last part reported
+ for (; part_iter != manifest->obj_end(dpp); ++part_iter) {
+
+ /* we're only interested in the first object in each logical part */
+ auto cur_part_id = part_iter.get_cur_part_id();
+ if (cur_part_id == obj_part.part_number) {
+ continue;
+ }
+
+ if (max_parts < 1) { // budget used up, yet another part exists
+ *truncated = true;
+ break;
+ }
+
+ /* get_part_obj_state alters the passed manifest** to point to a part
+ * manifest, which we don't want to leak out here */
+ RGWObjManifest* obj_m = manifest;
+ RGWObjState* astate; // out-parameter of the call below; only read after that call succeeds
+ bool part_prefetch = false;
+ ret = RGWRados::get_part_obj_state(dpp, y, store->getRados(), bucket_info, &obj_ctx,
+ obj_m, cur_part_id, &parts_count,
+ part_prefetch, &astate, &obj_m);
+
+ if (ret < 0) {
+ ldpp_dout_fmt(dpp, 4,
+ "{} get_part_obj_state() failed ret={}",
+ __func__, ret);
+ break; // stop listing; ret is handed back to the caller
+ }
+
+ obj_part.part_number = part_iter.get_cur_part_id();
+ obj_part.part_size = astate->accounted_size;
+
+ if (auto iter = astate->attrset.find(RGW_ATTR_CKSUM);
+ iter != astate->attrset.end()) {
+ try {
+ rgw::cksum::Cksum part_cksum;
+ auto ck_iter = iter->second.cbegin();
+ part_cksum.decode(ck_iter);
+ obj_part.cksum = std::move(part_cksum);
+ } catch (buffer::error& err) { // decode failure is logged only; the part is still reported below
+ ldpp_dout_fmt(dpp, 4,
+ "WARN: {} could not decode stored cksum, "
+ "caught buffer::error",
+ __func__);
+ }
+ } // NOTE(review): obj_part is reused, so when the attr is absent or undecodable its cksum keeps whatever an earlier part stored -- confirm this is intended
+
+ each_func(obj_part);
+ *next_marker = ++marker;
+ --max_parts;
+ } /* each part */
+
+ return ret;
+} /* RadosObject::list_parts */
int RadosObject::load_obj_state(const DoutPrefixProvider* dpp, optional_yield y, bool follow_olh)
{
@@ -4500,11 +4601,6 @@ bool RadosZone::get_redirect_endpoint(std::string* endpoint)
return true;
}
-bool RadosZone::has_zonegroup_api(const std::string& api) const
-{
- return store->svc()->zone->has_zonegroup_api(api);
-}
-
const std::string& RadosZone::get_current_period_id()
{
return store->svc()->zone->get_current_period_id();
diff --git a/src/rgw/driver/rados/rgw_sal_rados.h b/src/rgw/driver/rados/rgw_sal_rados.h
index 23d81a934b0..85ea247e345 100644
--- a/src/rgw/driver/rados/rgw_sal_rados.h
+++ b/src/rgw/driver/rados/rgw_sal_rados.h
@@ -107,7 +107,6 @@ class RadosZone : public StoreZone {
virtual const std::string& get_name() const override;
virtual bool is_writeable() override;
virtual bool get_redirect_endpoint(std::string* endpoint) override;
- virtual bool has_zonegroup_api(const std::string& api) const override;
virtual const std::string& get_current_period_id() override;
virtual const RGWAccessKey& get_system_key() override;
virtual const std::string& get_realm_name() override;
@@ -593,12 +592,18 @@ class RadosObject : public StoreObject {
StoreObject::set_compressed();
}
-
virtual bool is_sync_completed(const DoutPrefixProvider* dpp,
const ceph::real_time& obj_mtime) override;
/* For rgw_admin.cc */
RGWObjState& get_state() { return state; }
virtual int load_obj_state(const DoutPrefixProvider* dpp, optional_yield y, bool follow_olh = true) override;
+
+ /** If multipart, enumerate (a range [marker..marker+min(max_parts, parts_count-1)] of) parts of the object */
+ virtual int list_parts(const DoutPrefixProvider* dpp, CephContext* cct,
+ int max_parts, int marker, int* next_marker,
+ bool* truncated, list_parts_each_t each_func,
+ optional_yield y) override;
+
virtual int set_obj_attrs(const DoutPrefixProvider* dpp, Attrs* setattrs, Attrs* delattrs, optional_yield y, uint32_t flags) override;
virtual int get_obj_attrs(optional_yield y, const DoutPrefixProvider* dpp, rgw_obj* target_obj = NULL) override;
virtual int modify_obj_attrs(const char* attr_name, bufferlist& attr_val, optional_yield y, const DoutPrefixProvider* dpp) override;
diff --git a/src/rgw/driver/rados/rgw_tools.cc b/src/rgw/driver/rados/rgw_tools.cc
index 79d2be0bcfa..bf7a309e864 100644
--- a/src/rgw/driver/rados/rgw_tools.cc
+++ b/src/rgw/driver/rados/rgw_tools.cc
@@ -339,21 +339,35 @@ int rgw_list_pool(const DoutPrefixProvider *dpp,
ldpp_dout(dpp, 10) << "failed to parse cursor: " << marker << dendl;
return -EINVAL;
}
-
- auto iter = ioctx.nobjects_begin(oc);
+ librados::NObjectIterator iter;
+ try {
+ iter = ioctx.nobjects_begin(oc);
+ } catch (const std::system_error& e) {
+ ldpp_dout(dpp, 1) << "rgw_list_pool: Failed to begin iteration of pool "
+ << ioctx.get_pool_name() << " with error "
+ << e.what() << dendl;
+ return ceph::from_error_code(e.code());
+ }
/// Pool_iterate
if (iter == ioctx.nobjects_end())
return -ENOENT;
- for (; oids->size() < max && iter != ioctx.nobjects_end(); ++iter) {
- string oid = iter->get_oid();
- ldpp_dout(dpp, 20) << "RGWRados::pool_iterate: got " << oid << dendl;
+ try {
+ for (; oids->size() < max && iter != ioctx.nobjects_end(); ++iter) {
+ string oid = iter->get_oid();
+ ldpp_dout(dpp, 20) << "RGWRados::pool_iterate: got " << oid << dendl;
- // fill it in with initial values; we may correct later
- if (filter && !filter(oid, oid))
- continue;
+ // fill it in with initial values; we may correct later
+ if (filter && !filter(oid, oid))
+ continue;
- oids->push_back(oid);
+ oids->push_back(oid);
+ }
+ } catch (const std::system_error& e) {
+ ldpp_dout(dpp, 1) << "rgw_list_pool: Failed iterating pool "
+ << ioctx.get_pool_name() << " with error "
+ << e.what() << dendl;
+ return ceph::from_error_code(e.code());
}
marker = iter.get_cursor().to_str();
diff --git a/src/rgw/driver/rados/rgw_user.h b/src/rgw/driver/rados/rgw_user.h
index ab157f38e39..4ae7d13eff7 100644
--- a/src/rgw/driver/rados/rgw_user.h
+++ b/src/rgw/driver/rados/rgw_user.h
@@ -19,11 +19,11 @@
#define RGW_USER_ANON_ID "anonymous"
-#define SECRET_KEY_LEN 40
-#define PUBLIC_ID_LEN 20
-#define RAND_SUBUSER_LEN 5
+constexpr auto SECRET_KEY_LEN=40;
+constexpr auto PUBLIC_ID_LEN=20;
+constexpr auto RAND_SUBUSER_LEN=5;
-#define XMLNS_AWS_S3 "http://s3.amazonaws.com/doc/2006-03-01/"
+constexpr auto XMLNS_AWS_S3 = "http://s3.amazonaws.com/doc/2006-03-01/";
class RGWUserCtl;
class RGWBucketCtl;
diff --git a/src/rgw/driver/rados/rgw_zone.h b/src/rgw/driver/rados/rgw_zone.h
index c542abc76d6..5fb2b4b8096 100644
--- a/src/rgw/driver/rados/rgw_zone.h
+++ b/src/rgw/driver/rados/rgw_zone.h
@@ -769,7 +769,6 @@ public:
int create(const DoutPrefixProvider *dpp, optional_yield y, bool exclusive = true);
int delete_obj(const DoutPrefixProvider *dpp, optional_yield y);
int store_info(const DoutPrefixProvider *dpp, bool exclusive, optional_yield y);
- int add_zonegroup(const DoutPrefixProvider *dpp, const RGWZoneGroup& zonegroup, optional_yield y);
void fork();
int update(const DoutPrefixProvider *dpp, optional_yield y);
diff --git a/src/rgw/rgw_orphan.cc b/src/rgw/radosgw-admin/orphan.cc
index b7dc562c721..9fca3b99a7c 100644
--- a/src/rgw/rgw_orphan.cc
+++ b/src/rgw/radosgw-admin/orphan.cc
@@ -1,6 +1,12 @@
+
+/*
+ * Copyright (C) 2024 IBM
+*/
+
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
// vim: ts=8 sw=2 smarttab ft=cpp
+#include "radosgw-admin/orphan.h"
#include <string>
@@ -10,7 +16,6 @@
#include "rgw_op.h"
#include "rgw_multi.h"
-#include "rgw_orphan.h"
#include "rgw_zone.h"
#include "rgw_bucket.h"
#include "rgw_sal_rados.h"
diff --git a/src/rgw/rgw_orphan.h b/src/rgw/radosgw-admin/orphan.h
index db811d31d9a..db811d31d9a 100644
--- a/src/rgw/rgw_orphan.h
+++ b/src/rgw/radosgw-admin/orphan.h
diff --git a/src/rgw/rgw_admin.cc b/src/rgw/radosgw-admin/radosgw-admin.cc
index f2c1e81b540..182e42b8e31 100644
--- a/src/rgw/rgw_admin.cc
+++ b/src/rgw/radosgw-admin/radosgw-admin.cc
@@ -1,12 +1,15 @@
+/*
+ * Copyright (C) 2025 IBM
+*/
+
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
// vim: ts=8 sw=2 smarttab ft=cpp
-#include <errno.h>
-#include <iostream>
-#include <sstream>
+#include <cerrno>
#include <string>
-
-#include <boost/optional.hpp>
+#include <sstream>
+#include <optional>
+#include <iostream>
extern "C" {
#include <liboath/oath.h>
@@ -38,6 +41,9 @@ extern "C" {
#include "include/utime.h"
#include "include/str_list.h"
+#include "radosgw-admin/orphan.h"
+#include "radosgw-admin/sync_checkpoint.h"
+
#include "rgw_user.h"
#include "rgw_otp.h"
#include "rgw_rados.h"
@@ -48,7 +54,6 @@ extern "C" {
#include "rgw_log.h"
#include "rgw_formats.h"
#include "rgw_usage.h"
-#include "rgw_orphan.h"
#include "rgw_sync.h"
#include "rgw_trim_bilog.h"
#include "rgw_trim_datalog.h"
@@ -62,7 +67,6 @@ extern "C" {
#include "rgw_zone.h"
#include "rgw_pubsub.h"
#include "rgw_bucket_sync.h"
-#include "rgw_sync_checkpoint.h"
#include "rgw_lua.h"
#include "rgw_sal.h"
#include "rgw_sal_config.h"
@@ -82,11 +86,6 @@ extern "C" {
#define dout_context g_ceph_context
-#define SECRET_KEY_LEN 40
-#define PUBLIC_ID_LEN 20
-
-using namespace std;
-
static rgw::sal::Driver* driver = NULL;
static constexpr auto dout_subsys = ceph_subsys_rgw;
@@ -117,19 +116,13 @@ static const DoutPrefixProvider* dpp() {
} \
} while (0)
-static inline int posix_errortrans(int r)
+using namespace std;
+
+inline int posix_errortrans(int r)
{
- switch(r) {
- case ERR_NO_SUCH_BUCKET:
- r = ENOENT;
- break;
- default:
- break;
- }
- return r;
+ return ERR_NO_SUCH_BUCKET == r ? ENOENT : r;
}
-
static const std::string LUA_CONTEXT_LIST("prerequest, postrequest, background, getdata, putdata");
void usage()
@@ -1272,7 +1265,7 @@ static int read_input(const string& infile, bufferlist& bl)
}
}
-#define READ_CHUNK 8196
+ constexpr auto READ_CHUNK=8196;
int r;
int err;
diff --git a/src/rgw/rgw_sync_checkpoint.cc b/src/rgw/radosgw-admin/sync_checkpoint.cc
index 1172e79a48f..0303ed6c747 100644
--- a/src/rgw/rgw_sync_checkpoint.cc
+++ b/src/rgw/radosgw-admin/sync_checkpoint.cc
@@ -5,6 +5,7 @@
* Ceph - scalable distributed file system
*
* Copyright (C) 2020 Red Hat, Inc.
+ * Copyright (C) 2024 IBM
*
* This is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
@@ -13,9 +14,12 @@
*
*/
+#include "radosgw-admin/sync_checkpoint.h"
+
#include <fmt/format.h>
+
#include "common/errno.h"
-#include "rgw_sync_checkpoint.h"
+
#include "rgw_sal_rados.h"
#include "rgw_bucket_sync.h"
#include "rgw_data_sync.h"
diff --git a/src/rgw/rgw_sync_checkpoint.h b/src/rgw/radosgw-admin/sync_checkpoint.h
index 28df68d8860..28df68d8860 100644
--- a/src/rgw/rgw_sync_checkpoint.h
+++ b/src/rgw/radosgw-admin/sync_checkpoint.h
diff --git a/src/rgw/rgw_cksum_pipe.cc b/src/rgw/rgw_cksum_pipe.cc
index e06957e2715..0bec8d341af 100644
--- a/src/rgw/rgw_cksum_pipe.cc
+++ b/src/rgw/rgw_cksum_pipe.cc
@@ -18,6 +18,7 @@
#include <string>
#include <fmt/format.h>
#include <boost/algorithm/string.hpp>
+#include "rgw_cksum.h"
#include "rgw_common.h"
#include "common/dout.h"
#include "rgw_client_io.h"
@@ -34,7 +35,8 @@ namespace rgw::putobj {
{}
std::unique_ptr<RGWPutObj_Cksum> RGWPutObj_Cksum::Factory(
- rgw::sal::DataProcessor* next, const RGWEnv& env)
+ rgw::sal::DataProcessor* next, const RGWEnv& env,
+ rgw::cksum::Type override_type)
{
/* look for matching headers */
auto algo_header = cksum_algorithm_hdr(env);
@@ -49,6 +51,13 @@ namespace rgw::putobj {
throw rgw::io::Exception(EINVAL, std::system_category());
}
/* no checksum header */
+ if (override_type != rgw::cksum::Type::none) {
+ /* XXXX safe? do we need to fixup env as well? */
+ auto algo_header = cksum_algorithm_hdr(override_type);
+ return
+ std::make_unique<RGWPutObj_Cksum>(
+ next, override_type, std::move(algo_header));
+ }
return std::unique_ptr<RGWPutObj_Cksum>();
}
diff --git a/src/rgw/rgw_cksum_pipe.h b/src/rgw/rgw_cksum_pipe.h
index fddcd283c84..c459d156335 100644
--- a/src/rgw/rgw_cksum_pipe.h
+++ b/src/rgw/rgw_cksum_pipe.h
@@ -20,6 +20,7 @@
#include <tuple>
#include <cstring>
#include <boost/algorithm/string/case_conv.hpp>
+#include "rgw_cksum.h"
#include "rgw_cksum_digest.h"
#include "rgw_common.h"
#include "rgw_putobj.h"
@@ -29,6 +30,38 @@ namespace rgw::putobj {
namespace cksum = rgw::cksum;
using cksum_hdr_t = std::pair<const char*, const char*>;
+ /**
+  * Build the checksum-algorithm header pair for an explicitly chosen
+  * checksum type (rather than one taken from the request environment).
+  *
+  * @param t checksum type to describe
+  * @return (header key, algorithm name); the key is the same for every
+  *         mapped type. Types without a mapping here (e.g. Type::none)
+  *         yield (nullptr, nullptr), so callers must not dereference the
+  *         result for those.
+  */
+ static inline const cksum_hdr_t cksum_algorithm_hdr(rgw::cksum::Type t) {
+   /* view over a string literal, so data() is NUL-terminated */
+   static constexpr std::string_view hdr =
+     "HTTP_X_AMZ_SDK_CHECKSUM_ALGORITHM";
+   using rgw::cksum::Type;
+   switch (t) {
+   case Type::sha256:
+     return cksum_hdr_t(hdr.data(), "SHA256");
+   case Type::crc32:
+     return cksum_hdr_t(hdr.data(), "CRC32");
+   case Type::crc32c:
+     return cksum_hdr_t(hdr.data(), "CRC32C");
+   case Type::xxh3:
+     /* algorithm name is XXH3, spelled like the enumerator as in every
+      * other case (the previous "XX3" named no algorithm) */
+     return cksum_hdr_t(hdr.data(), "XXH3");
+   case Type::sha1:
+     return cksum_hdr_t(hdr.data(), "SHA1");
+   case Type::sha512:
+     return cksum_hdr_t(hdr.data(), "SHA512");
+   case Type::blake3:
+     return cksum_hdr_t(hdr.data(), "BLAKE3");
+   default:
+     break;
+   }
+   /* no mapping for this type */
+   return cksum_hdr_t(nullptr, nullptr);
+ }
+
static inline const cksum_hdr_t cksum_algorithm_hdr(const RGWEnv& env) {
/* If the individual checksum value you provide through
x-amz-checksum-algorithm doesn't match the checksum algorithm
@@ -102,7 +135,8 @@ namespace rgw::putobj {
using VerifyResult = std::tuple<bool, const cksum::Cksum&>;
static std::unique_ptr<RGWPutObj_Cksum> Factory(
- rgw::sal::DataProcessor* next, const RGWEnv&);
+ rgw::sal::DataProcessor* next, const RGWEnv&,
+ rgw::cksum::Type override_type);
RGWPutObj_Cksum(rgw::sal::DataProcessor* next, rgw::cksum::Type _type,
cksum_hdr_t&& _hdr);
diff --git a/src/rgw/rgw_common.cc b/src/rgw/rgw_common.cc
index 97c46f12bd0..6610538542c 100644
--- a/src/rgw/rgw_common.cc
+++ b/src/rgw/rgw_common.cc
@@ -63,6 +63,7 @@ rgw_http_errors rgw_http_s3_errors({
{ ERR_INVALID_DIGEST, {400, "InvalidDigest" }},
{ ERR_BAD_DIGEST, {400, "BadDigest" }},
{ ERR_INVALID_LOCATION_CONSTRAINT, {400, "InvalidLocationConstraint" }},
+ { ERR_ILLEGAL_LOCATION_CONSTRAINT_EXCEPTION, {400, "IllegalLocationConstraintException" }},
{ ERR_ZONEGROUP_DEFAULT_PLACEMENT_MISCONFIGURATION, {400, "ZonegroupDefaultPlacementMisconfiguration" }},
{ ERR_INVALID_BUCKET_NAME, {400, "InvalidBucketName" }},
{ ERR_INVALID_OBJECT_NAME, {400, "InvalidObjectName" }},
diff --git a/src/rgw/rgw_common.h b/src/rgw/rgw_common.h
index f0bd41494c3..d2917838f36 100644
--- a/src/rgw/rgw_common.h
+++ b/src/rgw/rgw_common.h
@@ -337,6 +337,7 @@ inline constexpr const char* RGW_REST_STS_XMLNS =
#define ERR_PRESIGNED_URL_EXPIRED 2223
#define ERR_PRESIGNED_URL_DISABLED 2224
#define ERR_AUTHORIZATION 2225 // SNS 403 AuthorizationError
+#define ERR_ILLEGAL_LOCATION_CONSTRAINT_EXCEPTION 2226
#define ERR_BUSY_RESHARDING 2300 // also in cls_rgw_types.h, don't change!
#define ERR_NO_SUCH_ENTITY 2301
diff --git a/src/rgw/rgw_iam_policy.cc b/src/rgw/rgw_iam_policy.cc
index 2a5c9cd313e..ef6761d4222 100644
--- a/src/rgw/rgw_iam_policy.cc
+++ b/src/rgw/rgw_iam_policy.cc
@@ -94,6 +94,8 @@ static const actpair actpairs[] =
{ "s3:GetPublicAccessBlock", s3GetPublicAccessBlock },
{ "s3:GetObjectAcl", s3GetObjectAcl },
{ "s3:GetObject", s3GetObject },
+ { "s3:GetObjectAttributes", s3GetObjectAttributes },
+ { "s3:GetObjectVersionAttributes", s3GetObjectVersionAttributes },
{ "s3:GetObjectTorrent", s3GetObjectTorrent },
{ "s3:GetObjectVersionAcl", s3GetObjectVersionAcl },
{ "s3:GetObjectVersion", s3GetObjectVersion },
@@ -1335,6 +1337,7 @@ const char* action_bit_string(uint64_t action) {
case s3ListBucketVersions:
return "s3:ListBucketVersions";
+
case s3ListAllMyBuckets:
return "s3:ListAllMyBuckets";
@@ -1479,6 +1482,12 @@ const char* action_bit_string(uint64_t action) {
case s3BypassGovernanceRetention:
return "s3:BypassGovernanceRetention";
+ case s3GetObjectAttributes:
+ return "s3:GetObjectAttributes";
+
+ case s3GetObjectVersionAttributes:
+ return "s3:GetObjectVersionAttributes";
+
case s3DescribeJob:
return "s3:DescribeJob";
diff --git a/src/rgw/rgw_iam_policy.h b/src/rgw/rgw_iam_policy.h
index 0476926143f..dd323ee4b9c 100644
--- a/src/rgw/rgw_iam_policy.h
+++ b/src/rgw/rgw_iam_policy.h
@@ -115,6 +115,8 @@ enum {
s3GetBucketEncryption,
s3PutBucketEncryption,
s3DescribeJob,
+ s3GetObjectAttributes,
+ s3GetObjectVersionAttributes,
s3All,
s3objectlambdaGetObject,
@@ -247,6 +249,8 @@ inline int op_to_perm(std::uint64_t op) {
case s3GetObjectVersionTagging:
case s3GetObjectRetention:
case s3GetObjectLegalHold:
+ case s3GetObjectAttributes:
+ case s3GetObjectVersionAttributes:
case s3ListAllMyBuckets:
case s3ListBucket:
case s3ListBucketMultipartUploads:
diff --git a/src/rgw/rgw_op.cc b/src/rgw/rgw_op.cc
index 5ce0033de47..7b0ca3134a3 100644
--- a/src/rgw/rgw_op.cc
+++ b/src/rgw/rgw_op.cc
@@ -25,8 +25,10 @@
#include "common/ceph_json.h"
#include "common/static_ptr.h"
#include "common/perf_counters_key.h"
+#include "rgw_cksum.h"
#include "rgw_cksum_digest.h"
#include "rgw_common.h"
+#include "common/split.h"
#include "rgw_tracer.h"
#include "rgw_rados.h"
@@ -3558,54 +3560,62 @@ void RGWCreateBucket::execute(optional_yield y)
const rgw::SiteConfig& site = *s->penv.site;
const std::optional<RGWPeriod>& period = site.get_period();
const RGWZoneGroup& my_zonegroup = site.get_zonegroup();
-
- if (s->system_request) {
- // allow system requests to override the target zonegroup. for forwarded
- // requests, we'll create the bucket for the originating zonegroup
- createparams.zonegroup_id = s->info.args.get(RGW_SYS_PARAM_PREFIX "zonegroup");
- }
-
+ const std::string rgwx_zonegroup = s->info.args.get(RGW_SYS_PARAM_PREFIX "zonegroup");
const RGWZoneGroup* bucket_zonegroup = &my_zonegroup;
- if (createparams.zonegroup_id.empty()) {
- // default to the local zonegroup
- createparams.zonegroup_id = my_zonegroup.id;
- } else if (period) {
- auto z = period->period_map.zonegroups.find(createparams.zonegroup_id);
- if (z == period->period_map.zonegroups.end()) {
- ldpp_dout(this, 0) << "could not find zonegroup "
- << createparams.zonegroup_id << " in current period" << dendl;
- op_ret = -ENOENT;
- return;
- }
- bucket_zonegroup = &z->second;
- } else if (createparams.zonegroup_id != my_zonegroup.id) {
- ldpp_dout(this, 0) << "zonegroup does not match current zonegroup "
- << createparams.zonegroup_id << dendl;
- op_ret = -ENOENT;
- return;
- }
- // validate the LocationConstraint
+ // Validate LocationConstraint if it's provided and enforcement is strict
if (!location_constraint.empty() && !relaxed_region_enforcement) {
- // on the master zonegroup, allow any valid api_name. otherwise it has to
- // match the bucket's zonegroup
- if (period && my_zonegroup.is_master) {
- if (!period->period_map.zonegroups_by_api.count(location_constraint)) {
+ if (period) {
+ auto location_iter = period->period_map.zonegroups_by_api.find(location_constraint);
+ if (location_iter == period->period_map.zonegroups_by_api.end()) {
ldpp_dout(this, 0) << "location constraint (" << location_constraint
<< ") can't be found." << dendl;
op_ret = -ERR_INVALID_LOCATION_CONSTRAINT;
- s->err.message = "The specified location-constraint is not valid";
+ s->err.message = fmt::format("The {} location constraint is not valid.",
+ location_constraint);
return;
}
- } else if (bucket_zonegroup->api_name != location_constraint) {
+ bucket_zonegroup = &location_iter->second;
+ } else if (location_constraint != my_zonegroup.api_name) { // if we don't have a period, we can only use the current zonegroup - so check if the location matches by api name here
ldpp_dout(this, 0) << "location constraint (" << location_constraint
- << ") doesn't match zonegroup (" << bucket_zonegroup->api_name
- << ')' << dendl;
- op_ret = -ERR_INVALID_LOCATION_CONSTRAINT;
- s->err.message = "The specified location-constraint is not valid";
+ << ") doesn't match zonegroup (" << my_zonegroup.api_name << ")" << dendl;
+ op_ret = -ERR_ILLEGAL_LOCATION_CONSTRAINT_EXCEPTION;
+ s->err.message = fmt::format("The {} location constraint is incompatible "
+ "for the region specific endpoint this request was sent to.",
+ location_constraint);
return;
}
}
+ // If it's a system request, use the provided zonegroup if available
+ else if (s->system_request && !rgwx_zonegroup.empty()) {
+ if (period) {
+ auto zonegroup_iter = period->period_map.zonegroups.find(rgwx_zonegroup);
+ if (zonegroup_iter == period->period_map.zonegroups.end()) {
+ ldpp_dout(this, 0) << "could not find zonegroup " << rgwx_zonegroup
+ << " in current period" << dendl;
+ op_ret = -ENOENT;
+ return;
+ }
+ bucket_zonegroup = &zonegroup_iter->second;
+ }
+ }
+
+ const bool enforce_location_match =
+ !period || // No period: no multisite, so no need to enforce location match.
+ !s->system_request || // All user requests are enforced to match zonegroup's location.
+ !my_zonegroup.is_master; // but if it's a system request (forwarded) only allow remote creation on master zonegroup.
+ if (enforce_location_match && !my_zonegroup.equals(bucket_zonegroup->get_id())) {
+ ldpp_dout(this, 0) << "location constraint (" << bucket_zonegroup->api_name
+ << ") doesn't match zonegroup (" << my_zonegroup.api_name << ")" << dendl;
+ op_ret = -ERR_ILLEGAL_LOCATION_CONSTRAINT_EXCEPTION;
+ s->err.message = fmt::format("The {} location constraint is incompatible "
+ "for the region specific endpoint this request was sent to.",
+ bucket_zonegroup->api_name);
+ return;
+ }
+
+ // Set the final zonegroup ID
+ createparams.zonegroup_id = bucket_zonegroup->id;
// select and validate the placement target
op_ret = select_bucket_placement(this, *bucket_zonegroup, s->user->get_info(),
@@ -3614,7 +3624,7 @@ void RGWCreateBucket::execute(optional_yield y)
return;
}
- if (bucket_zonegroup == &my_zonegroup) {
+ if (my_zonegroup.equals(bucket_zonegroup->get_id())) {
// look up the zone placement pool
createparams.zone_placement = rgw::find_zone_placement(
this, site.get_zone_params(), createparams.placement_rule);
@@ -3703,7 +3713,6 @@ void RGWCreateBucket::execute(optional_yield y)
if (!driver->is_meta_master()) {
// apply bucket creation on the master zone first
- bufferlist in_data;
JSONParser jp;
op_ret = rgw_forward_request_to_master(this, *s->penv.site, s->owner.id,
&in_data, &jp, s->info, y);
@@ -3780,7 +3789,10 @@ void RGWCreateBucket::execute(optional_yield y)
s->bucket->get_info().has_website = !s->bucket->get_info().website_conf.is_empty();
/* This will also set the quota on the bucket. */
- op_ret = s->bucket->merge_and_store_attrs(this, createparams.attrs, y);
+ s->bucket->set_attrs(std::move(createparams.attrs));
+ constexpr bool exclusive = false; // overwrite
+ constexpr ceph::real_time no_set_mtime{};
+ op_ret = s->bucket->put_info(this, exclusive, no_set_mtime, y);
} while (op_ret == -ECANCELED && tries++ < 20);
/* Restore the proper return code. */
@@ -4331,6 +4343,9 @@ void RGWPutObj::execute(optional_yield y)
}
return;
}
+
+ multipart_cksum_type = upload->cksum_type;
+
/* upload will go out of scope, so copy the dest placement for later use */
s->dest_placement = *pdest_placement;
pdest_placement = &s->dest_placement;
@@ -4461,11 +4476,12 @@ void RGWPutObj::execute(optional_yield y)
/* optional streaming checksum */
try {
cksum_filter =
- rgw::putobj::RGWPutObj_Cksum::Factory(filter, *s->info.env);
+ rgw::putobj::RGWPutObj_Cksum::Factory(filter, *s->info.env, multipart_cksum_type);
} catch (const rgw::io::Exception& e) {
op_ret = -e.code().value();
return;
}
+
if (cksum_filter) {
filter = &*cksum_filter;
}
@@ -4612,10 +4628,12 @@ void RGWPutObj::execute(optional_yield y)
if (cksum_filter) {
const auto& hdr = cksum_filter->header();
+ auto expected_ck = cksum_filter->expected(*s->info.env);
auto cksum_verify =
cksum_filter->verify(*s->info.env); // valid or no supplied cksum
cksum = get<1>(cksum_verify);
- if (std::get<0>(cksum_verify)) {
+ if ((!expected_ck) ||
+ std::get<0>(cksum_verify)) {
buffer::list cksum_bl;
ldpp_dout_fmt(this, 16,
@@ -4623,14 +4641,13 @@ void RGWPutObj::execute(optional_yield y)
"\n\tcomputed={} == \n\texpected={}",
hdr.second,
cksum->to_armor(),
- cksum_filter->expected(*s->info.env));
+ (!!expected_ck) ? expected_ck : "(checksum unavailable)");
cksum->encode(cksum_bl);
emplace_attr(RGW_ATTR_CKSUM, std::move(cksum_bl));
} else {
/* content checksum mismatch */
auto computed_ck = cksum->to_armor();
- auto expected_ck = cksum_filter->expected(*s->info.env);
ldpp_dout_fmt(this, 4,
"{} content checksum mismatch"
@@ -4833,7 +4850,8 @@ void RGWPostObj::execute(optional_yield y)
/* optional streaming checksum */
try {
cksum_filter =
- rgw::putobj::RGWPutObj_Cksum::Factory(filter, *s->info.env);
+ rgw::putobj::RGWPutObj_Cksum::Factory(
+ filter, *s->info.env, rgw::cksum::Type::none /* no override */);
} catch (const rgw::io::Exception& e) {
op_ret = -e.code().value();
return;
@@ -5181,7 +5199,10 @@ void RGWPutMetadataBucket::execute(optional_yield y)
/* Setting attributes also stores the provided bucket info. Due
* to this fact, the new quota settings can be serialized with
* the same call. */
- op_ret = s->bucket->merge_and_store_attrs(this, attrs, s->yield);
+ s->bucket->set_attrs(attrs);
+ constexpr bool exclusive = false; // overwrite
+ constexpr ceph::real_time no_set_mtime{};
+ op_ret = s->bucket->put_info(this, exclusive, no_set_mtime, s->yield);
return op_ret;
}, y);
}
@@ -5969,8 +5990,6 @@ void RGWGetACLs::execute(optional_yield y)
acls = ss.str();
}
-
-
int RGWPutACLs::verify_permission(optional_yield y)
{
bool perm;
@@ -5992,6 +6011,74 @@ int RGWPutACLs::verify_permission(optional_yield y)
return 0;
}
+/* Translate the value of an object-attributes request header into a bitmask.
+ *
+ * hdr is a list of attribute names; each recognized name (matched
+ * case-insensitively) ORs the corresponding as_flag(ReqAttributes::...) bit
+ * into the result. Unrecognized names are ignored. deflt supplies the
+ * initial bitmask, which is returned unchanged when nothing is recognized. */
+uint16_t RGWGetObjAttrs::recognize_attrs(const std::string& hdr, uint16_t deflt)
+{
+  auto attrs{deflt};
+  /* treat whitespace next to the commas as part of the separator, so that
+   * "ETag, Checksum" is recognized the same as "ETag,Checksum" */
+  auto sa = ceph::split(hdr, ", \t");
+  for (auto& k : sa) {
+    /* a token can match at most one name, so stop at the first hit */
+    if (boost::iequals(k, "etag")) {
+      attrs |= as_flag(ReqAttributes::Etag);
+    } else if (boost::iequals(k, "checksum")) {
+      attrs |= as_flag(ReqAttributes::Checksum);
+    } else if (boost::iequals(k, "objectparts")) {
+      attrs |= as_flag(ReqAttributes::ObjectParts);
+    } else if (boost::iequals(k, "objectsize")) {
+      attrs |= as_flag(ReqAttributes::ObjectSize);
+    } else if (boost::iequals(k, "storageclass")) {
+      attrs |= as_flag(ReqAttributes::StorageClass);
+    }
+  }
+  return attrs;
+} /* RGWGetObjAttrs::recognize_attrs */
+
+/* Authorize a GetObjectAttributes request.
+ *
+ * Returns 0 when the request names a non-empty object and at least one of
+ * the two permission checks below succeeds; otherwise returns -EACCES.
+ * The *Version IAM actions are selected when the object has a non-empty
+ * instance. */
+int RGWGetObjAttrs::verify_permission(optional_yield y)
+{
+ bool perm = false;
+ auto [has_s3_existing_tag, has_s3_resource_tag] =
+ rgw_check_policy_condition(this, s);
+
+ if (! rgw::sal::Object::empty(s->object.get())) {
+
+ auto iam_action1 = s->object->get_instance().empty() ?
+ rgw::IAM::s3GetObject :
+ rgw::IAM::s3GetObjectVersion;
+
+ auto iam_action2 = s->object->get_instance().empty() ?
+ rgw::IAM::s3GetObjectAttributes :
+ rgw::IAM::s3GetObjectVersionAttributes;
+
+ if (has_s3_existing_tag || has_s3_resource_tag) {
+ rgw_iam_add_objtags(this, s, has_s3_existing_tag, has_s3_resource_tag);
+ }
+
+ /* XXXX the following conjunction should be &&--but iam_action2 is currently not
+ * hooked up and always fails (but should succeed if the requestor has READ
+ * access to the object) */
+ perm = (verify_object_permission(this, s, iam_action1) || /* && */
+ verify_object_permission(this, s, iam_action2));
+ }
+
+ if (! perm) {
+ return -EACCES;
+ }
+
+ return 0;
+}
+
+/* Pre-execution hook: delegates to the shared bucket/object pre-exec helper. */
+void RGWGetObjAttrs::pre_exec()
+{
+ rgw_bucket_object_pre_exec(s);
+}
+
+/* Run the operation by reusing RGWGetObj::execute(); the constructor of this
+ * class sets get_data to false before this is called. */
+void RGWGetObjAttrs::execute(optional_yield y)
+{
+ RGWGetObj::execute(y);
+} /* RGWGetObjAttrs::execute */
+
int RGWGetLC::verify_permission(optional_yield y)
{
auto [has_s3_existing_tag, has_s3_resource_tag] = rgw_check_policy_condition(this, s, false);
@@ -6659,6 +6746,14 @@ try_sum_part_cksums(const DoutPrefixProvider *dpp,
++parts_ix;
auto& part_cksum = part.second->get_cksum();
+ if (! part_cksum) {
+ ldpp_dout_fmt(dpp, 0,
+ "ERROR: multipart part checksum not present (ix=={})",
+ parts_ix);
+ op_ret = -ERR_INVALID_REQUEST;
+ return op_ret;
+ }
+
ldpp_dout_fmt(dpp, 16,
"INFO: {} iterate part: {} {} {}",
__func__, parts_ix, part_cksum->type_string(),
diff --git a/src/rgw/rgw_op.h b/src/rgw/rgw_op.h
index 9f747501729..dcf64c31572 100644
--- a/src/rgw/rgw_op.h
+++ b/src/rgw/rgw_op.h
@@ -12,6 +12,7 @@
#pragma once
+#include <cstdint>
#include <limits.h>
#include <array>
@@ -1111,6 +1112,7 @@ class RGWCreateBucket : public RGWOp {
bool relaxed_region_enforcement = false;
RGWCORSConfiguration cors_config;
std::set<std::string> rmattr_names;
+ bufferlist in_data;
virtual bool need_metadata_upload() const { return false; }
@@ -1237,6 +1239,7 @@ protected:
std::string multipart_upload_id;
std::string multipart_part_str;
int multipart_part_num = 0;
+ rgw::cksum::Type multipart_cksum_type{rgw::cksum::Type::none};
jspan_ptr multipart_trace;
boost::optional<ceph::real_time> delete_at;
@@ -1644,6 +1647,50 @@ public:
uint32_t op_mask() override { return RGW_OP_TYPE_WRITE; }
};
+/* Base operation for GetObjectAttributes. It derives from RGWGetObj with
+ * get_data forced to false, and adds the request parameters specific to
+ * this operation plus helpers for the requested-attributes bitmask. */
+class RGWGetObjAttrs : public RGWGetObj {
+protected:
+  std::string version_id;
+  std::string expected_bucket_owner;
+  std::optional<int> marker;     // part-number marker, when supplied
+  std::optional<int> max_parts;  // part listing limit, when supplied
+  uint16_t requested_attributes{0}; // OR of as_flag(ReqAttributes::...)
+#if 0
+  /* used to decrypt attributes for objects stored with SSE-C */
+  x-amz-server-side-encryption-customer-algorithm
+  x-amz-server-side-encryption-customer-key
+  x-amz-server-side-encryption-customer-key-MD5
+#endif
+public:
+
+  /* Attributes a client may request; the enumerator value N (N >= 1) maps
+   * to bit N-1 of the bitmask, see as_flag(). */
+  enum class ReqAttributes : uint16_t {
+    None = 0,
+    Etag,
+    Checksum,
+    ObjectParts,
+    StorageClass,
+    ObjectSize
+  };
+
+  /* Return the bitmask bit for attr. None maps to the empty mask (0) so it
+   * can never alias the Etag bit; every other enumerator maps to
+   * 1 << (value - 1). */
+  static uint16_t as_flag(ReqAttributes attr) {
+    const auto ix = static_cast<uint16_t>(attr);
+    return ix ? static_cast<uint16_t>(1u << (ix - 1)) : uint16_t{0};
+  }
+
+  /* Parse a list of attribute names into a bitmask, starting from deflt. */
+  static uint16_t recognize_attrs(const std::string& hdr, uint16_t deflt = 0);
+
+  RGWGetObjAttrs() : RGWGetObj()
+  {
+    RGWGetObj::get_data = false; // it's extra false
+  }
+
+  int verify_permission(optional_yield y) override;
+  void pre_exec() override;
+  void execute(optional_yield y) override;
+  void send_response() override = 0;
+  const char* name() const override { return "get_obj_attrs"; }
+  RGWOpType get_type() override { return RGW_OP_GET_OBJ_ATTRS; }
+  uint32_t op_mask() override { return RGW_OP_TYPE_READ; }
+}; /* RGWGetObjAttrs */
+
class RGWGetLC : public RGWOp {
protected:
diff --git a/src/rgw/rgw_op_type.h b/src/rgw/rgw_op_type.h
index 49faea6403d..2c8225d289e 100644
--- a/src/rgw/rgw_op_type.h
+++ b/src/rgw/rgw_op_type.h
@@ -30,6 +30,7 @@ enum RGWOpType {
RGW_OP_COPY_OBJ,
RGW_OP_GET_ACLS,
RGW_OP_PUT_ACLS,
+ RGW_OP_GET_OBJ_ATTRS,
RGW_OP_GET_CORS,
RGW_OP_PUT_CORS,
RGW_OP_DELETE_CORS,
diff --git a/src/rgw/rgw_rest.h b/src/rgw/rgw_rest.h
index aa33080af56..9111696453e 100644
--- a/src/rgw/rgw_rest.h
+++ b/src/rgw/rgw_rest.h
@@ -403,6 +403,17 @@ public:
virtual std::string canonical_name() const override { return fmt::format("REST.{}.ACL", s->info.method); }
};
+/* REST-layer base for GetObjectAttributes: leaves parameter parsing to the
+ * protocol-specific subclass and stubs out the data callbacks, which this
+ * operation does not use. */
+class RGWGetObjAttrs_ObjStore : public RGWGetObjAttrs {
+public:
+  RGWGetObjAttrs_ObjStore() = default;
+  ~RGWGetObjAttrs_ObjStore() override = default;
+
+  int get_params(optional_yield y) override = 0;
+
+  /* not actually used */
+  int send_response_data_error(optional_yield y) override {
+    return 0;
+  }
+  int send_response_data(bufferlist& bl, off_t ofs, off_t len) override {
+    return 0;
+  }
+};
+
class RGWGetLC_ObjStore : public RGWGetLC {
public:
RGWGetLC_ObjStore() {}
diff --git a/src/rgw/rgw_rest_s3.cc b/src/rgw/rgw_rest_s3.cc
index 30ebe8e8965..9edb79d8fd0 100644
--- a/src/rgw/rgw_rest_s3.cc
+++ b/src/rgw/rgw_rest_s3.cc
@@ -9,6 +9,7 @@
#include <string_view>
#include "common/ceph_crypto.h"
+#include "common/dout.h"
#include "common/split.h"
#include "common/Formatter.h"
#include "common/utf8.h"
@@ -807,7 +808,6 @@ void RGWGetObjTags_ObjStore_S3::send_response_data(bufferlist& bl)
}
}
-
int RGWPutObjTags_ObjStore_S3::get_params(optional_yield y)
{
RGWXMLParser parser;
@@ -2533,6 +2533,10 @@ int RGWCreateBucket_ObjStore_S3::get_params(optional_yield y)
if ((op_ret < 0) && (op_ret != -ERR_LENGTH_REQUIRED))
return op_ret;
+ if (!driver->is_meta_master()) {
+ in_data.append(data);
+ }
+
if (data.length()) {
RGWCreateBucketParser parser;
@@ -3815,6 +3819,196 @@ void RGWPutACLs_ObjStore_S3::send_response()
dump_start(s);
}
+/* Collect the GetObjectAttributes request parameters.
+ *
+ * Reads the versionId argument and the expected-bucket-owner, max-parts,
+ * part-number-marker and object-attributes headers from the request
+ * environment. Returns 0 on success, or -ERR_INVALID_PART (with
+ * s->err.message set) when MaxParts or PartNumberMarker is not a valid
+ * non-negative decimal integer. MaxParts is capped at 1000. */
+int RGWGetObjAttrs_ObjStore_S3::get_params(optional_yield y)
+{
+  string err;
+  auto& env = s->info.env;
+  version_id = s->info.args.get("versionId");
+
+  auto hdr = env->get_optional("HTTP_X_AMZ_EXPECTED_BUCKET_OWNER");
+  if (hdr) {
+    expected_bucket_owner = *hdr;
+  }
+
+  hdr = env->get_optional("HTTP_X_AMZ_MAX_PARTS");
+  if (hdr) {
+    max_parts = strict_strtol(hdr->c_str(), 10, &err);
+    /* a negative count is as meaningless as an unparsable one */
+    if (err.empty() && *max_parts < 0) {
+      err = "value must not be negative";
+    }
+    if (!err.empty()) {
+      s->err.message = "Invalid value for MaxParts: " + err;
+      ldpp_dout(s, 10) << "Invalid value for MaxParts " << *hdr << ": "
+                       << err << dendl;
+      return -ERR_INVALID_PART;
+    }
+    max_parts = std::min(*max_parts, 1000);
+  }
+
+  hdr = env->get_optional("HTTP_X_AMZ_PART_NUMBER_MARKER");
+  if (hdr) {
+    marker = strict_strtol(hdr->c_str(), 10, &err);
+    /* reject negative markers instead of handing them to list_parts() */
+    if (err.empty() && *marker < 0) {
+      err = "value must not be negative";
+    }
+    if (!err.empty()) {
+      s->err.message = "Invalid value for PartNumberMarker: " + err;
+      ldpp_dout(s, 10) << "Invalid value for PartNumberMarker " << *hdr << ": "
+                       << err << dendl;
+      return -ERR_INVALID_PART;
+    }
+  }
+
+  hdr = env->get_optional("HTTP_X_AMZ_OBJECT_ATTRIBUTES");
+  if (hdr) {
+    requested_attributes = recognize_attrs(*hdr);
+  }
+
+  /* XXX skipping SSE-C params for now */
+
+  return 0;
+} /* RGWGetObjAttrs_ObjStore_S3::get_params(...) */
+
+/* Validate decryption prerequisites without installing a decrypt filter.
+ *
+ * None of the filter, cb or manifest_bl arguments is used here; the return
+ * value is whatever rgw_s3_prepare_decrypt() returns for the object's attrs. */
+int RGWGetObjAttrs_ObjStore_S3::get_decrypt_filter(
+ std::unique_ptr<RGWGetObj_Filter> *filter,
+ RGWGetObj_Filter* cb, bufferlist* manifest_bl)
+{
+ // we aren't actually decrypting the data, but for objects encrypted with
+ // SSE-C we do need to verify that required headers are present and valid
+ //
+ // in the SSE-KMS and SSE-S3 cases, this unfortunately causes us to fetch
+ // decryption keys which we don't need :(
+ std::unique_ptr<BlockCrypt> block_crypt; // ignored
+ std::map<std::string, std::string> crypt_http_responses; // ignored
+ return rgw_s3_prepare_decrypt(s, s->yield, attrs, &block_crypt,
+ crypt_http_responses);
+}
+
+/* Emit the GetObjectAttributes response.
+ *
+ * The status and headers are written first. When op_ret is 0 a
+ * "GetObjectAttributes" section follows, containing only the elements
+ * selected in requested_attributes (ETag, Checksum, ObjectParts,
+ * ObjectSize, StorageClass). Failures while listing parts are logged but
+ * do not abort the response, since the headers have already been sent. */
+void RGWGetObjAttrs_ObjStore_S3::send_response()
+{
+  if (op_ret)
+    set_req_state_err(s, op_ret);
+  dump_errno(s);
+
+  if (op_ret == 0) {
+    version_id = s->object->get_instance();
+
+    // x-amz-delete-marker: DeleteMarker // not sure we can plausibly do this?
+    dump_last_modified(s, lastmod);
+    dump_header_if_nonempty(s, "x-amz-version-id", version_id);
+    // x-amz-request-charged: RequestCharged
+  }
+
+  end_header(s, this, to_mime_type(s->format));
+  dump_start(s);
+
+  if (op_ret == 0) {
+    s->formatter->open_object_section("GetObjectAttributes");
+    if (requested_attributes & as_flag(ReqAttributes::Etag)) {
+      /* fall back to the stored etag attribute when lo_etag was not set */
+      if (lo_etag.empty()) {
+        auto iter = attrs.find(RGW_ATTR_ETAG);
+        if (iter != attrs.end()) {
+          lo_etag = iter->second.to_str();
+        }
+      }
+      s->formatter->dump_string("ETag", lo_etag);
+    }
+
+    if (requested_attributes & as_flag(ReqAttributes::Checksum)) {
+      s->formatter->open_object_section("Checksum");
+      auto iter = attrs.find(RGW_ATTR_CKSUM);
+      if (iter != attrs.end()) {
+        try {
+          rgw::cksum::Cksum cksum;
+          auto bliter = iter->second.cbegin();
+          cksum.decode(bliter);
+          /* multipart objects report "<checksum>-<parts count>" */
+          if (multipart_parts_count && multipart_parts_count > 0) {
+            s->formatter->dump_string(cksum.element_name(),
+              fmt::format("{}-{}", cksum.to_armor(), *multipart_parts_count));
+          } else {
+            s->formatter->dump_string(cksum.element_name(), cksum.to_armor());
+          }
+        } catch (buffer::error& err) {
+          ldpp_dout(this, 0)
+            << "ERROR: could not decode stored cksum, caught buffer::error" << dendl;
+        }
+      }
+      s->formatter->close_section(); /* Checksum */
+    } /* Checksum */
+
+    if (requested_attributes & as_flag(ReqAttributes::ObjectParts)) {
+      if (multipart_parts_count && multipart_parts_count > 0) {
+
+        /* XXX the following was needed to see a manifest at list_parts()! */
+        /* NOTE(review): op_ret is overwritten here after the headers were
+         * already sent -- confirm that this is intended */
+        op_ret = s->object->load_obj_state(s, s->yield);
+        if (op_ret < 0) {
+          ldpp_dout_fmt(this, 0,
+                        "ERROR: {} load_obj_state() failed ret={}", __func__,
+                        op_ret);
+        }
+
+        ldpp_dout_fmt(this, 16,
+                      "{} attr flags={} parts_count={}",
+                      __func__, requested_attributes, *multipart_parts_count);
+
+        s->formatter->open_object_section("ObjectParts");
+
+        bool truncated = false;
+        /* initialized so that a list_parts() implementation which sets
+         * truncated without writing the marker cannot make us emit an
+         * indeterminate value below */
+        int next_marker = 0;
+
+        using namespace rgw::sal;
+
+        int ret =
+          s->object->list_parts(
+            this, s->cct,
+            max_parts ? *max_parts : 1000,
+            marker ? *marker : 0,
+            &next_marker, &truncated,
+            [&](const Object::Part& part) -> int {
+              s->formatter->open_object_section("Part");
+              s->formatter->dump_int("PartNumber", part.part_number);
+              s->formatter->dump_unsigned("Size", part.part_size);
+              if (part.cksum.type != rgw::cksum::Type::none) {
+                s->formatter->dump_string(part.cksum.element_name(), part.cksum.to_armor());
+              }
+              s->formatter->close_section(); /* Part */
+              return 0;
+            }, s->yield);
+
+        if (ret < 0) {
+          ldpp_dout_fmt(this, 0,
+                        "ERROR: {} list-parts failed for {}",
+                        __func__, s->object->get_name());
+        }
+        /* AWS docs disagree on the name of this element */
+        s->formatter->dump_int("PartsCount", *multipart_parts_count);
+        s->formatter->dump_int("TotalPartsCount", *multipart_parts_count);
+        s->formatter->dump_bool("IsTruncated", truncated);
+        if (max_parts) {
+          s->formatter->dump_int("MaxParts", *max_parts);
+        }
+        if(truncated) {
+          s->formatter->dump_int("NextPartNumberMarker", next_marker);
+        }
+        if (marker) {
+          s->formatter->dump_int("PartNumberMarker", *marker);
+        }
+        s->formatter->close_section();
+      } /* multipart_parts_count positive */
+    } /* ObjectParts */
+
+    if (requested_attributes & as_flag(ReqAttributes::ObjectSize)) {
+      s->formatter->dump_int("ObjectSize", s->obj_size);
+    }
+
+    if (requested_attributes & as_flag(ReqAttributes::StorageClass)) {
+      /* objects without a stored storage-class attribute report STANDARD */
+      auto iter = attrs.find(RGW_ATTR_STORAGE_CLASS);
+      if (iter != attrs.end()) {
+        s->formatter->dump_string("StorageClass", iter->second.to_str());
+      } else {
+        s->formatter->dump_string("StorageClass", "STANDARD");
+      }
+    }
+    s->formatter->close_section();
+  } /* op_ret == 0 */
+
+  rgw_flush_formatter_and_reset(s, s->formatter);
+} /* RGWGetObjAttrs_ObjStore_S3::send_response */
+
void RGWGetLC_ObjStore_S3::execute(optional_yield y)
{
config.set_ctx(s->cct);
@@ -4794,6 +4988,7 @@ RGWOp *RGWHandler_REST_Bucket_S3::get_obj_op(bool get_data) const
RGWOp *RGWHandler_REST_Bucket_S3::op_get()
{
+ /* XXX maybe we could replace this with an indexing operation */
if (s->info.args.sub_resource_exists("encryption"))
return nullptr;
@@ -4990,6 +5185,8 @@ RGWOp *RGWHandler_REST_Obj_S3::op_get()
return new RGWGetObjLayout_ObjStore_S3;
} else if (is_tagging_op()) {
return new RGWGetObjTags_ObjStore_S3;
+ } else if (is_attributes_op()) {
+ return new RGWGetObjAttrs_ObjStore_S3;
} else if (is_obj_retention_op()) {
return new RGWGetObjRetention_ObjStore_S3;
} else if (is_obj_legal_hold_op()) {
diff --git a/src/rgw/rgw_rest_s3.h b/src/rgw/rgw_rest_s3.h
index 50160d79a42..e8fdc69751c 100644
--- a/src/rgw/rgw_rest_s3.h
+++ b/src/rgw/rgw_rest_s3.h
@@ -374,6 +374,18 @@ public:
int get_params(optional_yield y) override;
};
+/* S3 flavour of GetObjectAttributes: parses the S3 request parameters,
+ * checks decryption prerequisites and writes the S3 response. */
+class RGWGetObjAttrs_ObjStore_S3 : public RGWGetObjAttrs_ObjStore {
+public:
+  RGWGetObjAttrs_ObjStore_S3() = default;
+  ~RGWGetObjAttrs_ObjStore_S3() override = default;
+
+  int get_params(optional_yield y) override;
+  int get_decrypt_filter(
+    std::unique_ptr<RGWGetObj_Filter>* filter,
+    RGWGetObj_Filter* cb,
+    bufferlist* manifest_bl) override;
+  void send_response() override;
+};
+
class RGWGetLC_ObjStore_S3 : public RGWGetLC_ObjStore {
protected:
RGWLifecycleConfiguration_S3 config;
@@ -701,6 +713,9 @@ protected:
bool is_acl_op() const {
return s->info.args.exists("acl");
}
+ /* true when the request args contain an "attributes" entry */
+ bool is_attributes_op() const {
+ return s->info.args.exists("attributes");
+ }
bool is_cors_op() const {
return s->info.args.exists("cors");
}
@@ -759,6 +774,9 @@ protected:
bool is_acl_op() const {
return s->info.args.exists("acl");
}
+ /* true when the request args contain an "attributes" entry */
+ bool is_attributes_op() const {
+ return s->info.args.exists("attributes");
+ }
bool is_tagging_op() const {
return s->info.args.exists("tagging");
}
diff --git a/src/rgw/rgw_sal.h b/src/rgw/rgw_sal.h
index e098c4decf7..4b94f74b851 100644
--- a/src/rgw/rgw_sal.h
+++ b/src/rgw/rgw_sal.h
@@ -15,6 +15,7 @@
#pragma once
+#include <cstdint>
#include <optional>
#include <boost/intrusive_ptr.hpp>
#include <boost/smart_ptr/intrusive_ref_counter.hpp>
@@ -26,6 +27,7 @@
#include "rgw_notify_event_type.h"
#include "rgw_req_context.h"
#include "include/random.h"
+#include "include/function2.hpp"
// FIXME: following subclass dependencies
#include "driver/rados/rgw_user.h"
@@ -1169,6 +1171,9 @@ class Object {
std::string* version_id, std::string* tag, std::string* etag,
void (*progress_cb)(off_t, void *), void* progress_data,
const DoutPrefixProvider* dpp, optional_yield y) = 0;
+
+ /** Return the logging subsystem id for this object; the default is
+ * ceph_subsys_rgw and the method is virtual so drivers can substitute
+ * their own */
+ virtual unsigned get_subsys() { return ceph_subsys_rgw; };
/** Get the ACL for this object */
virtual RGWAccessControlPolicy& get_acl(void) = 0;
/** Set the ACL for this object */
@@ -1249,6 +1254,28 @@ class Object {
/** Dump driver-specific object layout info in JSON */
virtual int dump_obj_layout(const DoutPrefixProvider *dpp, optional_yield y, Formatter* f) = 0;
+ /* A transfer data type describing metadata specific to one part of a
+ * completed multipart upload object, following the GetObjectAttributes
+ * response syntax for Object::Parts here:
+ * https://docs.aws.amazon.com/AmazonS3/latest/API/API_GetObjectAttributes.html */
+  class Part
+  {
+  public:
+    int part_number;         // part number within the multipart upload
+    /* 64-bit: a single S3 part may be as large as 5 GiB, which does not
+     * fit in 32 bits */
+    uint64_t part_size;
+    rgw::cksum::Cksum cksum; // per-part checksum; type none when absent
+  }; /* Part */
+
+ /* callback function/object used by list_parts */
+ using list_parts_each_t =
+ const fu2::unique_function<int(const Part&) const>;
+
+ /** If multipart, enumerate a range [marker..marker+min(max_parts, parts_count-1)] of the parts of the object */
+ virtual int list_parts(const DoutPrefixProvider* dpp, CephContext* cct,
+ int max_parts, int marker, int* next_marker,
+ bool* truncated, list_parts_each_t each_func,
+ optional_yield y) = 0;
+
/** Get the cached attributes for this object */
virtual Attrs& get_attrs(void) = 0;
/** Get the (const) cached attributes for this object */
@@ -1447,7 +1474,7 @@ public:
virtual int init(const DoutPrefixProvider* dpp, optional_yield y, ACLOwner& owner, rgw_placement_rule& dest_placement, rgw::sal::Attrs& attrs) = 0;
/** List all the parts of this upload, filling the parts cache */
virtual int list_parts(const DoutPrefixProvider* dpp, CephContext* cct,
- int num_parts, int marker,
+ int max_parts, int marker,
int* next_marker, bool* truncated, optional_yield y,
bool assume_unsorted = false) = 0;
/** Abort this upload */
@@ -1751,8 +1778,6 @@ class Zone {
virtual bool is_writeable() = 0;
/** Get the URL for the endpoint for redirecting to this zone */
virtual bool get_redirect_endpoint(std::string* endpoint) = 0;
- /** Check to see if the given API is supported in this zone */
- virtual bool has_zonegroup_api(const std::string& api) const = 0;
/** Get the current period ID for this zone */
virtual const std::string& get_current_period_id() = 0;
/** Get thes system access key for this zone */
diff --git a/src/rgw/rgw_sal_dbstore.cc b/src/rgw/rgw_sal_dbstore.cc
index 0e4f95846d1..02fd7a49cda 100644
--- a/src/rgw/rgw_sal_dbstore.cc
+++ b/src/rgw/rgw_sal_dbstore.cc
@@ -458,14 +458,6 @@ namespace rgw::sal {
return false;
}
- bool DBZone::has_zonegroup_api(const std::string& api) const
- {
- if (api == "default")
- return true;
-
- return false;
- }
-
const std::string& DBZone::get_current_period_id()
{
return current_period->get_id();
@@ -496,6 +488,14 @@ namespace rgw::sal {
return std::make_unique<DBLuaManager>(this);
}
+ /* Part enumeration is not implemented for this driver: every argument is
+ * ignored and -EOPNOTSUPP is returned unconditionally. */
+ int DBObject::list_parts(const DoutPrefixProvider* dpp, CephContext* cct,
+ int max_parts, int marker, int* next_marker,
+ bool* truncated, list_parts_each_t each_func,
+ optional_yield y)
+ {
+ return -EOPNOTSUPP;
+ }
+
int DBObject::load_obj_state(const DoutPrefixProvider* dpp, optional_yield y, bool follow_olh)
{
RGWObjState* astate;
diff --git a/src/rgw/rgw_sal_dbstore.h b/src/rgw/rgw_sal_dbstore.h
index b54249df031..4df10d1dce1 100644
--- a/src/rgw/rgw_sal_dbstore.h
+++ b/src/rgw/rgw_sal_dbstore.h
@@ -303,7 +303,6 @@ protected:
virtual const std::string& get_name() const override;
virtual bool is_writeable() override;
virtual bool get_redirect_endpoint(std::string* endpoint) override;
- virtual bool has_zonegroup_api(const std::string& api) const override;
virtual const std::string& get_current_period_id() override;
virtual const RGWAccessKey& get_system_key() override;
virtual const std::string& get_realm_name() override;
@@ -529,6 +528,7 @@ protected:
DBObject(DBObject& _o) = default;
+ /** logging subsystem used for this driver's objects */
+ virtual unsigned get_subsys() override { return ceph_subsys_rgw_dbstore; }
virtual int delete_object(const DoutPrefixProvider* dpp,
optional_yield y,
uint32_t flags,
@@ -554,6 +554,13 @@ protected:
virtual int set_acl(const RGWAccessControlPolicy& acl) override { acls = acl; return 0; }
virtual int set_obj_attrs(const DoutPrefixProvider* dpp, Attrs* setattrs, Attrs* delattrs, optional_yield y, uint32_t flags) override;
+
+ /** If multipart, enumerate a range [marker..marker+min(max_parts, parts_count-1)] of the parts of the object */
+ virtual int list_parts(const DoutPrefixProvider* dpp, CephContext* cct,
+ int max_parts, int marker, int* next_marker,
+ bool* truncated, list_parts_each_t each_func,
+ optional_yield y) override;
+
virtual int load_obj_state(const DoutPrefixProvider* dpp, optional_yield y, bool follow_olh = true) override;
virtual int get_obj_attrs(optional_yield y, const DoutPrefixProvider* dpp, rgw_obj* target_obj = NULL) override;
virtual int modify_obj_attrs(const char* attr_name, bufferlist& attr_val, optional_yield y, const DoutPrefixProvider* dpp) override;
diff --git a/src/rgw/rgw_sal_filter.cc b/src/rgw/rgw_sal_filter.cc
index 733bfa39ee2..15da580988e 100644
--- a/src/rgw/rgw_sal_filter.cc
+++ b/src/rgw/rgw_sal_filter.cc
@@ -1046,6 +1046,17 @@ RGWAccessControlPolicy& FilterObject::get_acl()
return next->get_acl();
}
+/* Pass-through: forwards the call, with all arguments, to the wrapped
+ * object (next) and returns its result. */
+int FilterObject::list_parts(const DoutPrefixProvider* dpp, CephContext* cct,
+ int max_parts, int marker, int* next_marker,
+ bool* truncated, list_parts_each_t each_func,
+ optional_yield y)
+{
+ return next->list_parts(dpp, cct, max_parts, marker, next_marker,
+ truncated,
+ sal::Object::list_parts_each_t(each_func),
+ y);
+}
+
int FilterObject::load_obj_state(const DoutPrefixProvider *dpp,
optional_yield y, bool follow_olh) {
return next->load_obj_state(dpp, y, follow_olh);
diff --git a/src/rgw/rgw_sal_filter.h b/src/rgw/rgw_sal_filter.h
index 43a440e8b10..947ce9d4bf5 100644
--- a/src/rgw/rgw_sal_filter.h
+++ b/src/rgw/rgw_sal_filter.h
@@ -108,9 +108,6 @@ public:
virtual bool get_redirect_endpoint(std::string* endpoint) override {
return next->get_redirect_endpoint(endpoint);
}
- virtual bool has_zonegroup_api(const std::string& api) const override {
- return next->has_zonegroup_api(api);
- }
virtual const std::string& get_current_period_id() override {
return next->get_current_period_id();
}
@@ -781,6 +778,12 @@ public:
virtual bool empty() const override { return next->empty(); }
virtual const std::string &get_name() const override { return next->get_name(); }
+ /** If multipart, enumerate a range [marker..marker+min(max_parts, parts_count-1)] of the parts of the object */
+ virtual int list_parts(const DoutPrefixProvider* dpp, CephContext* cct,
+ int max_parts, int marker, int* next_marker,
+ bool* truncated, list_parts_each_t each_func,
+ optional_yield y) override;
+
virtual int load_obj_state(const DoutPrefixProvider *dpp, optional_yield y,
bool follow_olh = true) override;
virtual int set_obj_attrs(const DoutPrefixProvider* dpp, Attrs* setattrs,
diff --git a/src/rgw/services/svc_zone.cc b/src/rgw/services/svc_zone.cc
index 61360135dd9..97d81550058 100644
--- a/src/rgw/services/svc_zone.cc
+++ b/src/rgw/services/svc_zone.cc
@@ -657,18 +657,6 @@ const string& RGWSI_Zone::get_current_period_id() const
return current_period->get_id();
}
-bool RGWSI_Zone::has_zonegroup_api(const std::string& api) const
-{
- if (!current_period->get_id().empty()) {
- const auto& zonegroups_by_api = current_period->get_map().zonegroups_by_api;
- if (zonegroups_by_api.find(api) != zonegroups_by_api.end())
- return true;
- } else if (zonegroup->api_name == api) {
- return true;
- }
- return false;
-}
-
bool RGWSI_Zone::zone_is_writeable()
{
return writeable_zone && !get_zone().is_read_only();
diff --git a/src/rgw/services/svc_zone.h b/src/rgw/services/svc_zone.h
index 1b3f49587a3..719546eb8db 100644
--- a/src/rgw/services/svc_zone.h
+++ b/src/rgw/services/svc_zone.h
@@ -96,7 +96,6 @@ public:
uint32_t get_zone_short_id() const;
const std::string& get_current_period_id() const;
- bool has_zonegroup_api(const std::string& api) const;
bool zone_is_writeable();
bool zone_syncs_from(const RGWZone& target_zone, const RGWZone& source_zone) const;
diff --git a/src/script/run-make.sh b/src/script/run-make.sh
index 52d43d3a171..23724028fe6 100755
--- a/src/script/run-make.sh
+++ b/src/script/run-make.sh
@@ -29,6 +29,7 @@ function clean_up_after_myself() {
function detect_ceph_dev_pkgs() {
local boost_root=/opt/ceph
+ local cmake_opts=""
if test -f $boost_root/include/boost/config.hpp; then
cmake_opts+=" -DWITH_SYSTEM_BOOST=ON -DBOOST_ROOT=$boost_root"
else
diff --git a/src/test/ObjectMap/KeyValueDBMemory.cc b/src/test/ObjectMap/KeyValueDBMemory.cc
index 234e963397e..cfe25930d6a 100644
--- a/src/test/ObjectMap/KeyValueDBMemory.cc
+++ b/src/test/ObjectMap/KeyValueDBMemory.cc
@@ -132,12 +132,26 @@ public:
return "";
}
+  // View of the current entry's key (second half of the map key), or an
+  // empty view when the iterator is not valid.
+  string_view key_as_sv() override {
+    if (!valid()) {
+      return "";
+    }
+    return it->first.second;
+  }
+
pair<string,string> raw_key() override {
if (valid())
return (*it).first;
else
return make_pair("", "");
}
+
+  // Views of both halves of the current entry's map key, or a pair of empty
+  // views when the iterator is not valid.
+  pair<string_view,string_view> raw_key_as_sv() override {
+    if (!valid()) {
+      return make_pair("", "");
+    }
+    return it->first;
+  }
bool raw_key_is_prefixed(const string &prefix) override {
return prefix == (*it).first.first;
@@ -150,6 +164,13 @@ public:
return bufferlist();
}
+  // View over the current entry's value bytes, or a default-constructed
+  // view when the iterator is not valid.
+  std::string_view value_as_sv() override {
+    if (!valid()) {
+      return std::string_view();
+    }
+    auto& stored = it->second;
+    const char* bytes = stored.c_str();
+    return std::string_view{bytes, stored.length()};
+  }
+
int status() override {
return 0;
}
diff --git a/src/test/crimson/seastore/test_btree_lba_manager.cc b/src/test/crimson/seastore/test_btree_lba_manager.cc
index 8b1f7435c87..7874411e0ff 100644
--- a/src/test/crimson/seastore/test_btree_lba_manager.cc
+++ b/src/test/crimson/seastore/test_btree_lba_manager.cc
@@ -157,7 +157,10 @@ struct btree_test_base :
}).safe_then([this] {
return seastar::do_with(
cache->create_transaction(
- Transaction::src_t::MUTATE, "test_set_up_fut", false),
+ Transaction::src_t::MUTATE,
+ "test_set_up_fut",
+ CACHE_HINT_TOUCH,
+ false),
[this](auto &ref_t) {
return with_trans_intr(*ref_t, [&](auto &t) {
cache->init();
@@ -236,7 +239,10 @@ struct lba_btree_test : btree_test_base {
template <typename F>
auto lba_btree_update(F &&f) {
auto tref = cache->create_transaction(
- Transaction::src_t::MUTATE, "test_btree_update", false);
+ Transaction::src_t::MUTATE,
+ "test_btree_update",
+ CACHE_HINT_TOUCH,
+ false);
auto &t = *tref;
with_trans_intr(
t,
@@ -281,7 +287,10 @@ struct lba_btree_test : btree_test_base {
template <typename F>
auto lba_btree_read(F &&f) {
auto t = cache->create_transaction(
- Transaction::src_t::READ, "test_btree_read", false);
+ Transaction::src_t::READ,
+ "test_btree_read",
+ CACHE_HINT_TOUCH,
+ false);
return with_trans_intr(
*t,
[this, f=std::forward<F>(f)](auto &t) mutable {
@@ -429,7 +438,10 @@ struct btree_lba_manager_test : btree_test_base {
auto create_transaction(bool create_fake_extent=true) {
auto t = test_transaction_t{
cache->create_transaction(
- Transaction::src_t::MUTATE, "test_mutate_lba", false),
+ Transaction::src_t::MUTATE,
+ "test_mutate_lba",
+ CACHE_HINT_TOUCH,
+ false),
test_lba_mappings
};
if (create_fake_extent) {
@@ -445,7 +457,10 @@ struct btree_lba_manager_test : btree_test_base {
auto create_weak_transaction() {
auto t = test_transaction_t{
cache->create_transaction(
- Transaction::src_t::READ, "test_read_weak", true),
+ Transaction::src_t::READ,
+ "test_read_weak",
+ CACHE_HINT_TOUCH,
+ true),
test_lba_mappings
};
return t;
diff --git a/src/test/crimson/seastore/test_seastore_cache.cc b/src/test/crimson/seastore/test_seastore_cache.cc
index 6e24f436b98..fa774886139 100644
--- a/src/test/crimson/seastore/test_seastore_cache.cc
+++ b/src/test/crimson/seastore/test_seastore_cache.cc
@@ -87,7 +87,10 @@ struct cache_test_t : public seastar_test_suite_t {
auto get_transaction() {
return cache->create_transaction(
- Transaction::src_t::MUTATE, "test_cache", false);
+ Transaction::src_t::MUTATE,
+ "test_cache",
+ CACHE_HINT_TOUCH,
+ false);
}
template <typename T, typename... Args>
diff --git a/src/test/crimson/test_backfill.cc b/src/test/crimson/test_backfill.cc
index 7e058c80ed6..e0fc5821d08 100644
--- a/src/test/crimson/test_backfill.cc
+++ b/src/test/crimson/test_backfill.cc
@@ -119,6 +119,11 @@ class BackfillFixture : public crimson::osd::BackfillState::BackfillListener {
events_to_dispatch.emplace_back(event.intrusive_from_this());
}
+ // Queue an event at the FRONT of events_to_dispatch (schedule_event above
+ // appends at the back) -- presumably so that it is dispatched before the
+ // events that are already queued; confirm against the dispatch loop.
+ template <class EventT>
+ void schedule_event_immediate(const EventT& event) {
+ events_to_dispatch.emplace_front(event.intrusive_from_this());
+ }
+
// BackfillListener {
void request_replica_scan(
const pg_shard_t& target,
@@ -188,12 +193,11 @@ public:
struct PGFacade;
void cancel() {
- events_to_dispatch.clear();
- schedule_event(crimson::osd::BackfillState::CancelBackfill{});
+ schedule_event_immediate(crimson::osd::BackfillState::CancelBackfill{});
}
void resume() {
- schedule_event(crimson::osd::BackfillState::Triggered{});
+ schedule_event_immediate(crimson::osd::BackfillState::Triggered{});
}
};
@@ -274,6 +278,9 @@ struct BackfillFixture::PGFacade : public crimson::osd::BackfillState::PGFacade
return backfill_source.projected_log;
}
+ std::ostream &print(std::ostream &out) const override {
+ return out << "FakePGFacade";
+ }
};
BackfillFixture::BackfillFixture(
@@ -452,7 +459,69 @@ TEST(backfill, two_empty_replicas)
EXPECT_TRUE(cluster_fixture.all_stores_look_like(reference_store));
}
-TEST(backfill, cancel_resume)
+TEST(backfill, cancel_resume_middle_of_primaryscan)
+{
+ const auto reference_store = FakeStore{ {
+ { "1:00058bcc:::rbd_data.1018ac3e755.00000000000000d5:head", {10, 234} },
+ { "1:00ed7f8e:::rbd_data.1018ac3e755.00000000000000af:head", {10, 196} },
+ { "1:01483aea:::rbd_data.1018ac3e755.0000000000000095:head", {10, 169} },
+ }};
+ auto cluster_fixture = BackfillFixtureBuilder::add_source(
+ reference_store.objs
+ ).add_target(
+ { /* nothing 1 */ }
+ ).add_target(
+ { /* nothing 2 */ }
+ ).get_result();
+
+ EXPECT_CALL(cluster_fixture, backfilled);
+ cluster_fixture.cancel();
+ cluster_fixture.next_round2<crimson::osd::BackfillState::CancelBackfill>();
+ cluster_fixture.next_round2<crimson::osd::BackfillState::PrimaryScanned>();
+ cluster_fixture.resume();
+ cluster_fixture.next_round2<crimson::osd::BackfillState::Triggered>();
+ cluster_fixture.next_round2<crimson::osd::BackfillState::ReplicaScanned>();
+ cluster_fixture.next_round2<crimson::osd::BackfillState::ReplicaScanned>();
+ cluster_fixture.next_round2<crimson::osd::BackfillState::ObjectPushed>();
+ cluster_fixture.next_round2<crimson::osd::BackfillState::ObjectPushed>();
+ cluster_fixture.next_round2<crimson::osd::BackfillState::ObjectPushed>();
+ cluster_fixture.next_till_done();
+
+ EXPECT_TRUE(cluster_fixture.all_stores_look_like(reference_store));
+}
+
+TEST(backfill, cancel_resume_middle_of_replicascan1)
+{
+ const auto reference_store = FakeStore{ {
+ { "1:00058bcc:::rbd_data.1018ac3e755.00000000000000d5:head", {10, 234} },
+ { "1:00ed7f8e:::rbd_data.1018ac3e755.00000000000000af:head", {10, 196} },
+ { "1:01483aea:::rbd_data.1018ac3e755.0000000000000095:head", {10, 169} },
+ }};
+ auto cluster_fixture = BackfillFixtureBuilder::add_source(
+ reference_store.objs
+ ).add_target(
+ { /* nothing 1 */ }
+ ).add_target(
+ { /* nothing 2 */ }
+ ).get_result();
+
+ EXPECT_CALL(cluster_fixture, backfilled);
+ cluster_fixture.next_round2<crimson::osd::BackfillState::PrimaryScanned>();
+ cluster_fixture.cancel();
+ cluster_fixture.next_round2<crimson::osd::BackfillState::CancelBackfill>();
+ cluster_fixture.resume();
+ cluster_fixture.next_round2<crimson::osd::BackfillState::Triggered>();
+ cluster_fixture.next_round2<crimson::osd::BackfillState::ReplicaScanned>();
+ cluster_fixture.next_round2<crimson::osd::BackfillState::ReplicaScanned>();
+ cluster_fixture.next_round2<crimson::osd::BackfillState::ObjectPushed>();
+ cluster_fixture.next_round2<crimson::osd::BackfillState::ObjectPushed>();
+ cluster_fixture.next_round2<crimson::osd::BackfillState::ObjectPushed>();
+ cluster_fixture.next_till_done();
+
+ EXPECT_TRUE(cluster_fixture.all_stores_look_like(reference_store));
+}
+
+TEST(backfill, cancel_resume_middle_of_replicascan2)
{
const auto reference_store = FakeStore{ {
{ "1:00058bcc:::rbd_data.1018ac3e755.00000000000000d5:head", {10, 234} },
@@ -469,12 +538,43 @@ TEST(backfill, cancel_resume)
EXPECT_CALL(cluster_fixture, backfilled);
cluster_fixture.next_round2<crimson::osd::BackfillState::PrimaryScanned>();
+ cluster_fixture.next_round2<crimson::osd::BackfillState::ReplicaScanned>();
cluster_fixture.cancel();
cluster_fixture.next_round2<crimson::osd::BackfillState::CancelBackfill>();
cluster_fixture.resume();
cluster_fixture.next_round2<crimson::osd::BackfillState::Triggered>();
cluster_fixture.next_round2<crimson::osd::BackfillState::ReplicaScanned>();
+ cluster_fixture.next_round2<crimson::osd::BackfillState::ObjectPushed>();
+ cluster_fixture.next_round2<crimson::osd::BackfillState::ObjectPushed>();
+ cluster_fixture.next_round2<crimson::osd::BackfillState::ObjectPushed>();
+ cluster_fixture.next_till_done();
+
+ EXPECT_TRUE(cluster_fixture.all_stores_look_like(reference_store));
+}
+
+TEST(backfill, cancel_resume_middle_of_push1)
+{
+ const auto reference_store = FakeStore{ {
+ { "1:00058bcc:::rbd_data.1018ac3e755.00000000000000d5:head", {10, 234} },
+ { "1:00ed7f8e:::rbd_data.1018ac3e755.00000000000000af:head", {10, 196} },
+ { "1:01483aea:::rbd_data.1018ac3e755.0000000000000095:head", {10, 169} },
+ }};
+ auto cluster_fixture = BackfillFixtureBuilder::add_source(
+ reference_store.objs
+ ).add_target(
+ { /* nothing 1 */ }
+ ).add_target(
+ { /* nothing 2 */ }
+ ).get_result();
+
+ EXPECT_CALL(cluster_fixture, backfilled);
+ cluster_fixture.next_round2<crimson::osd::BackfillState::PrimaryScanned>();
+ cluster_fixture.next_round2<crimson::osd::BackfillState::ReplicaScanned>();
cluster_fixture.next_round2<crimson::osd::BackfillState::ReplicaScanned>();
+ cluster_fixture.cancel();
+ cluster_fixture.next_round2<crimson::osd::BackfillState::CancelBackfill>();
+ cluster_fixture.resume();
+ cluster_fixture.next_round2<crimson::osd::BackfillState::Triggered>();
cluster_fixture.next_round2<crimson::osd::BackfillState::ObjectPushed>();
cluster_fixture.next_round2<crimson::osd::BackfillState::ObjectPushed>();
cluster_fixture.next_round2<crimson::osd::BackfillState::ObjectPushed>();
@@ -483,7 +583,7 @@ TEST(backfill, cancel_resume)
EXPECT_TRUE(cluster_fixture.all_stores_look_like(reference_store));
}
-TEST(backfill, cancel_resume_middle_of_scan)
+TEST(backfill, cancel_resume_middle_of_push2)
{
const auto reference_store = FakeStore{ {
{ "1:00058bcc:::rbd_data.1018ac3e755.00000000000000d5:head", {10, 234} },
@@ -501,14 +601,46 @@ TEST(backfill, cancel_resume_middle_of_scan)
EXPECT_CALL(cluster_fixture, backfilled);
cluster_fixture.next_round2<crimson::osd::BackfillState::PrimaryScanned>();
cluster_fixture.next_round2<crimson::osd::BackfillState::ReplicaScanned>();
+ cluster_fixture.next_round2<crimson::osd::BackfillState::ReplicaScanned>();
+ cluster_fixture.next_round2<crimson::osd::BackfillState::ObjectPushed>();
cluster_fixture.cancel();
cluster_fixture.next_round2<crimson::osd::BackfillState::CancelBackfill>();
cluster_fixture.resume();
cluster_fixture.next_round2<crimson::osd::BackfillState::Triggered>();
+ cluster_fixture.next_round2<crimson::osd::BackfillState::ObjectPushed>();
+ cluster_fixture.next_round2<crimson::osd::BackfillState::ObjectPushed>();
+ cluster_fixture.next_till_done();
+
+ EXPECT_TRUE(cluster_fixture.all_stores_look_like(reference_store));
+}
+
+TEST(backfill, cancel_resume_middle_of_push3)
+{
+ const auto reference_store = FakeStore{ {
+ { "1:00058bcc:::rbd_data.1018ac3e755.00000000000000d5:head", {10, 234} },
+ { "1:00ed7f8e:::rbd_data.1018ac3e755.00000000000000af:head", {10, 196} },
+ { "1:01483aea:::rbd_data.1018ac3e755.0000000000000095:head", {10, 169} },
+ }};
+ auto cluster_fixture = BackfillFixtureBuilder::add_source(
+ reference_store.objs
+ ).add_target(
+ { /* nothing 1 */ }
+ ).add_target(
+ { /* nothing 2 */ }
+ ).get_result();
+
+ EXPECT_CALL(cluster_fixture, backfilled);
+ cluster_fixture.next_round2<crimson::osd::BackfillState::PrimaryScanned>();
+ cluster_fixture.next_round2<crimson::osd::BackfillState::ReplicaScanned>();
cluster_fixture.next_round2<crimson::osd::BackfillState::ReplicaScanned>();
cluster_fixture.next_round2<crimson::osd::BackfillState::ObjectPushed>();
+ cluster_fixture.cancel();
+ cluster_fixture.next_round2<crimson::osd::BackfillState::CancelBackfill>();
cluster_fixture.next_round2<crimson::osd::BackfillState::ObjectPushed>();
cluster_fixture.next_round2<crimson::osd::BackfillState::ObjectPushed>();
+ cluster_fixture.resume();
+ cluster_fixture.next_round2<crimson::osd::BackfillState::Triggered>();
+ cluster_fixture.next_round2<crimson::osd::BackfillState::RequestDone>();
cluster_fixture.next_till_done();
EXPECT_TRUE(cluster_fixture.all_stores_look_like(reference_store));
diff --git a/src/test/librados/aio.cc b/src/test/librados/aio.cc
index 68587fe87d1..7fb90bdd38e 100644
--- a/src/test/librados/aio.cc
+++ b/src/test/librados/aio.cc
@@ -1722,3 +1722,59 @@ TEST(LibRadosAioEC, MultiWrite) {
rados_aio_release(my_completion2);
rados_aio_release(my_completion3);
}
+
+TEST(LibRadosAio, CancelBeforeSubmit) {
+ AioTestData test_data;
+ ASSERT_EQ("", test_data.init());
+
+ rados_completion_t completion;
+ ASSERT_EQ(0, rados_aio_create_completion2(nullptr, nullptr, &completion));
+
+ ASSERT_EQ(0, rados_aio_cancel(test_data.m_ioctx, completion));
+ rados_aio_release(completion);
+}
+
+TEST(LibRadosAio, CancelBeforeComplete) {
+ AioTestData test_data;
+ ASSERT_EQ("", test_data.init());
+
+ // cancellation tests are racy, so retry if completion beats the cancellation
+ int ret = 0;
+ int tries = 10;
+ do {
+ rados_completion_t completion;
+ ASSERT_EQ(0, rados_aio_create_completion2(nullptr, nullptr, &completion));
+ char buf[128];
+ ASSERT_EQ(0, rados_aio_read(test_data.m_ioctx, "nonexistent",
+ completion, buf, sizeof(buf), 0));
+
+ ASSERT_EQ(0, rados_aio_cancel(test_data.m_ioctx, completion));
+ {
+ TestAlarm alarm;
+ ASSERT_EQ(0, rados_aio_wait_for_complete(completion));
+ }
+ ret = rados_aio_get_return_value(completion);
+ rados_aio_release(completion);
+ } while (ret == -ENOENT && --tries);
+
+ ASSERT_EQ(-ECANCELED, ret);
+}
+
+TEST(LibRadosAio, CancelAfterComplete) {
+ AioTestData test_data;
+ rados_completion_t completion;
+ ASSERT_EQ("", test_data.init());
+
+ ASSERT_EQ(0, rados_aio_create_completion2(nullptr, nullptr, &completion));
+ char buf[128];
+ ASSERT_EQ(0, rados_aio_read(test_data.m_ioctx, "nonexistent",
+ completion, buf, sizeof(buf), 0));
+
+ {
+ TestAlarm alarm;
+ ASSERT_EQ(0, rados_aio_wait_for_complete(completion));
+ }
+ ASSERT_EQ(0, rados_aio_cancel(test_data.m_ioctx, completion));
+ ASSERT_EQ(-ENOENT, rados_aio_get_return_value(completion));
+ rados_aio_release(completion);
+}
diff --git a/src/test/librados/aio_cxx.cc b/src/test/librados/aio_cxx.cc
index a70af050d70..5e35869b5c2 100644
--- a/src/test/librados/aio_cxx.cc
+++ b/src/test/librados/aio_cxx.cc
@@ -2467,3 +2467,92 @@ TEST(LibRadosAio, MultiReads) {
ASSERT_EQ(0, memcmp(buf, bl.c_str(), sizeof(buf)));
}
}
+
+// cancellation test fixture for global setup/teardown
+// parameterized to test both IoCtx::aio_cancel() and AioCompletion::cancel()
+class Cancel : public ::testing::TestWithParam<bool> {
+ static constexpr auto pool_prefix = "ceph_test_rados_api_pp";
+ static Rados rados;
+ static std::string pool_name;
+ protected:
+ static IoCtx ioctx;
+ public:
+ static void SetUpTestCase() {
+ pool_name = get_temp_pool_name(pool_prefix);
+ ASSERT_EQ("", create_one_pool_pp(pool_name, rados));
+ ASSERT_EQ(0, rados.ioctx_create(pool_name.c_str(), ioctx));
+ }
+ static void TearDownTestCase() {
+ destroy_one_pool_pp(pool_name, rados);
+ }
+};
+Rados Cancel::rados;
+std::string Cancel::pool_name;
+IoCtx Cancel::ioctx;
+
+TEST_P(Cancel, BeforeSubmit)
+{
+ const bool use_completion = GetParam();
+
+ auto c = std::unique_ptr<AioCompletion>{Rados::aio_create_completion()};
+ if (use_completion) {
+ ASSERT_EQ(0, c->cancel());
+ } else {
+ ASSERT_EQ(0, ioctx.aio_cancel(c.get()));
+ }
+}
+
+TEST_P(Cancel, BeforeComplete)
+{
+ const bool use_completion = GetParam();
+
+ // cancellation tests are racy, so retry if completion beats the cancellation
+ int ret = 0;
+ int tries = 10;
+ do {
+ auto c = std::unique_ptr<AioCompletion>{Rados::aio_create_completion()};
+ ObjectReadOperation op;
+ op.assert_exists();
+ ioctx.aio_operate("nonexistent", c.get(), &op, nullptr);
+
+ if (use_completion) {
+ EXPECT_EQ(0, c->cancel());
+ } else {
+ EXPECT_EQ(0, ioctx.aio_cancel(c.get()));
+ }
+ {
+ TestAlarm alarm;
+ ASSERT_EQ(0, c->wait_for_complete());
+ }
+ ret = c->get_return_value();
+ } while (ret == -ENOENT && --tries);
+
+ EXPECT_EQ(-ECANCELED, ret);
+}
+
+TEST_P(Cancel, AfterComplete)
+{
+ const bool use_completion = GetParam();
+
+ auto c = std::unique_ptr<AioCompletion>{Rados::aio_create_completion()};
+ ObjectReadOperation op;
+ op.assert_exists();
+ ioctx.aio_operate("nonexistent", c.get(), &op, nullptr);
+ {
+ TestAlarm alarm;
+ ASSERT_EQ(0, c->wait_for_complete());
+ }
+ if (use_completion) {
+ EXPECT_EQ(0, c->cancel());
+ } else {
+ EXPECT_EQ(0, ioctx.aio_cancel(c.get()));
+ }
+ EXPECT_EQ(-ENOENT, c->get_return_value());
+}
+
+std::string cancel_test_name(const testing::TestParamInfo<Cancel::ParamType>& info)
+{
+ return info.param ? "cancel" : "aio_cancel";
+}
+
+INSTANTIATE_TEST_SUITE_P(LibRadosAio, Cancel, testing::Bool(), cancel_test_name);
diff --git a/src/test/librados/asio.cc b/src/test/librados/asio.cc
index 01ebb957150..500f36508a7 100644
--- a/src/test/librados/asio.cc
+++ b/src/test/librados/asio.cc
@@ -21,10 +21,14 @@
#include <boost/range/begin.hpp>
#include <boost/range/end.hpp>
+#include <boost/asio/bind_cancellation_slot.hpp>
+#include <boost/asio/cancellation_signal.hpp>
#include <boost/asio/io_context.hpp>
#include <boost/asio/spawn.hpp>
#include <boost/asio/use_future.hpp>
+#include <optional>
+
#define dout_subsys ceph_subsys_rados
#define dout_context g_ceph_context
@@ -78,6 +82,15 @@ void rethrow(std::exception_ptr eptr) {
if (eptr) std::rethrow_exception(eptr);
}
+auto capture(std::optional<error_code>& out) {
+ return [&out] (error_code ec, ...) { out = ec; };
+}
+
+auto capture(boost::asio::cancellation_signal& signal,
+ std::optional<error_code>& out) {
+ return boost::asio::bind_cancellation_slot(signal.slot(), capture(out));
+}
+
TEST_F(AsioRados, AsyncReadCallback)
{
boost::asio::io_context service;
@@ -385,6 +398,130 @@ TEST_F(AsioRados, AsyncWriteOperationYield)
service.run();
}
+// FIXME: this crashes on windows with:
+// Thread 1 received signal SIGILL, Illegal instruction.
+#ifndef _WIN32
+
+TEST_F(AsioRados, AsyncReadOperationCancelTerminal)
+{
+ // cancellation tests are racy, so retry if completion beats the cancellation
+ boost::system::error_code ec;
+ int tries = 10;
+ do {
+ boost::asio::io_context service;
+ boost::asio::cancellation_signal signal;
+ std::optional<error_code> result;
+
+ librados::ObjectReadOperation op;
+ op.assert_exists();
+ librados::async_operate(service, io, "noexist", &op, 0, nullptr,
+ capture(signal, result));
+
+ service.poll();
+ EXPECT_FALSE(service.stopped());
+ EXPECT_FALSE(result);
+
+ signal.emit(boost::asio::cancellation_type::terminal);
+
+ service.run();
+ ASSERT_TRUE(result);
+ ec = *result;
+
+ signal.emit(boost::asio::cancellation_type::all); // noop
+ } while (ec == std::errc::no_such_file_or_directory && --tries);
+
+ EXPECT_EQ(ec, boost::asio::error::operation_aborted);
+}
+
+TEST_F(AsioRados, AsyncReadOperationCancelTotal)
+{
+ // cancellation tests are racy, so retry if completion beats the cancellation
+ boost::system::error_code ec;
+ int tries = 10;
+ do {
+ boost::asio::io_context service;
+ boost::asio::cancellation_signal signal;
+ std::optional<error_code> result;
+
+ librados::ObjectReadOperation op;
+ op.assert_exists();
+ librados::async_operate(service, io, "noexist", &op, 0, nullptr,
+ capture(signal, result));
+
+ service.poll();
+ EXPECT_FALSE(service.stopped());
+ EXPECT_FALSE(result);
+
+ signal.emit(boost::asio::cancellation_type::total);
+
+ service.run();
+ ASSERT_TRUE(result);
+ ec = *result;
+
+ signal.emit(boost::asio::cancellation_type::all); // noop
+ } while (ec == std::errc::no_such_file_or_directory && --tries);
+
+ EXPECT_EQ(ec, boost::asio::error::operation_aborted);
+}
+
+TEST_F(AsioRados, AsyncWriteOperationCancelTerminal)
+{
+ // cancellation tests are racy, so retry if completion beats the cancellation
+ boost::system::error_code ec;
+ int tries = 10;
+ do {
+ boost::asio::io_context service;
+ boost::asio::cancellation_signal signal;
+ std::optional<error_code> result;
+
+ librados::ObjectWriteOperation op;
+ op.assert_exists();
+ librados::async_operate(service, io, "noexist", &op, 0, nullptr,
+ capture(signal, result));
+
+ service.poll();
+ EXPECT_FALSE(service.stopped());
+ EXPECT_FALSE(result);
+
+ signal.emit(boost::asio::cancellation_type::terminal);
+
+ service.run();
+ ASSERT_TRUE(result);
+ ec = *result;
+
+ signal.emit(boost::asio::cancellation_type::all); // noop
+ } while (ec == std::errc::no_such_file_or_directory && --tries);
+
+ EXPECT_EQ(ec, boost::asio::error::operation_aborted);
+}
+
+TEST_F(AsioRados, AsyncWriteOperationCancelTotal)
+{
+ boost::asio::io_context service;
+ boost::asio::cancellation_signal signal;
+ std::optional<error_code> ec;
+
+ librados::ObjectWriteOperation op;
+ op.assert_exists();
+ librados::async_operate(service, io, "noexist", &op, 0, nullptr,
+ capture(signal, ec));
+
+ service.poll();
+ EXPECT_FALSE(service.stopped());
+ EXPECT_FALSE(ec);
+
+ // noop, write only supports terminal
+ signal.emit(boost::asio::cancellation_type::total);
+
+ service.run();
+ ASSERT_TRUE(ec);
+ EXPECT_EQ(ec, std::errc::no_such_file_or_directory);
+
+ signal.emit(boost::asio::cancellation_type::all); // noop
+}
+
+#endif // not _WIN32
+
int main(int argc, char **argv)
{
auto args = argv_to_vec(argc, argv);
diff --git a/src/test/objectstore/ObjectStoreImitator.h b/src/test/objectstore/ObjectStoreImitator.h
index d71d7f2fe58..875f9041b83 100644
--- a/src/test/objectstore/ObjectStoreImitator.h
+++ b/src/test/objectstore/ObjectStoreImitator.h
@@ -347,6 +347,16 @@ public:
) override {
return {};
}
+
+ int omap_iterate(CollectionHandle &c, ///< [in] collection
+ const ghobject_t &oid, ///< [in] object
+ /// [in] where the iterator should point to at the beginning
+ omap_iter_seek_t start_from,
+ std::function<omap_iter_ret_t(std::string_view, std::string_view)> f
+ ) override {
+ return 0;
+ }
+
void set_fsid(uuid_d u) override {}
uuid_d get_fsid() override { return {}; }
uint64_t estimate_objects_overhead(uint64_t num_objects) override {
diff --git a/src/test/objectstore/allocsim/ops_replayer.cc b/src/test/objectstore/allocsim/ops_replayer.cc
index fd947f5c454..c5908d9f576 100644
--- a/src/test/objectstore/allocsim/ops_replayer.cc
+++ b/src/test/objectstore/allocsim/ops_replayer.cc
@@ -1,4 +1,5 @@
#include <algorithm>
+#include <functional>
#include <boost/program_options/value_semantic.hpp>
#include <cassert>
#include <cctype>
@@ -13,26 +14,46 @@
#include <fstream>
#include <filesystem>
#include <mutex>
-#include "include/rados/buffer_fwd.h"
-#include "include/rados/librados.hpp"
#include <atomic>
-#include <fmt/format.h>
#include <map>
#include <memory>
#include <random>
#include <string>
#include <iostream>
#include <vector>
+#include <format>
+
+#include <fmt/format.h>
#include <boost/program_options/variables_map.hpp>
#include <boost/program_options/parsers.hpp>
+#include "include/rados/buffer_fwd.h"
+#include "include/rados/librados.hpp"
+
namespace po = boost::program_options;
using namespace std;
using namespace ceph;
+namespace settings {
+
+// Returns a function which restricts a value to a specified range by throwing if it is not in range:
+// (Note: std::clamp() does not throw.)
+auto clamp_or_throw(auto min, auto max)
+{
+ return [=](auto& x) {
+ if(std::less<>{}(x, min) or std::greater<>{}(x, max)) {
+ throw std::out_of_range(fmt::format("value expected between {} and {}, but got {}", min, max, x));
+ }
+
+ return x;
+ };
+}
+
+} // namespace settings
+
// compare shared_ptr<string>
struct StringPtrCompare
{
@@ -338,8 +359,8 @@ int main(int argc, char** argv) {
// options
uint64_t io_depth = 8;
- uint64_t nparser_threads = 16;
- uint64_t nworker_threads = 16;
+ int nparser_threads = 16;
+ int nworker_threads = 16;
string file("input.txt");
string ceph_conf_path("./ceph.conf");
string pool("test_pool");
@@ -351,8 +372,8 @@ int main(int argc, char** argv) {
("input-files,i", po::value<vector<string>>()->multitoken(), "List of input files (output of op_scraper.py). Multiple files will be merged and sorted by time order")
("ceph-conf", po::value<string>(&ceph_conf_path)->default_value("ceph.conf"), "Path to ceph conf")
("io-depth", po::value<uint64_t>(&io_depth)->default_value(64), "I/O depth")
- ("parser-threads", po::value<uint64_t>(&nparser_threads)->default_value(16), "Number of parser threads")
- ("worker-threads", po::value<uint64_t>(&nworker_threads)->default_value(16), "Number of I/O worker threads")
+ ("parser-threads", po::value<int>(&nparser_threads)->default_value(16)->notifier(settings::clamp_or_throw(1, 256)), "Number of parser threads")
+ ("worker-threads", po::value<int>(&nworker_threads)->default_value(16)->notifier(settings::clamp_or_throw(1, 256)), "Number of I/O worker threads")
("pool", po::value<string>(&pool)->default_value("test_pool"), "Pool to use for I/O")
("skip-do-ops", po::bool_switch(&skip_do_ops)->default_value(false), "Skip doing operations")
;
diff --git a/src/test/osd/CMakeLists.txt b/src/test/osd/CMakeLists.txt
index f2d1471e22e..798558ebbe0 100644
--- a/src/test/osd/CMakeLists.txt
+++ b/src/test/osd/CMakeLists.txt
@@ -22,7 +22,7 @@ install(TARGETS
add_executable(ceph_test_rados_io_sequence
${CMAKE_CURRENT_SOURCE_DIR}/ceph_test_rados_io_sequence.cc)
target_link_libraries(ceph_test_rados_io_sequence
- librados global object_io_exerciser)
+ librados global object_io_exerciser json_structures)
install(TARGETS
ceph_test_rados_io_sequence
DESTINATION ${CMAKE_INSTALL_BINDIR})
diff --git a/src/test/osd/ceph_test_rados_io_sequence.cc b/src/test/osd/ceph_test_rados_io_sequence.cc
index 4a768a016e2..96808ea37e5 100644
--- a/src/test/osd/ceph_test_rados_io_sequence.cc
+++ b/src/test/osd/ceph_test_rados_io_sequence.cc
@@ -1,83 +1,104 @@
#include "ceph_test_rados_io_sequence.h"
+#include <boost/asio/io_context.hpp>
#include <iostream>
#include <vector>
-#include <boost/asio/io_context.hpp>
-
-#include "include/random.h"
-
-#include "librados/librados_asio.h"
-#include "common/ceph_argparse.h"
-#include "include/interval_set.h"
-#include "global/global_init.h"
-#include "global/global_context.h"
+#include "common/Formatter.h"
#include "common/Thread.h"
+#include "common/ceph_argparse.h"
+#include "common/ceph_json.h"
#include "common/debug.h"
#include "common/dout.h"
#include "common/split.h"
#include "common/strtol.h" // for strict_iecstrtoll()
+#include "common/ceph_json.h"
+#include "common/Formatter.h"
#include "common/io_exerciser/DataGenerator.h"
+#include "common/io_exerciser/EcIoSequence.h"
+#include "common/io_exerciser/IoOp.h"
+#include "common/io_exerciser/IoSequence.h"
#include "common/io_exerciser/Model.h"
#include "common/io_exerciser/ObjectModel.h"
#include "common/io_exerciser/RadosIo.h"
-#include "common/io_exerciser/IoOp.h"
-#include "common/io_exerciser/IoSequence.h"
+#include "common/json/BalancerStructures.h"
+#include "common/json/ConfigStructures.h"
+#include "common/json/OSDStructures.h"
+#include "fmt/format.h"
+#include "global/global_context.h"
+#include "global/global_init.h"
+#include "include/interval_set.h"
+#include "include/random.h"
+#include "json_spirit/json_spirit.h"
+#include "librados/librados_asio.h"
#define dout_subsys ceph_subsys_rados
#define dout_context g_ceph_context
+using OpType = ceph::io_exerciser::OpType;
+
+using DoneOp = ceph::io_exerciser::DoneOp;
+using BarrierOp = ceph::io_exerciser::BarrierOp;
+using CreateOp = ceph::io_exerciser::CreateOp;
+using RemoveOp = ceph::io_exerciser::RemoveOp;
+using SingleReadOp = ceph::io_exerciser::SingleReadOp;
+using DoubleReadOp = ceph::io_exerciser::DoubleReadOp;
+using TripleReadOp = ceph::io_exerciser::TripleReadOp;
+using SingleWriteOp = ceph::io_exerciser::SingleWriteOp;
+using DoubleWriteOp = ceph::io_exerciser::DoubleWriteOp;
+using TripleWriteOp = ceph::io_exerciser::TripleWriteOp;
+using SingleFailedWriteOp = ceph::io_exerciser::SingleFailedWriteOp;
+using DoubleFailedWriteOp = ceph::io_exerciser::DoubleFailedWriteOp;
+using TripleFailedWriteOp = ceph::io_exerciser::TripleFailedWriteOp;
+
namespace {
- struct Size {};
- void validate(boost::any& v, const std::vector<std::string>& values,
- Size *target_type, int) {
- po::validators::check_first_occurrence(v);
- const std::string &s = po::validators::get_single_string(values);
-
- std::string parse_error;
- uint64_t size = strict_iecstrtoll(s, &parse_error);
- if (!parse_error.empty()) {
- throw po::validation_error(po::validation_error::invalid_option_value);
- }
- v = boost::any(size);
- }
-
- struct Pair {};
- void validate(boost::any& v, const std::vector<std::string>& values,
- Pair *target_type, int) {
- po::validators::check_first_occurrence(v);
- const std::string &s = po::validators::get_single_string(values);
- auto part = ceph::split(s).begin();
- std::string parse_error;
- int first = strict_iecstrtoll(*part++, &parse_error);
- int second = strict_iecstrtoll(*part, &parse_error);
- if (!parse_error.empty()) {
- throw po::validation_error(po::validation_error::invalid_option_value);
- }
- v = boost::any(std::pair<int,int>{first,second});
- }
-
- struct PluginString {};
- void validate(boost::any& v, const std::vector<std::string>& values,
- PluginString *target_type, int) {
- po::validators::check_first_occurrence(v);
- const std::string &s = po::validators::get_single_string(values);
-
- const std::string_view* pluginIt = std::find(
- ceph::io_sequence::tester::pluginChoices.begin(),
- ceph::io_sequence::tester::pluginChoices.end(),
- s
- );
- if(ceph::io_sequence::tester::pluginChoices.end() == pluginIt)
- {
- throw po::validation_error(po::validation_error::invalid_option_value);
- }
+struct Size {};
+void validate(boost::any& v, const std::vector<std::string>& values,
+ Size* target_type, int) {
+ po::validators::check_first_occurrence(v);
+ const std::string& s = po::validators::get_single_string(values);
- v = boost::any(*pluginIt);
+ std::string parse_error;
+ uint64_t size = strict_iecstrtoll(s, &parse_error);
+ if (!parse_error.empty()) {
+ throw po::validation_error(po::validation_error::invalid_option_value);
}
+ v = boost::any(size);
+}
+
+struct Pair {};
+void validate(boost::any& v, const std::vector<std::string>& values,
+ Pair* target_type, int) {
+ po::validators::check_first_occurrence(v);
+ const std::string& s = po::validators::get_single_string(values);
+ auto part = ceph::split(s).begin();
+ std::string parse_error;
+ int first = strict_iecstrtoll(*part++, &parse_error);
+ int second = strict_iecstrtoll(*part, &parse_error);
+ if (!parse_error.empty()) {
+ throw po::validation_error(po::validation_error::invalid_option_value);
+ }
+ v = boost::any(std::pair<int, int>{first, second});
+}
+
+struct PluginString {};
+void validate(boost::any& v, const std::vector<std::string>& values,
+ PluginString* target_type, int) {
+ po::validators::check_first_occurrence(v);
+ const std::string& s = po::validators::get_single_string(values);
+
+ const std::string_view* pluginIt =
+ std::find(ceph::io_sequence::tester::pluginChoices.begin(),
+ ceph::io_sequence::tester::pluginChoices.end(), s);
+ if (ceph::io_sequence::tester::pluginChoices.end() == pluginIt) {
+ throw po::validation_error(po::validation_error::invalid_option_value);
+ }
+
+ v = boost::any(*pluginIt);
+}
- constexpr std::string_view usage[] = {
+constexpr std::string_view usage[] = {
"Basic usage:",
"",
"ceph_test_rados_io_sequence",
@@ -119,103 +140,99 @@ namespace {
"\t are specified with unit of blocksize. Supported commands:",
"\t\t create <len>",
"\t\t remove",
- "\t\t read|write <off> <len>",
- "\t\t read2|write2 <off> <len> <off> <len>",
- "\t\t read3|write3 <off> <len> <off> <len> <off> <len>",
- "\t\t done"
- };
-
- po::options_description get_options_description()
- {
- po::options_description desc("ceph_test_rados_io options");
- desc.add_options()
- ("help,h",
- "show help message")
- ("listsequence,l",
- "show list of sequences")
- ("dryrun,d",
- "test sequence, do not issue any I/O")
- ("verbose",
- "more verbose output during test")
- ("sequence,s", po::value<int>(),
- "test specified sequence")
- ("seed", po::value<int>(),
- "seed for whole test")
- ("seqseed", po::value<int>(),
- "seed for sequence")
- ("blocksize,b", po::value<Size>(),
- "block size (default 2048)")
- ("chunksize,c", po::value<Size>(),
- "chunk size (default 4096)")
- ("pool,p", po::value<std::string>(),
- "pool name")
- ("object,o", po::value<std::string>()->default_value("test"),
- "object name")
- ("km", po::value<Pair>(),
- "k,m EC pool profile (default 2,2)")
- ("plugin", po::value<PluginString>(),
- "EC plugin (isa or jerasure)")
- ("objectsize", po::value<Pair>(),
- "min,max object size in blocks (default 1,32)")
- ("threads,t", po::value<int>(),
- "number of threads of I/O per object (default 1)")
- ("parallel,p", po::value<int>()->default_value(1),
- "number of objects to exercise in parallel")
- ("interactive",
- "interactive mode, execute IO commands from stdin");
-
- return desc;
- }
-
- int parse_io_seq_options(
- po::variables_map& vm,
- int argc,
- char** argv)
- {
- std::vector<std::string> unrecognized_options;
- try {
- po::options_description desc = get_options_description();
-
- auto parsed = po::command_line_parser(argc, argv)
- .options(desc)
- .allow_unregistered()
- .run();
- po::store(parsed, vm);
- po::notify(vm);
- unrecognized_options = po::collect_unrecognized(parsed.options,
- po::include_positional);
-
- if (!unrecognized_options.empty())
- {
- std::stringstream ss;
- ss << "Unrecognised command options supplied: ";
- while (unrecognized_options.size() > 1)
- {
- ss << unrecognized_options.back().c_str() << ", ";
- unrecognized_options.pop_back();
- }
- ss << unrecognized_options.back();
- dout(0) << ss.str() << dendl;
- return 1;
+ "\t\t read|write|failedwrite <off> <len>",
+ "\t\t read2|write2|failedwrite2 <off> <len> <off> <len>",
+ "\t\t read3|write3|failedwrite3 <off> <len> <off> <len> <off> <len>",
+ "\t\t injecterror <type> <shard> <good_count> <fail_count>",
+ "\t\t clearinject <type> <shard>",
+ "\t\t done"};
+
+po::options_description get_options_description() {
+ po::options_description desc("ceph_test_rados_io options");
+ desc.add_options()("help,h", "show help message")("listsequence,l",
+ "show list of sequences")(
+ "dryrun,d", "test sequence, do not issue any I/O")(
+ "verbose", "more verbose output during test")(
+ "sequence,s", po::value<int>(), "test specified sequence")(
+ "seed", po::value<int>(), "seed for whole test")(
+ "seqseed", po::value<int>(), "seed for sequence")(
+ "blocksize,b", po::value<Size>(), "block size (default 2048)")(
+ "chunksize,c", po::value<Size>(), "chunk size (default 4096)")(
+ "pool,p", po::value<std::string>(), "pool name")(
+ "object,o", po::value<std::string>()->default_value("test"),
+ "object name")("km", po::value<Pair>(),
+ "k,m EC pool profile (default 2,2)")(
+ "plugin", po::value<PluginString>(), "EC plugin (isa or jerasure)")(
+ "objectsize", po::value<Pair>(),
+ "min,max object size in blocks (default 1,32)")(
+ "threads,t", po::value<int>(),
+ "number of threads of I/O per object (default 1)")(
+ "parallel,p", po::value<int>()->default_value(1),
+ "number of objects to exercise in parallel")(
+ "testrecovery",
+ "Inject errors during sequences to test recovery processes of OSDs")(
+ "interactive", "interactive mode, execute IO commands from stdin")(
+ "allow_pool_autoscaling",
+ "Allows pool autoscaling. Disabled by default.")(
+ "allow_pool_balancer", "Enables pool balancing. Disabled by default.")(
+ "allow_pool_deep_scrubbing",
+ "Enables pool deep scrub. Disabled by default.")(
+ "allow_pool_scrubbing", "Enables pool scrubbing. Disabled by default.");
+
+ return desc;
+}
+
+int parse_io_seq_options(po::variables_map& vm, int argc, char** argv) {
+ std::vector<std::string> unrecognized_options;
+ try {
+ po::options_description desc = get_options_description();
+
+ auto parsed = po::command_line_parser(argc, argv)
+ .options(desc)
+ .allow_unregistered()
+ .run();
+ po::store(parsed, vm);
+ po::notify(vm);
+ unrecognized_options =
+ po::collect_unrecognized(parsed.options, po::include_positional);
+
+ if (!unrecognized_options.empty()) {
+ std::stringstream ss;
+ ss << "Unrecognised command options supplied: ";
+ while (unrecognized_options.size() > 1) {
+ ss << unrecognized_options.back().c_str() << ", ";
+ unrecognized_options.pop_back();
}
- } catch(const po::error& e) {
- std::cerr << "error: " << e.what() << std::endl;
+ ss << unrecognized_options.back();
+ dout(0) << ss.str() << dendl;
return 1;
}
-
- return 0;
+ } catch (const po::error& e) {
+ std::cerr << "error: " << e.what() << std::endl;
+ return 1;
}
+
+ return 0;
}
+template <typename S>
+int send_mon_command(S& s, librados::Rados& rados, const char* name,
+ ceph::buffer::list& inbl, ceph::buffer::list* outbl, Formatter* f) {
+ std::ostringstream oss;
+ encode_json(name, s, f);
+ f->flush(oss);
+ int rc = rados.mon_command(oss.str(), inbl, outbl, nullptr);
+ return rc;
+}
+
+} // namespace
+
template <typename T, int N, const std::array<T, N>& Ts>
-ceph::io_sequence::tester::ProgramOptionSelector<T, N, Ts>
- ::ProgramOptionSelector(ceph::util::random_number_generator<int>& rng,
- po::variables_map vm,
- const std::string& option_name,
- bool set_forced,
- bool select_first)
- : rng(rng),
- option_name(option_name) {
+ceph::io_sequence::tester::ProgramOptionSelector<T, N, Ts>::
+ ProgramOptionSelector(ceph::util::random_number_generator<int>& rng,
+ po::variables_map vm, const std::string& option_name,
+ bool set_forced, bool select_first)
+ : rng(rng), option_name(option_name) {
if (set_forced && vm.count(option_name)) {
force_value = vm[option_name].as<T>();
}
@@ -226,76 +243,54 @@ ceph::io_sequence::tester::ProgramOptionSelector<T, N, Ts>
}
template <typename T, int N, const std::array<T, N>& Ts>
-bool ceph::io_sequence::tester::ProgramOptionSelector<T, N, Ts>::isForced()
-{
+bool ceph::io_sequence::tester::ProgramOptionSelector<T, N, Ts>::isForced() {
return force_value.has_value();
}
template <typename T, int N, const std::array<T, N>& Ts>
-const T ceph::io_sequence::tester::ProgramOptionSelector<T, N, Ts>::choose()
-{
+const T ceph::io_sequence::tester::ProgramOptionSelector<T, N, Ts>::choose() {
if (force_value.has_value()) {
return *force_value;
} else if (first_value.has_value()) {
return *std::exchange(first_value, std::nullopt);
} else {
- return choices[rng(N-1)];
+ return choices[rng(N - 1)];
}
}
-
-
ceph::io_sequence::tester::SelectObjectSize::SelectObjectSize(
- ceph::util::random_number_generator<int>& rng,
- po::variables_map vm)
- : ProgramOptionSelector(rng, vm, "objectsize", true, true)
-{
-}
-
-
+ ceph::util::random_number_generator<int>& rng, po::variables_map vm)
+ : ProgramOptionSelector(rng, vm, "objectsize", true, true) {}
ceph::io_sequence::tester::SelectBlockSize::SelectBlockSize(
- ceph::util::random_number_generator<int>& rng,
- po::variables_map vm)
- : ProgramOptionSelector(rng, vm, "blocksize", true, true)
-{
-}
-
-
+ ceph::util::random_number_generator<int>& rng, po::variables_map vm)
+ : ProgramOptionSelector(rng, vm, "blocksize", true, true) {}
ceph::io_sequence::tester::SelectNumThreads::SelectNumThreads(
- ceph::util::random_number_generator<int>& rng,
- po::variables_map vm)
- : ProgramOptionSelector(rng, vm, "threads", true, true)
-{
-}
-
-
+ ceph::util::random_number_generator<int>& rng, po::variables_map vm)
+ : ProgramOptionSelector(rng, vm, "threads", true, true) {}
ceph::io_sequence::tester::SelectSeqRange::SelectSeqRange(
- ceph::util::random_number_generator<int>& rng,
- po::variables_map vm)
- : ProgramOptionSelector(rng, vm, "sequence", false, false)
-{
+ ceph::util::random_number_generator<int>& rng, po::variables_map vm)
+ : ProgramOptionSelector(rng, vm, "sequence", false, false) {
if (vm.count(option_name)) {
ceph::io_exerciser::Sequence s =
- static_cast<ceph::io_exerciser::Sequence>(vm["sequence"].as<int>());
+ static_cast<ceph::io_exerciser::Sequence>(vm["sequence"].as<int>());
if (s < ceph::io_exerciser::Sequence::SEQUENCE_BEGIN ||
s >= ceph::io_exerciser::Sequence::SEQUENCE_END) {
dout(0) << "Sequence argument out of range" << dendl;
throw po::validation_error(po::validation_error::invalid_option_value);
}
ceph::io_exerciser::Sequence e = s;
- force_value = std::make_optional<std::pair<ceph::io_exerciser::Sequence,
- ceph::io_exerciser::Sequence>>(
- std::make_pair(s, ++e));
+ force_value = std::make_optional<
+ std::pair<ceph::io_exerciser::Sequence, ceph::io_exerciser::Sequence>>(
+ std::make_pair(s, ++e));
}
}
-const std::pair<ceph::io_exerciser::Sequence,ceph::io_exerciser::Sequence>
- ceph::io_sequence::tester::SelectSeqRange::choose() {
- if (force_value.has_value())
- {
+const std::pair<ceph::io_exerciser::Sequence, ceph::io_exerciser::Sequence>
+ceph::io_sequence::tester::SelectSeqRange::choose() {
+ if (force_value.has_value()) {
return *force_value;
} else {
return std::make_pair(ceph::io_exerciser::Sequence::SEQUENCE_BEGIN,
@@ -303,45 +298,34 @@ const std::pair<ceph::io_exerciser::Sequence,ceph::io_exerciser::Sequence>
}
}
-
-
ceph::io_sequence::tester::SelectErasureKM::SelectErasureKM(
- ceph::util::random_number_generator<int>& rng,
- po::variables_map vm)
- : ProgramOptionSelector(rng, vm, "km", true, true)
-{
-}
-
-
+ ceph::util::random_number_generator<int>& rng, po::variables_map vm)
+ : ProgramOptionSelector(rng, vm, "km", true, true) {}
ceph::io_sequence::tester::SelectErasurePlugin::SelectErasurePlugin(
- ceph::util::random_number_generator<int>& rng,
- po::variables_map vm)
- : ProgramOptionSelector(rng, vm, "plugin", true, false)
-{
-}
-
-
-
-ceph::io_sequence::tester::SelectErasureChunkSize::SelectErasureChunkSize(ceph::util::random_number_generator<int>& rng, po::variables_map vm)
- : ProgramOptionSelector(rng, vm, "stripe_unit", true, false)
-{
-}
-
+ ceph::util::random_number_generator<int>& rng, po::variables_map vm)
+ : ProgramOptionSelector(rng, vm, "plugin", true, false) {}
+ceph::io_sequence::tester::SelectErasureChunkSize::SelectErasureChunkSize(
+ ceph::util::random_number_generator<int>& rng, po::variables_map vm)
+ : ProgramOptionSelector(rng, vm, "chunksize", true, true) {}
ceph::io_sequence::tester::SelectECPool::SelectECPool(
- ceph::util::random_number_generator<int>& rng,
- po::variables_map vm,
- librados::Rados& rados,
- bool dry_run)
- : ProgramOptionSelector(rng, vm, "pool", false, false),
- rados(rados),
- dry_run(dry_run),
- skm(SelectErasureKM(rng, vm)),
- spl(SelectErasurePlugin(rng, vm)),
- scs(SelectErasureChunkSize(rng, vm))
-{
+ ceph::util::random_number_generator<int>& rng, po::variables_map vm,
+ librados::Rados& rados, bool dry_run, bool allow_pool_autoscaling,
+ bool allow_pool_balancer, bool allow_pool_deep_scrubbing,
+ bool allow_pool_scrubbing, bool test_recovery)
+ : ProgramOptionSelector(rng, vm, "pool", false, false),
+ rados(rados),
+ dry_run(dry_run),
+ allow_pool_autoscaling(allow_pool_autoscaling),
+ allow_pool_balancer(allow_pool_balancer),
+ allow_pool_deep_scrubbing(allow_pool_deep_scrubbing),
+ allow_pool_scrubbing(allow_pool_scrubbing),
+ test_recovery(test_recovery),
+ skm(SelectErasureKM(rng, vm)),
+ spl(SelectErasurePlugin(rng, vm)),
+ scs(SelectErasureChunkSize(rng, vm)) {
if (!skm.isForced()) {
if (vm.count("pool")) {
force_value = vm["pool"].as<std::string>();
@@ -349,147 +333,239 @@ ceph::io_sequence::tester::SelectECPool::SelectECPool(
}
}
-const std::string ceph::io_sequence::tester::SelectECPool::choose()
-{
- std::pair<int,int> value;
+const std::string ceph::io_sequence::tester::SelectECPool::choose() {
+ std::pair<int, int> value;
if (!skm.isForced() && force_value.has_value()) {
+ int rc;
+ bufferlist inbl, outbl;
+ auto formatter = std::make_unique<JSONFormatter>(false);
+
+ ceph::messaging::osd::OSDPoolGetRequest osdPoolGetRequest{*force_value};
+ rc = send_mon_command(osdPoolGetRequest, rados, "OSDPoolGetRequest", inbl,
+ &outbl, formatter.get());
+ ceph_assert(rc == 0);
+
+ JSONParser p;
+ bool success = p.parse(outbl.c_str(), outbl.length());
+ ceph_assert(success);
+
+ ceph::messaging::osd::OSDPoolGetReply osdPoolGetReply;
+ osdPoolGetReply.decode_json(&p);
+
+ ceph::messaging::osd::OSDECProfileGetRequest osdECProfileGetRequest{
+ osdPoolGetReply.erasure_code_profile};
+ rc = send_mon_command(osdECProfileGetRequest, rados,
+ "OSDECProfileGetRequest", inbl, &outbl,
+ formatter.get());
+ ceph_assert(rc == 0);
+
+ success = p.parse(outbl.c_str(), outbl.length());
+ ceph_assert(success);
+
+ ceph::messaging::osd::OSDECProfileGetReply reply;
+ reply.decode_json(&p);
+ k = reply.k;
+ m = reply.m;
return *force_value;
} else {
value = skm.choose();
}
- int k = value.first;
- int m = value.second;
+ k = value.first;
+ m = value.second;
const std::string plugin = std::string(spl.choose());
const uint64_t chunk_size = scs.choose();
- std::string pool_name = "ec_" + plugin +
- "_cs" + std::to_string(chunk_size) +
- "_k" + std::to_string(k) +
- "_m" + std::to_string(m);
- if (!dry_run)
- {
+ std::string pool_name = "ec_" + plugin + "_cs" + std::to_string(chunk_size) +
+ "_k" + std::to_string(k) + "_m" + std::to_string(m);
+ if (!dry_run) {
create_pool(rados, pool_name, plugin, chunk_size, k, m);
}
return pool_name;
}
void ceph::io_sequence::tester::SelectECPool::create_pool(
- librados::Rados& rados,
- const std::string& pool_name,
- const std::string& plugin,
- uint64_t chunk_size,
- int k, int m)
-{
+ librados::Rados& rados, const std::string& pool_name,
+ const std::string& plugin, uint64_t chunk_size, int k, int m) {
int rc;
bufferlist inbl, outbl;
- std::string profile_create =
- "{\"prefix\": \"osd erasure-code-profile set\", \
- \"name\": \"testprofile-" + pool_name + "\", \
- \"profile\": [ \"plugin=" + plugin + "\", \
- \"k=" + std::to_string(k) + "\", \
- \"m=" + std::to_string(m) + "\", \
- \"stripe_unit=" + std::to_string(chunk_size) + "\", \
- \"crush-failure-domain=osd\"]}";
- rc = rados.mon_command(profile_create, inbl, &outbl, nullptr);
+ auto formatter = std::make_unique<JSONFormatter>(false);
+
+ ceph::messaging::osd::OSDECProfileSetRequest ecProfileSetRequest{
+ fmt::format("testprofile-{}", pool_name),
+ {fmt::format("plugin={}", plugin), fmt::format("k={}", k),
+ fmt::format("m={}", m), fmt::format("stripe_unit={}", chunk_size),
+ fmt::format("crush-failure-domain=osd")}};
+ rc = send_mon_command(ecProfileSetRequest, rados, "OSDECProfileSetRequest",
+ inbl, &outbl, formatter.get());
ceph_assert(rc == 0);
- std::string cmdstr =
- "{\"prefix\": \"osd pool create\", \
- \"pool\": \"" + pool_name + "\", \
- \"pool_type\": \"erasure\", \
- \"pg_num\": 8, \
- \"pgp_num\": 8, \
- \"erasure_code_profile\": \"testprofile-" + pool_name + "\"}";
- rc = rados.mon_command(cmdstr, inbl, &outbl, nullptr);
+
+ ceph::messaging::osd::OSDECPoolCreateRequest poolCreateRequest{
+ pool_name, "erasure", 8, 8, fmt::format("testprofile-{}", pool_name)};
+ rc = send_mon_command(poolCreateRequest, rados, "OSDECPoolCreateRequest",
+ inbl, &outbl, formatter.get());
ceph_assert(rc == 0);
-}
+ if (allow_pool_autoscaling) {
+ ceph::messaging::osd::OSDSetRequest setNoAutoscaleRequest{"noautoscale",
+ std::nullopt};
+ rc = send_mon_command(setNoAutoscaleRequest, rados, "OSDSetRequest", inbl,
+ &outbl, formatter.get());
+ ceph_assert(rc == 0);
+ }
+
+ if (allow_pool_balancer) {
+ ceph::messaging::balancer::BalancerOffRequest balancerOffRequest{};
+ rc = send_mon_command(balancerOffRequest, rados, "BalancerOffRequest", inbl,
+ &outbl, formatter.get());
+ ceph_assert(rc == 0);
+
+ ceph::messaging::balancer::BalancerStatusRequest balancerStatusRequest{};
+ rc = send_mon_command(balancerStatusRequest, rados, "BalancerStatusRequest",
+ inbl, &outbl, formatter.get());
+ ceph_assert(rc == 0);
+
+ JSONParser p;
+ bool success = p.parse(outbl.c_str(), outbl.length());
+ ceph_assert(success);
+
+ ceph::messaging::balancer::BalancerStatusReply reply;
+ reply.decode_json(&p);
+ ceph_assert(!reply.active);
+ }
+ if (allow_pool_deep_scrubbing) {
+ ceph::messaging::osd::OSDSetRequest setNoDeepScrubRequest{"nodeep-scrub",
+ std::nullopt};
+ rc = send_mon_command(setNoDeepScrubRequest, rados, "setNoDeepScrubRequest",
+ inbl, &outbl, formatter.get());
+ ceph_assert(rc == 0);
+ }
+
+ if (allow_pool_scrubbing) {
+ ceph::messaging::osd::OSDSetRequest setNoScrubRequest{"noscrub",
+ std::nullopt};
+ rc = send_mon_command(setNoScrubRequest, rados, "OSDSetRequest", inbl,
+ &outbl, formatter.get());
+ ceph_assert(rc == 0);
+ }
+
+ if (test_recovery) {
+ ceph::messaging::config::ConfigSetRequest configSetBluestoreDebugRequest{
+ "global", "bluestore_debug_inject_read_err", "true", std::nullopt};
+ rc = send_mon_command(configSetBluestoreDebugRequest, rados,
+ "ConfigSetRequest", inbl, &outbl,
+ formatter.get());
+ ceph_assert(rc == 0);
+
+ ceph::messaging::config::ConfigSetRequest configSetMaxMarkdownRequest{
+ "global", "osd_max_markdown_count", "99999999", std::nullopt};
+ rc =
+ send_mon_command(configSetMaxMarkdownRequest, rados, "ConfigSetRequest",
+ inbl, &outbl, formatter.get());
+ ceph_assert(rc == 0);
+ }
+}
-ceph::io_sequence::tester::TestObject::TestObject( const std::string oid,
- librados::Rados& rados,
- boost::asio::io_context& asio,
- SelectBlockSize& sbs,
- SelectECPool& spo,
- SelectObjectSize& sos,
- SelectNumThreads& snt,
- SelectSeqRange& ssr,
- ceph::util::random_number_generator<int>& rng,
- ceph::mutex& lock,
- ceph::condition_variable& cond,
- bool dryrun,
- bool verbose,
- std::optional<int> seqseed) :
- rng(rng), verbose(verbose), seqseed(seqseed)
-{
+ceph::io_sequence::tester::TestObject::TestObject(
+ const std::string oid, librados::Rados& rados,
+ boost::asio::io_context& asio, SelectBlockSize& sbs, SelectECPool& spo,
+ SelectObjectSize& sos, SelectNumThreads& snt, SelectSeqRange& ssr,
+ ceph::util::random_number_generator<int>& rng, ceph::mutex& lock,
+ ceph::condition_variable& cond, bool dryrun, bool verbose,
+ std::optional<int> seqseed, bool testrecovery)
+ : rng(rng), verbose(verbose), seqseed(seqseed), testrecovery(testrecovery) {
if (dryrun) {
- verbose = true;
- exerciser_model = std::make_unique<ceph::io_exerciser::ObjectModel>(oid,
- sbs.choose(),
- rng());
+ exerciser_model = std::make_unique<ceph::io_exerciser::ObjectModel>(
+ oid, sbs.choose(), rng());
} else {
const std::string pool = spo.choose();
+ poolK = spo.getChosenK();
+ poolM = spo.getChosenM();
+
int threads = snt.choose();
- exerciser_model = std::make_unique<ceph::io_exerciser::RadosIo>(rados,
- asio,
- pool,
- oid,
- sbs.choose(),
- rng(),
- threads,
- lock,
- cond);
- dout(0) << "= " << oid << " pool=" << pool
- << " threads=" << threads
- << " blocksize=" << exerciser_model->get_block_size()
- << " =" << dendl;
+
+ bufferlist inbl, outbl;
+ auto formatter = std::make_unique<JSONFormatter>(false);
+
+ std::optional<std::vector<int>> cached_shard_order = std::nullopt;
+
+ if (!spo.get_allow_pool_autoscaling() && !spo.get_allow_pool_balancer() &&
+ !spo.get_allow_pool_deep_scrubbing() &&
+ !spo.get_allow_pool_scrubbing()) {
+ ceph::messaging::osd::OSDMapRequest osdMapRequest{pool, oid, ""};
+ int rc = send_mon_command(osdMapRequest, rados, "OSDMapRequest", inbl,
+ &outbl, formatter.get());
+ ceph_assert(rc == 0);
+
+ JSONParser p;
+ bool success = p.parse(outbl.c_str(), outbl.length());
+ ceph_assert(success);
+
+ ceph::messaging::osd::OSDMapReply reply{};
+ reply.decode_json(&p);
+ cached_shard_order = reply.acting;
+ }
+
+ exerciser_model = std::make_unique<ceph::io_exerciser::RadosIo>(
+ rados, asio, pool, oid, cached_shard_order, sbs.choose(), rng(),
+ threads, lock, cond);
+ dout(0) << "= " << oid << " pool=" << pool << " threads=" << threads
+ << " blocksize=" << exerciser_model->get_block_size() << " ="
+ << dendl;
}
obj_size_range = sos.choose();
seq_range = ssr.choose();
curseq = seq_range.first;
- seq = ceph::io_exerciser::IoSequence::generate_sequence(curseq,
- obj_size_range,
- seqseed.value_or(rng()));
+
+ if (testrecovery) {
+ seq = ceph::io_exerciser::EcIoSequence::generate_sequence(
+ curseq, obj_size_range, poolK, poolM, seqseed.value_or(rng()));
+ } else {
+ seq = ceph::io_exerciser::IoSequence::generate_sequence(
+ curseq, obj_size_range, seqseed.value_or(rng()));
+ }
+
op = seq->next();
done = false;
- dout(0) << "== " << exerciser_model->get_oid() << " "
- << curseq << " "
- << seq->get_name()
- << " ==" <<dendl;
+ dout(0) << "== " << exerciser_model->get_oid() << " " << curseq << " "
+ << seq->get_name_with_seqseed() << " ==" << dendl;
}
-bool ceph::io_sequence::tester::TestObject::readyForIo()
-{
+bool ceph::io_sequence::tester::TestObject::readyForIo() {
return exerciser_model->readyForIoOp(*op);
}
-bool ceph::io_sequence::tester::TestObject::next()
-{
+bool ceph::io_sequence::tester::TestObject::next() {
if (!done) {
if (verbose) {
- dout(0) << exerciser_model->get_oid()
- << " Step " << seq->get_step() << ": "
- << op->to_string(exerciser_model->get_block_size()) << dendl;
+ dout(0) << exerciser_model->get_oid() << " Step " << seq->get_step()
+ << ": " << op->to_string(exerciser_model->get_block_size())
+ << dendl;
} else {
- dout(5) << exerciser_model->get_oid()
- << " Step " << seq->get_step() << ": "
- << op->to_string(exerciser_model->get_block_size()) << dendl;
+ dout(5) << exerciser_model->get_oid() << " Step " << seq->get_step()
+ << ": " << op->to_string(exerciser_model->get_block_size())
+ << dendl;
}
exerciser_model->applyIoOp(*op);
- if (op->done()) {
- ++curseq;
- if (curseq == seq_range.second) {
+ if (op->getOpType() == ceph::io_exerciser::OpType::Done) {
+ curseq = seq->getNextSupportedSequenceId();
+ if (curseq >= seq_range.second) {
done = true;
dout(0) << exerciser_model->get_oid()
<< " Number of IOs = " << exerciser_model->get_num_io()
<< dendl;
} else {
- seq = ceph::io_exerciser::IoSequence::generate_sequence(curseq,
- obj_size_range,
- seqseed.value_or(rng()));
- dout(0) << "== " << exerciser_model->get_oid() << " "
- << curseq << " " << seq->get_name()
- << " ==" <<dendl;
+ if (testrecovery) {
+ seq = ceph::io_exerciser::EcIoSequence::generate_sequence(
+ curseq, obj_size_range, poolK, poolM, seqseed.value_or(rng()));
+ } else {
+ seq = ceph::io_exerciser::IoSequence::generate_sequence(
+ curseq, obj_size_range, seqseed.value_or(rng()));
+ }
+
+ dout(0) << "== " << exerciser_model->get_oid() << " " << curseq << " "
+ << seq->get_name_with_seqseed() << " ==" << dendl;
op = seq->next();
}
} else {
@@ -499,27 +575,30 @@ bool ceph::io_sequence::tester::TestObject::next()
return done;
}
-bool ceph::io_sequence::tester::TestObject::finished()
-{
- return done;
-}
+bool ceph::io_sequence::tester::TestObject::finished() { return done; }
-int ceph::io_sequence::tester::TestObject::get_num_io()
-{
+int ceph::io_sequence::tester::TestObject::get_num_io() {
return exerciser_model->get_num_io();
}
ceph::io_sequence::tester::TestRunner::TestRunner(po::variables_map& vm,
- librados::Rados& rados) :
- rados(rados),
- seed(vm.contains("seed") ? vm["seed"].as<int>() : time(nullptr)),
- rng(ceph::util::random_number_generator<int>(seed)),
- sbs{rng, vm},
- sos{rng, vm},
- spo{rng, vm, rados, vm.contains("dryrun")},
- snt{rng, vm},
- ssr{rng, vm}
-{
+ librados::Rados& rados)
+ : rados(rados),
+ seed(vm.contains("seed") ? vm["seed"].as<int>() : time(nullptr)),
+ rng(ceph::util::random_number_generator<int>(seed)),
+ sbs{rng, vm},
+ sos{rng, vm},
+ spo{rng,
+ vm,
+ rados,
+ vm.contains("dryrun"),
+ vm.contains("allow_pool_autoscaling"),
+ vm.contains("allow_pool_balancer"),
+ vm.contains("allow_pool_deep_scrubbing"),
+ vm.contains("allow_pool_scrubbing"),
+ vm.contains("test_recovery")},
+ snt{rng, vm},
+ ssr{rng, vm} {
dout(0) << "Test using seed " << seed << dendl;
verbose = vm.contains("verbose");
@@ -532,19 +611,23 @@ ceph::io_sequence::tester::TestRunner::TestRunner(po::variables_map& vm,
num_objects = vm["parallel"].as<int>();
object_name = vm["object"].as<std::string>();
interactive = vm.contains("interactive");
+ testrecovery = vm.contains("testrecovery");
+
+ allow_pool_autoscaling = vm.contains("allow_pool_autoscaling");
+ allow_pool_balancer = vm.contains("allow_pool_balancer");
+ allow_pool_deep_scrubbing = vm.contains("allow_pool_deep_scrubbing");
+ allow_pool_scrubbing = vm.contains("allow_pool_scrubbing");
- if (!dryrun)
- {
+ if (!dryrun) {
guard.emplace(boost::asio::make_work_guard(asio));
- thread = make_named_thread("io_thread",[&asio = asio] { asio.run(); });
+ thread = make_named_thread("io_thread", [&asio = asio] { asio.run(); });
}
show_help = vm.contains("help");
show_sequence = vm.contains("listsequence");
}
-ceph::io_sequence::tester::TestRunner::~TestRunner()
-{
+ceph::io_sequence::tester::TestRunner::~TestRunner() {
if (!dryrun) {
guard = std::nullopt;
asio.stop();
@@ -553,34 +636,38 @@ ceph::io_sequence::tester::TestRunner::~TestRunner()
}
}
-void ceph::io_sequence::tester::TestRunner::help()
-{
+void ceph::io_sequence::tester::TestRunner::help() {
std::cout << get_options_description() << std::endl;
for (auto line : usage) {
std::cout << line << std::endl;
}
}
-void ceph::io_sequence::tester::TestRunner::list_sequence()
-{
+void ceph::io_sequence::tester::TestRunner::list_sequence(bool testrecovery) {
// List seqeunces
- std::pair<int,int> obj_size_range = sos.choose();
- for (ceph::io_exerciser::Sequence s
- = ceph::io_exerciser::Sequence::SEQUENCE_BEGIN;
- s < ceph::io_exerciser::Sequence::SEQUENCE_END; ++s) {
- std::unique_ptr<ceph::io_exerciser::IoSequence> seq =
- ceph::io_exerciser::IoSequence::generate_sequence(s,
- obj_size_range,
- seqseed.value_or(rng()));
- dout(0) << s << " " << seq->get_name() << dendl;
+ // Logs one line (id and name) per supported sequence, starting at
+ // SEQUENCE_BEGIN. With `testrecovery` set the erasure-code sequences are
+ // listed instead of the plain I/O sequences.
+ // NOTE(review): the testrecovery branch reads spo.getChosenK()/getChosenM();
+ // confirm they hold valid values when no pool has been chosen yet.
+ std::pair<int, int> obj_size_range = sos.choose();
+ ceph::io_exerciser::Sequence s = ceph::io_exerciser::Sequence::SEQUENCE_BEGIN;
+ std::unique_ptr<ceph::io_exerciser::IoSequence> seq;
+ do {
+ // Regenerate the sequence object for the current id on every pass, so the
+ // name printed belongs to `s` and the next id is derived from it.
+ if (testrecovery) {
+ seq = ceph::io_exerciser::EcIoSequence::generate_sequence(
+ s, obj_size_range, spo.getChosenK(), spo.getChosenM(),
+ seqseed.value_or(rng()));
+ } else {
+ seq = ceph::io_exerciser::IoSequence::generate_sequence(
+ s, obj_size_range, seqseed.value_or(rng()));
}
+
+ dout(0) << s << " " << seq->get_name_with_seqseed() << dendl;
+ s = seq->getNextSupportedSequenceId();
+ } while (s < ceph::io_exerciser::Sequence::SEQUENCE_END);
}
-std::string ceph::io_sequence::tester::TestRunner::get_token()
-{
- static std::string line;
- static ceph::split split = ceph::split("");
- static ceph::spliterator tokens;
+void ceph::io_sequence::tester::TestRunner::clear_tokens() {  // drop the tokens still pending on the current input line
+ tokens = split.end();  // get_token() fetches a new stdin line once tokens == split.end()
+}
+
+std::string ceph::io_sequence::tester::TestRunner::get_token() {
while (line.empty() || tokens == split.end()) {
if (!std::getline(std::cin, line)) {
throw std::runtime_error("End of input");
@@ -591,127 +678,211 @@ std::string ceph::io_sequence::tester::TestRunner::get_token()
return std::string(*tokens++);
}
-uint64_t ceph::io_sequence::tester::TestRunner::get_numeric_token()
-{
+// Returns the next token still pending on the current input line, or
+// std::nullopt when none is left. Unlike get_token(), this never reads a
+// further line from stdin.
+std::optional<std::string>
+ceph::io_sequence::tester::TestRunner::get_optional_token() {
+ if (tokens == split.end()) {
+ return std::nullopt;
+ }
+ return std::string(*tokens++);
+}
+
+uint64_t ceph::io_sequence::tester::TestRunner::get_numeric_token() {
std::string parse_error;
std::string token = get_token();
uint64_t num = strict_iecstrtoll(token, &parse_error);
if (!parse_error.empty()) {
- throw std::runtime_error("Invalid number "+token);
+ throw std::runtime_error("Invalid number " + token);
}
return num;
}
-bool ceph::io_sequence::tester::TestRunner::run_test()
-{
- if (show_help)
- {
+// Optional counterpart of get_numeric_token(): takes the next pending token
+// (if any) and parses it with strict_iecstrtoll().
+// Returns std::nullopt when no token is pending; throws std::runtime_error
+// ("Invalid number <token>") when the parser reports an error.
+std::optional<uint64_t>
+ceph::io_sequence::tester::TestRunner::get_optional_numeric_token() {
+ const std::optional<std::string> token = get_optional_token();
+ if (!token) {
+ return std::nullopt;
+ }
+
+ std::string parse_error;
+ const uint64_t num = strict_iecstrtoll(*token, &parse_error);
+ if (!parse_error.empty()) {
+ throw std::runtime_error("Invalid number " + *token);
+ }
+ return num;
+}
+
+bool ceph::io_sequence::tester::TestRunner::run_test() {
+ if (show_help) {
help();
return true;
- }
- else if (show_sequence)
- {
- list_sequence();
+ } else if (show_sequence) {
+ list_sequence(testrecovery);
return true;
- }
- else if (interactive)
- {
+ } else if (interactive) {
return run_interactive_test();
- }
- else
- {
+ } else {
return run_automated_test();
}
}
-bool ceph::io_sequence::tester::TestRunner::run_interactive_test()
-{
+bool ceph::io_sequence::tester::TestRunner::run_interactive_test() {
bool done = false;
std::unique_ptr<ceph::io_exerciser::IoOp> ioop;
std::unique_ptr<ceph::io_exerciser::Model> model;
if (dryrun) {
- model = std::make_unique<ceph::io_exerciser::ObjectModel>(object_name,
- sbs.choose(),
- rng());
+ model = std::make_unique<ceph::io_exerciser::ObjectModel>(
+ object_name, sbs.choose(), rng());
} else {
const std::string pool = spo.choose();
- model = std::make_unique<ceph::io_exerciser::RadosIo>(rados, asio, pool,
- object_name, sbs.choose(),
- rng(), 1, // 1 thread
- lock, cond);
+
+ bufferlist inbl, outbl;
+ auto formatter = std::make_unique<JSONFormatter>(false);
+
+ ceph::messaging::osd::OSDMapRequest osdMapRequest{pool, object_name, ""};
+ int rc = send_mon_command(osdMapRequest, rados, "OSDMapRequest", inbl,
+ &outbl, formatter.get());
+ ceph_assert(rc == 0);
+
+ JSONParser p;
+ bool success = p.parse(outbl.c_str(), outbl.length());
+ ceph_assert(success);
+
+ ceph::messaging::osd::OSDMapReply reply{};
+ reply.decode_json(&p);
+
+ model = std::make_unique<ceph::io_exerciser::RadosIo>(
+ rados, asio, pool, object_name, reply.acting, sbs.choose(), rng(),
+ 1, // 1 thread
+ lock, cond);
}
while (!done) {
const std::string op = get_token();
- if (!op.compare("done") || !op.compare("q") || !op.compare("quit")) {
- ioop = ceph::io_exerciser::IoOp::generate_done();
- } else if (!op.compare("create")) {
- ioop = ceph::io_exerciser::IoOp::generate_create(get_numeric_token());
- } else if (!op.compare("remove") || !op.compare("delete")) {
- ioop = ceph::io_exerciser::IoOp::generate_remove();
- } else if (!op.compare("read")) {
+ if (op == "done" || op == "q" || op == "quit") {
+ ioop = ceph::io_exerciser::DoneOp::generate();
+ } else if (op == "create") {
+ ioop = ceph::io_exerciser::CreateOp::generate(get_numeric_token());
+ } else if (op == "remove" || op == "delete") {
+ ioop = ceph::io_exerciser::RemoveOp::generate();
+ } else if (op == "read") {
uint64_t offset = get_numeric_token();
uint64_t length = get_numeric_token();
- ioop = ceph::io_exerciser::IoOp::generate_read(offset, length);
- } else if (!op.compare("read2")) {
+ ioop = ceph::io_exerciser::SingleReadOp::generate(offset, length);
+ } else if (op == "read2") {
uint64_t offset1 = get_numeric_token();
uint64_t length1 = get_numeric_token();
uint64_t offset2 = get_numeric_token();
uint64_t length2 = get_numeric_token();
- ioop = ceph::io_exerciser::IoOp::generate_read2(offset1, length1,
- offset2, length2);
- } else if (!op.compare("read3")) {
+ ioop = DoubleReadOp::generate(offset1, length1, offset2, length2);
+ } else if (op == "read3") {
uint64_t offset1 = get_numeric_token();
uint64_t length1 = get_numeric_token();
uint64_t offset2 = get_numeric_token();
uint64_t length2 = get_numeric_token();
uint64_t offset3 = get_numeric_token();
uint64_t length3 = get_numeric_token();
- ioop = ceph::io_exerciser::IoOp::generate_read3(offset1, length1,
- offset2, length2,
- offset3, length3);
- } else if (!op.compare("write")) {
+ ioop = TripleReadOp::generate(offset1, length1, offset2, length2, offset3,
+ length3);
+ } else if (op == "write") {
uint64_t offset = get_numeric_token();
uint64_t length = get_numeric_token();
- ioop = ceph::io_exerciser::IoOp::generate_write(offset, length);
- } else if (!op.compare("write2")) {
+ ioop = SingleWriteOp::generate(offset, length);
+ } else if (op == "write2") {
uint64_t offset1 = get_numeric_token();
uint64_t length1 = get_numeric_token();
uint64_t offset2 = get_numeric_token();
uint64_t length2 = get_numeric_token();
- ioop = ceph::io_exerciser::IoOp::generate_write2(offset1, length1,
- offset2, length2);
- } else if (!op.compare("write3")) {
+ ioop = DoubleWriteOp::generate(offset1, length1, offset2, length2);
+ } else if (op == "write3") {
uint64_t offset1 = get_numeric_token();
uint64_t length1 = get_numeric_token();
uint64_t offset2 = get_numeric_token();
uint64_t length2 = get_numeric_token();
uint64_t offset3 = get_numeric_token();
uint64_t length3 = get_numeric_token();
- ioop = ceph::io_exerciser::IoOp::generate_write3(offset1, length1,
- offset2, length2,
- offset3, length3);
+ ioop = TripleWriteOp::generate(offset1, length1, offset2, length2,
+ offset3, length3);
+ } else if (op == "failedwrite") {
+ uint64_t offset = get_numeric_token();
+ uint64_t length = get_numeric_token();
+ ioop = SingleFailedWriteOp::generate(offset, length);
+ } else if (op == "failedwrite2") {
+ uint64_t offset1 = get_numeric_token();
+ uint64_t length1 = get_numeric_token();
+ uint64_t offset2 = get_numeric_token();
+ uint64_t length2 = get_numeric_token();
+ ioop = DoubleFailedWriteOp::generate(offset1, length1, offset2, length2);
+ } else if (op == "failedwrite3") {
+ uint64_t offset1 = get_numeric_token();
+ uint64_t length1 = get_numeric_token();
+ uint64_t offset2 = get_numeric_token();
+ uint64_t length2 = get_numeric_token();
+ uint64_t offset3 = get_numeric_token();
+ uint64_t length3 = get_numeric_token();
+ ioop = TripleFailedWriteOp::generate(offset1, length1, offset2, length2,
+ offset3, length3);
+ } else if (op == "injecterror") {
+ std::string inject_type = get_token();
+ int shard = get_numeric_token();
+ std::optional<int> type = get_optional_numeric_token();
+ std::optional<int> when = get_optional_numeric_token();
+ std::optional<int> duration = get_optional_numeric_token();
+ if (inject_type == "read") {
+ ioop = ceph::io_exerciser::InjectReadErrorOp::generate(shard, type,
+ when, duration);
+ } else if (inject_type == "write") {
+ ioop = ceph::io_exerciser::InjectWriteErrorOp::generate(shard, type,
+ when, duration);
+ } else {
+ clear_tokens();
+ ioop.reset();
+ dout(0) << fmt::format("Invalid error inject {}. No action performed.",
+ inject_type)
+ << dendl;
+ }
+ } else if (op == "clearinject") {
+ std::string inject_type = get_token();
+ int shard = get_numeric_token();
+ std::optional<int> type = get_optional_numeric_token();
+ if (inject_type == "read") {
+ ioop =
+ ceph::io_exerciser::ClearReadErrorInjectOp::generate(shard, type);
+ } else if (inject_type == "write") {
+ ioop =
+ ceph::io_exerciser::ClearWriteErrorInjectOp::generate(shard, type);
+ } else {
+ clear_tokens();
+ ioop.reset();
+ dout(0) << fmt::format("Invalid error inject {}. No action performed.",
+ inject_type)
+ << dendl;
+ }
} else {
- throw std::runtime_error("Invalid operation "+op);
+ clear_tokens();
+ ioop.reset();
+ dout(0) << fmt::format("Invalid op {}. No action performed.", op)
+ << dendl;
}
- dout(0) << ioop->to_string(model->get_block_size()) << dendl;
- model->applyIoOp(*ioop);
- done = ioop->done();
- if (!done) {
- ioop = ceph::io_exerciser::IoOp::generate_barrier();
+ if (ioop) {
+ dout(0) << ioop->to_string(model->get_block_size()) << dendl;
model->applyIoOp(*ioop);
+ done = ioop->getOpType() == ceph::io_exerciser::OpType::Done;
+ if (!done) {
+ ioop = ceph::io_exerciser::BarrierOp::generate();
+ model->applyIoOp(*ioop);
+ }
}
}
return true;
}
-bool ceph::io_sequence::tester::TestRunner::run_automated_test()
-{
+bool ceph::io_sequence::tester::TestRunner::run_automated_test() {
// Create a test for each object
- std::vector<std::shared_ptr<
- ceph::io_sequence::tester::TestObject>> test_objects;
+ std::vector<std::shared_ptr<ceph::io_sequence::tester::TestObject>>
+ test_objects;
for (int obj = 0; obj < num_objects; obj++) {
std::string name;
@@ -721,15 +892,9 @@ bool ceph::io_sequence::tester::TestRunner::run_automated_test()
name = object_name + std::to_string(obj);
}
test_objects.push_back(
- std::make_shared<ceph::io_sequence::tester::TestObject>(
- name,
- rados, asio,
- sbs, spo, sos, snt, ssr,
- rng, lock, cond,
- dryrun, verbose,
- seqseed
- )
- );
+ std::make_shared<ceph::io_sequence::tester::TestObject>(
+ name, rados, asio, sbs, spo, sos, snt, ssr, rng, lock, cond, dryrun,
+ verbose, seqseed, testrecovery));
}
if (!dryrun) {
rados.wait_for_latest_osdmap();
@@ -748,16 +913,15 @@ bool ceph::io_sequence::tester::TestRunner::run_automated_test()
for (auto obj = test_objects.begin(); obj != test_objects.end(); ++obj) {
std::shared_ptr<ceph::io_sequence::tester::TestObject> to = *obj;
if (!to->finished()) {
- lock.lock();
- bool ready = to->readyForIo();
- lock.unlock();
- if (ready)
- {
- to->next();
- started_io = true;
- } else {
- need_wait = true;
- }
+ lock.lock();
+ bool ready = to->readyForIo();
+ lock.unlock();
+ if (ready) {
+ to->next();
+ started_io = true;
+ } else {
+ need_wait = true;
+ }
}
}
if (!started_io && need_wait) {
@@ -767,8 +931,7 @@ bool ceph::io_sequence::tester::TestRunner::run_automated_test()
std::shared_ptr<ceph::io_sequence::tester::TestObject> to = *obj;
if (!to->finished()) {
need_wait = !to->readyForIo();
- if (!need_wait)
- {
+ if (!need_wait) {
break;
}
}
@@ -788,18 +951,16 @@ bool ceph::io_sequence::tester::TestRunner::run_automated_test()
return true;
}
-int main(int argc, char **argv)
-{
+int main(int argc, char** argv) {
auto args = argv_to_vec(argc, argv);
env_to_vec(args);
auto cct = global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT,
- CODE_ENVIRONMENT_UTILITY, 0);
+ CODE_ENVIRONMENT_UTILITY, 0);
common_init_finish(cct.get());
po::variables_map vm;
int rc = parse_io_seq_options(vm, argc, argv);
- if (rc != 0)
- {
+ if (rc != 0) {
return rc;
}
@@ -814,7 +975,7 @@ int main(int argc, char **argv)
std::unique_ptr<ceph::io_sequence::tester::TestRunner> runner;
try {
runner = std::make_unique<ceph::io_sequence::tester::TestRunner>(vm, rados);
- } catch(const po::error& e) {
+ } catch (const po::error& e) {
return 1;
}
runner->run_test();
diff --git a/src/test/osd/ceph_test_rados_io_sequence.h b/src/test/osd/ceph_test_rados_io_sequence.h
index 4e21d025700..9af5f706b2f 100644
--- a/src/test/osd/ceph_test_rados_io_sequence.h
+++ b/src/test/osd/ceph_test_rados_io_sequence.h
@@ -1,34 +1,36 @@
+#include <boost/program_options.hpp>
+#include <optional>
#include <utility>
-#include "include/random.h"
-
-#include "global/global_init.h"
-#include "global/global_context.h"
-
#include "common/io_exerciser/IoOp.h"
#include "common/io_exerciser/IoSequence.h"
#include "common/io_exerciser/Model.h"
-
+#include "common/split.h"
+#include "global/global_context.h"
+#include "global/global_init.h"
+#include "include/random.h"
#include "librados/librados_asio.h"
#include <boost/asio/io_context.hpp>
#include <boost/program_options.hpp>
+#include <optional>
+
/* Overview
*
* class ProgramOptionSelector
- * Base class for selector objects below with common code for
+ * Base class for selector objects below with common code for
* selecting options
- *
+ *
* class SelectObjectSize
* Selects min and max object sizes for a test
*
* class SelectErasureKM
* Selects an EC k and m value for a test
- *
+ *
* class SelectErasurePlugin
* Selects an plugin for a test
- *
+ *
* class SelectECPool
* Selects an EC pool (plugin,k and m) for a test. Also creates the
* pool as well.
@@ -58,287 +60,279 @@
namespace po = boost::program_options;
-namespace ceph
-{
- namespace io_sequence::tester
- {
- // Choices for min and max object size
- inline constexpr size_t objectSizeSize = 10;
- inline constexpr std::array<std::pair<int,int>,objectSizeSize>
- objectSizeChoices = {{
- {1,32}, // Default - best for boundary checking
- {12,14},
- {28,30},
- {36,38},
- {42,44},
- {52,54},
- {66,68},
- {72,74},
- {83,83},
- {97,97}
- }};
-
- // Choices for block size
- inline constexpr int blockSizeSize = 5;
- inline constexpr std::array<uint64_t, blockSizeSize> blockSizeChoices = {{
- 2048, // Default - test boundaries for EC 4K chunk size
- 512,
- 3767,
- 4096,
- 32768
- }};
-
- // Choices for number of threads
- inline constexpr int threadArraySize = 4;
- inline constexpr std::array<int, threadArraySize> threadCountChoices = {{
- 1, // Default
- 2,
- 4,
- 8
- }};
-
- // Choices for EC k+m profile
- inline constexpr int kmSize = 6;
- inline constexpr std::array<std::pair<int,int>, kmSize> kmChoices = {{
- {2,2}, // Default - reasonable coverage
- {2,1},
- {2,3},
- {3,2},
- {4,2},
- {5,1}
- }};
-
- // Choices for EC chunk size
- inline constexpr int chunkSizeSize = 3;
- inline constexpr std::array<uint64_t, chunkSizeSize> chunkSizeChoices = {{
- 4*1024,
- 64*1024,
- 256*1024
- }};
-
- // Choices for plugin
- inline constexpr int pluginListSize = 2;
- inline constexpr std::array<std::string_view,
- pluginListSize> pluginChoices = {{
- "jerasure",
- "isa"
- }};
-
- inline constexpr std::array<std::pair<ceph::io_exerciser::Sequence,
- ceph::io_exerciser::Sequence>,
- 0> sequencePairs = {{}};
-
- inline constexpr std::array<std::string, 0> poolChoices = {{}};
-
- template <typename T, int N, const std::array<T, N>& Ts>
- class ProgramOptionSelector
- {
- public:
- ProgramOptionSelector(ceph::util::random_number_generator<int>& rng,
- po::variables_map vm,
- const std::string& option_name,
- bool set_forced,
- bool select_first
- );
- virtual ~ProgramOptionSelector() = default;
- bool isForced();
- virtual const T choose();
-
- protected:
- ceph::util::random_number_generator<int>& rng;
- static constexpr std::array<T, N> choices = Ts;
-
- std::optional<T> force_value;
- std::optional<T> first_value;
-
- std::string option_name;
- };
-
- class SelectObjectSize
- : public ProgramOptionSelector<std::pair<int, int>,
- io_sequence::tester::objectSizeSize,
- io_sequence::tester::objectSizeChoices>
- {
- public:
- SelectObjectSize(ceph::util::random_number_generator<int>& rng,
- po::variables_map vm);
- };
-
- class SelectBlockSize
- : public ProgramOptionSelector<uint64_t,
- io_sequence::tester::blockSizeSize,
- io_sequence::tester::blockSizeChoices>
- {
- public:
- SelectBlockSize(ceph::util::random_number_generator<int>& rng,
- po::variables_map vm);
- };
-
- class SelectNumThreads
- : public ProgramOptionSelector<int,
- io_sequence::tester::threadArraySize,
- io_sequence::tester::threadCountChoices>
- {
- public:
- SelectNumThreads(ceph::util::random_number_generator<int>& rng,
- po::variables_map vm);
- };
-
- class SelectSeqRange
- : public ProgramOptionSelector<std::pair<ceph::io_exerciser::Sequence,
- ceph::io_exerciser::Sequence>,
- 0, io_sequence::tester::sequencePairs>
- {
- public:
- SelectSeqRange(ceph::util::random_number_generator<int>& rng,
- po::variables_map vm);
-
- const std::pair<ceph::io_exerciser::Sequence,
- ceph::io_exerciser::Sequence> choose() override;
- };
-
- class SelectErasureKM
- : public ProgramOptionSelector<std::pair<int,int>,
- io_sequence::tester::kmSize,
- io_sequence::tester::kmChoices>
- {
- public:
- SelectErasureKM(ceph::util::random_number_generator<int>& rng,
+namespace ceph {
+namespace io_sequence::tester {
+// Choices for min and max object size
+inline constexpr size_t objectSizeSize = 10;
+inline constexpr std::array<std::pair<int, int>, objectSizeSize>
+ objectSizeChoices = {{{1, 32}, // Default - best for boundary checking
+ {12, 14},
+ {28, 30},
+ {36, 38},
+ {42, 44},
+ {52, 54},
+ {66, 68},
+ {72, 74},
+ {83, 83},
+ {97, 97}}};
+
+// Choices for block size
+inline constexpr int blockSizeSize = 5;
+inline constexpr std::array<uint64_t, blockSizeSize> blockSizeChoices = {
+ {2048, // Default - test boundaries for EC 4K chunk size
+ 512, 3767, 4096, 32768}};
+
+// Choices for number of threads
+inline constexpr int threadArraySize = 4;
+inline constexpr std::array<int, threadArraySize> threadCountChoices = {
+ {1, // Default
+ 2, 4, 8}};
+
+// Choices for EC k+m profile
+inline constexpr int kmSize = 6;
+inline constexpr std::array<std::pair<int, int>, kmSize> kmChoices = {
+ {{2, 2}, // Default - reasonable coverage
+ {2, 1},
+ {2, 3},
+ {3, 2},
+ {4, 2},
+ {5, 1}}};
+
+// Choices for EC chunk size
+inline constexpr int chunkSizeSize = 3;
+inline constexpr std::array<uint64_t, chunkSizeSize> chunkSizeChoices = {
+ {4 * 1024, 64 * 1024, 256 * 1024}};
+
+// Choices for plugin
+inline constexpr int pluginListSize = 2;
+inline constexpr std::array<std::string_view, pluginListSize> pluginChoices = {
+ {"jerasure", "isa"}};
+
+inline constexpr std::array<
+ std::pair<ceph::io_exerciser::Sequence, ceph::io_exerciser::Sequence>, 0>
+ sequencePairs = {{}};
+
+inline constexpr std::array<std::string, 0> poolChoices = {{}};
+
+template <typename T, int N, const std::array<T, N>& Ts>
+class ProgramOptionSelector {
+ public:
+ ProgramOptionSelector(ceph::util::random_number_generator<int>& rng,
+ po::variables_map vm, const std::string& option_name,
+ bool set_forced, bool select_first);
+ virtual ~ProgramOptionSelector() = default;
+ bool isForced();
+ virtual const T choose();
+
+ protected:
+ ceph::util::random_number_generator<int>& rng;
+ static constexpr std::array<T, N> choices = Ts;
+
+ std::optional<T> force_value;
+ std::optional<T> first_value;
+
+ std::string option_name;
+};
+
+class SelectObjectSize
+ : public ProgramOptionSelector<std::pair<int, int>,
+ io_sequence::tester::objectSizeSize,
+ io_sequence::tester::objectSizeChoices> {
+ public:
+ SelectObjectSize(ceph::util::random_number_generator<int>& rng,
+ po::variables_map vm);
+};
+
+class SelectBlockSize
+ : public ProgramOptionSelector<uint64_t, io_sequence::tester::blockSizeSize,
+ io_sequence::tester::blockSizeChoices> {
+ public:
+ SelectBlockSize(ceph::util::random_number_generator<int>& rng,
+ po::variables_map vm);
+};
+
+class SelectNumThreads
+ : public ProgramOptionSelector<int, io_sequence::tester::threadArraySize,
+ io_sequence::tester::threadCountChoices> {
+ public:
+ SelectNumThreads(ceph::util::random_number_generator<int>& rng,
+ po::variables_map vm);
+};
+
+class SelectSeqRange
+ : public ProgramOptionSelector<
+ std::pair<ceph::io_exerciser::Sequence, ceph::io_exerciser::Sequence>,
+ 0, io_sequence::tester::sequencePairs> {
+ public:
+ SelectSeqRange(ceph::util::random_number_generator<int>& rng,
+ po::variables_map vm);
+
+ const std::pair<ceph::io_exerciser::Sequence, ceph::io_exerciser::Sequence>
+ choose() override;
+};
+
+class SelectErasureKM
+ : public ProgramOptionSelector<std::pair<int, int>,
+ io_sequence::tester::kmSize,
+ io_sequence::tester::kmChoices> {
+ public:
+ SelectErasureKM(ceph::util::random_number_generator<int>& rng,
+ po::variables_map vm);
+};
+
+class SelectErasurePlugin
+ : public ProgramOptionSelector<std::string_view,
+ io_sequence::tester::pluginListSize,
+ io_sequence::tester::pluginChoices> {
+ public:
+ SelectErasurePlugin(ceph::util::random_number_generator<int>& rng,
po::variables_map vm);
- };
-
- class SelectErasurePlugin
- : public ProgramOptionSelector<std::string_view,
- io_sequence::tester::pluginListSize,
- io_sequence::tester::pluginChoices>
- {
- public:
- SelectErasurePlugin(ceph::util::random_number_generator<int>& rng,
- po::variables_map vm);
- };
-
- class SelectErasureChunkSize
- : public ProgramOptionSelector<uint64_t,
- io_sequence::tester::chunkSizeSize,
- io_sequence::tester::chunkSizeChoices>
- {
- public:
- SelectErasureChunkSize(ceph::util::random_number_generator<int>& rng, po::variables_map vm);
- };
-
- class SelectECPool
- : public ProgramOptionSelector<std::string,
- 0,
- io_sequence::tester::poolChoices>
- {
- public:
- SelectECPool(ceph::util::random_number_generator<int>& rng,
- po::variables_map vm,
- librados::Rados& rados,
- bool dry_run);
- const std::string choose() override;
-
- private:
- void create_pool(librados::Rados& rados,
- const std::string& pool_name,
- const std::string& plugin,
- uint64_t chunk_size,
- int k, int m);
-
- protected:
- librados::Rados& rados;
- bool dry_run;
-
- SelectErasureKM skm;
- SelectErasurePlugin spl;
- SelectErasureChunkSize scs;
- };
-
- class TestObject
- {
- public:
- TestObject( const std::string oid,
- librados::Rados& rados,
- boost::asio::io_context& asio,
- ceph::io_sequence::tester::SelectBlockSize& sbs,
- ceph::io_sequence::tester::SelectECPool& spl,
- ceph::io_sequence::tester::SelectObjectSize& sos,
- ceph::io_sequence::tester::SelectNumThreads& snt,
- ceph::io_sequence::tester::SelectSeqRange& ssr,
- ceph::util::random_number_generator<int>& rng,
- ceph::mutex& lock,
- ceph::condition_variable& cond,
- bool dryrun,
- bool verbose,
- std::optional<int> seqseed);
-
- int get_num_io();
- bool readyForIo();
- bool next();
- bool finished();
-
- protected:
- std::unique_ptr<ceph::io_exerciser::Model> exerciser_model;
- std::pair<int,int> obj_size_range;
- std::pair<ceph::io_exerciser::Sequence,
- ceph::io_exerciser::Sequence> seq_range;
- ceph::io_exerciser::Sequence curseq;
- std::unique_ptr<ceph::io_exerciser::IoSequence> seq;
- std::unique_ptr<ceph::io_exerciser::IoOp> op;
- bool done;
- ceph::util::random_number_generator<int>& rng;
- bool verbose;
- std::optional<int> seqseed;
- };
-
- class TestRunner
- {
- public:
- TestRunner(po::variables_map& vm, librados::Rados& rados);
- ~TestRunner();
-
- bool run_test();
-
- private:
- librados::Rados& rados;
- int seed;
- ceph::util::random_number_generator<int> rng;
-
- ceph::io_sequence::tester::SelectBlockSize sbs;
- ceph::io_sequence::tester::SelectObjectSize sos;
- ceph::io_sequence::tester::SelectECPool spo;
- ceph::io_sequence::tester::SelectNumThreads snt;
- ceph::io_sequence::tester::SelectSeqRange ssr;
-
- boost::asio::io_context asio;
- std::thread thread;
- std::optional<boost::asio::executor_work_guard<
- boost::asio::io_context::executor_type>> guard;
- ceph::mutex lock = ceph::make_mutex("RadosIo::lock");
- ceph::condition_variable cond;
-
- bool input_valid;
-
- bool verbose;
- bool dryrun;
- std::optional<int> seqseed;
- bool interactive;
-
- bool show_sequence;
- bool show_help;
-
- int num_objects;
- std::string object_name;
-
- std::string get_token();
- uint64_t get_numeric_token();
-
- bool run_automated_test();
-
- bool run_interactive_test();
-
- void help();
- void list_sequence();
- };
- }
-}
+};
+
+class SelectErasureChunkSize
+ : public ProgramOptionSelector<uint64_t, io_sequence::tester::chunkSizeSize,
+ io_sequence::tester::chunkSizeChoices> {
+ public:
+ SelectErasureChunkSize(ceph::util::random_number_generator<int>& rng,
+ po::variables_map vm);
+};
+
+class SelectECPool
+ : public ProgramOptionSelector<std::string, 0,
+ io_sequence::tester::poolChoices> {
+ public:
+ SelectECPool(ceph::util::random_number_generator<int>& rng,
+ po::variables_map vm, librados::Rados& rados, bool dry_run,
+ bool allow_pool_autoscaling, bool allow_pool_balancer,
+ bool allow_pool_deep_scrubbing, bool allow_pool_scrubbing,
+ bool test_recovery);
+ const std::string choose() override;
+
+ bool get_allow_pool_autoscaling() { return allow_pool_autoscaling; }
+ bool get_allow_pool_balancer() { return allow_pool_balancer; }
+ bool get_allow_pool_deep_scrubbing() { return allow_pool_deep_scrubbing; }
+ bool get_allow_pool_scrubbing() { return allow_pool_scrubbing; }
+ int getChosenK() const { return k; }
+ int getChosenM() const { return m; }
+
+ private:
+ void create_pool(librados::Rados& rados, const std::string& pool_name,
+ const std::string& plugin, uint64_t chunk_size, int k,
+ int m);
+
+ protected:
+ librados::Rados& rados;
+ bool dry_run;
+ bool allow_pool_autoscaling;
+ bool allow_pool_balancer;
+ bool allow_pool_deep_scrubbing;
+ bool allow_pool_scrubbing;
+ bool test_recovery;
+ int k;
+ int m;
+
+ SelectErasureKM skm;
+ SelectErasurePlugin spl;
+ SelectErasureChunkSize scs;
+};
+
+class TestObject {
+ public:
+ TestObject(const std::string oid, librados::Rados& rados,
+ boost::asio::io_context& asio,
+ ceph::io_sequence::tester::SelectBlockSize& sbs,
+ ceph::io_sequence::tester::SelectECPool& spl,
+ ceph::io_sequence::tester::SelectObjectSize& sos,
+ ceph::io_sequence::tester::SelectNumThreads& snt,
+ ceph::io_sequence::tester::SelectSeqRange& ssr,
+ ceph::util::random_number_generator<int>& rng, ceph::mutex& lock,
+ ceph::condition_variable& cond, bool dryrun, bool verbose,
+ std::optional<int> seqseed, bool testRecovery);
+
+ int get_num_io();
+ bool readyForIo();
+ bool next();
+ bool finished();
+
+ protected:
+ std::unique_ptr<ceph::io_exerciser::Model> exerciser_model;
+ std::pair<int, int> obj_size_range;
+ std::pair<ceph::io_exerciser::Sequence, ceph::io_exerciser::Sequence>
+ seq_range;
+ ceph::io_exerciser::Sequence curseq;
+ std::unique_ptr<ceph::io_exerciser::IoSequence> seq;
+ std::unique_ptr<ceph::io_exerciser::IoOp> op;
+ bool done;
+ ceph::util::random_number_generator<int>& rng;
+ bool verbose;
+ std::optional<int> seqseed;
+ int poolK;
+ int poolM;
+ bool testrecovery;
+};
+
+class TestRunner {
+ public:
+ TestRunner(po::variables_map& vm, librados::Rados& rados);
+ ~TestRunner();
+
+ bool run_test();
+
+ private:
+ librados::Rados& rados;
+ int seed;
+ ceph::util::random_number_generator<int> rng;
+
+ ceph::io_sequence::tester::SelectBlockSize sbs;
+ ceph::io_sequence::tester::SelectObjectSize sos;
+ ceph::io_sequence::tester::SelectECPool spo;
+ ceph::io_sequence::tester::SelectNumThreads snt;
+ ceph::io_sequence::tester::SelectSeqRange ssr;
+
+ boost::asio::io_context asio;
+ std::thread thread;
+ std::optional<
+ boost::asio::executor_work_guard<boost::asio::io_context::executor_type>>
+ guard;
+ ceph::mutex lock = ceph::make_mutex("RadosIo::lock");
+ ceph::condition_variable cond;
+
+ bool input_valid;
+
+ bool verbose;
+ bool dryrun;
+ std::optional<int> seqseed;
+ bool interactive;
+
+ bool testrecovery;
+
+ bool allow_pool_autoscaling;
+ bool allow_pool_balancer;
+ bool allow_pool_deep_scrubbing;
+ bool allow_pool_scrubbing;
+
+ bool show_sequence;
+ bool show_help;
+
+ int num_objects;
+ std::string object_name;
+
+ std::string line;
+ ceph::split split = ceph::split("");
+ ceph::spliterator tokens;
+
+ void clear_tokens();
+ std::string get_token();
+ std::optional<std::string> get_optional_token();
+ uint64_t get_numeric_token();
+ std::optional<uint64_t> get_optional_numeric_token();
+
+ bool run_automated_test();
+
+ bool run_interactive_test();
+
+ void help();
+ void list_sequence(bool testrecovery);
+};
+} // namespace io_sequence::tester
+} // namespace ceph
diff --git a/src/test/pybind/pytest.ini b/src/test/pybind/pytest.ini
index dccf2a346dc..97569e88299 100644
--- a/src/test/pybind/pytest.ini
+++ b/src/test/pybind/pytest.ini
@@ -7,3 +7,4 @@ markers =
stats
tier
watch
+ wait
diff --git a/src/test/pybind/test_rados.py b/src/test/pybind/test_rados.py
index cb2a4f96101..25423bd8dcb 100644
--- a/src/test/pybind/test_rados.py
+++ b/src/test/pybind/test_rados.py
@@ -207,7 +207,7 @@ class TestRados(object):
def test_get_fsid(self):
fsid = self.rados.get_fsid()
- assert re.match('[0-9a-f\-]{36}', fsid, re.I)
+ assert re.match(r'[0-9a-f\-]{36}', fsid, re.I)
def test_blocklist_add(self):
self.rados.blocklist_add("1.2.3.4/123", 1)
diff --git a/src/test/rgw/rgw_multi/tests.py b/src/test/rgw/rgw_multi/tests.py
index d95feb5aa95..433cd034fe0 100644
--- a/src/test/rgw/rgw_multi/tests.py
+++ b/src/test/rgw/rgw_multi/tests.py
@@ -15,6 +15,7 @@ import boto
import boto.s3.connection
from boto.s3.website import WebsiteConfiguration
from boto.s3.cors import CORSConfiguration
+from botocore.exceptions import ClientError
from nose.tools import eq_ as eq
from nose.tools import assert_not_equal, assert_equal, assert_true, assert_false
@@ -3638,4 +3639,23 @@ def test_copy_object_different_bucket():
CopySource = source_bucket.name + '/' + objname)
zonegroup_bucket_checkpoint(zonegroup_conns, dest_bucket.name)
-
+
+def test_bucket_create_location_constraint():
+ for zonegroup in realm.current_period.zonegroups:
+ zonegroup_conns = ZonegroupConns(zonegroup)
+ for zg in realm.current_period.zonegroups:
+ z = zonegroup_conns.rw_zones[0]
+ bucket_name = gen_bucket_name()
+ if zg.name == zonegroup.name:
+ # my zonegroup should pass
+ z.s3_client.create_bucket(Bucket=bucket_name, CreateBucketConfiguration={'LocationConstraint': zg.name})
+ # check bucket location
+ response = z.s3_client.get_bucket_location(Bucket=bucket_name)
+ assert_equal(response['LocationConstraint'], zg.name)
+ else:
+ # other zonegroup should fail with 400
+ e = assert_raises(ClientError,
+ z.s3_client.create_bucket,
+ Bucket=bucket_name,
+ CreateBucketConfiguration={'LocationConstraint': zg.name})
+ assert e.response['ResponseMetadata']['HTTPStatusCode'] == 400
diff --git a/src/test/rgw/test_rgw_iam_policy.cc b/src/test/rgw/test_rgw_iam_policy.cc
index 7dadb7812ff..1d13c2aa013 100644
--- a/src/test/rgw/test_rgw_iam_policy.cc
+++ b/src/test/rgw/test_rgw_iam_policy.cc
@@ -75,6 +75,8 @@ using rgw::IAM::s3GetObjectTagging;
using rgw::IAM::s3GetObjectVersion;
using rgw::IAM::s3GetObjectVersionTagging;
using rgw::IAM::s3GetObjectVersionTorrent;
+using rgw::IAM::s3GetObjectAttributes;
+using rgw::IAM::s3GetObjectVersionAttributes;
using rgw::IAM::s3GetPublicAccessBlock;
using rgw::IAM::s3GetReplicationConfiguration;
using rgw::IAM::s3ListAllMyBuckets;
@@ -419,6 +421,8 @@ TEST_F(PolicyTest, Parse3) {
act2[s3GetObjectVersionAcl] = 1;
act2[s3GetObjectTorrent] = 1;
act2[s3GetObjectVersionTorrent] = 1;
+ act2[s3GetObjectAttributes] = 1;
+ act2[s3GetObjectVersionAttributes] = 1;
act2[s3GetAccelerateConfiguration] = 1;
act2[s3GetBucketAcl] = 1;
act2[s3GetBucketOwnershipControls] = 1;
@@ -487,6 +491,8 @@ TEST_F(PolicyTest, Eval3) {
s3allow[s3GetObjectVersion] = 1;
s3allow[s3GetObjectAcl] = 1;
s3allow[s3GetObjectVersionAcl] = 1;
+ s3allow[s3GetObjectAttributes] = 1;
+ s3allow[s3GetObjectVersionAttributes] = 1;
s3allow[s3GetObjectTorrent] = 1;
s3allow[s3GetObjectVersionTorrent] = 1;
s3allow[s3GetAccelerateConfiguration] = 1;
@@ -883,6 +889,8 @@ TEST_F(ManagedPolicyTest, AmazonS3ReadOnlyAccess)
act[s3GetObjectVersionAcl] = 1;
act[s3GetObjectTorrent] = 1;
act[s3GetObjectVersionTorrent] = 1;
+ act[s3GetObjectAttributes] = 1;
+ act[s3GetObjectVersionAttributes] = 1;
act[s3GetAccelerateConfiguration] = 1;
act[s3GetBucketAcl] = 1;
act[s3GetBucketOwnershipControls] = 1;
diff --git a/src/test/test_ipaddr.cc b/src/test/test_ipaddr.cc
index 49038815318..21df1d4056b 100644
--- a/src/test/test_ipaddr.cc
+++ b/src/test/test_ipaddr.cc
@@ -995,3 +995,158 @@ TEST(pick_address, ipv4_ipv6_enabled2)
ASSERT_EQ(-1, r);
}
}
+
+// Test for IPv4 address
+TEST(is_addr_in_subnet, ipv4)
+{
+ std::string public_network = "10.1.1.0/24";
+ entity_addr_t addr;
+ addr.parse("10.1.1.2", nullptr);
+
+ boost::intrusive_ptr<CephContext> cct(new CephContext(CEPH_ENTITY_TYPE_OSD), false);
+ cct->_conf._clear_safe_to_start_threads();
+ cct->_conf.set_val("ms_bind_ipv4", "true");
+ cct->_conf.set_val("ms_bind_ipv6", "false");
+
+ bool r = is_addr_in_subnet(cct.get(), public_network, addr);
+ ASSERT_EQ(true, r);
+}
+
+// Test for IPv6 address
+TEST(is_addr_in_subnet, ipv6)
+{
+ std::string public_network = "2001:db8::/64";
+ entity_addr_t addr;
+ addr.parse("2001:db8::1", nullptr);
+
+ boost::intrusive_ptr<CephContext> cct(new CephContext(CEPH_ENTITY_TYPE_OSD), false);
+ cct->_conf._clear_safe_to_start_threads();
+ cct->_conf.set_val("ms_bind_ipv6", "true");
+ cct->_conf.set_val("ms_bind_ipv4", "false");
+
+ bool r = is_addr_in_subnet(cct.get(), public_network, addr);
+ ASSERT_EQ(true, r);
+}
+
+// Test for invalid address
+TEST(is_addr_in_subnet, invalid_address)
+{
+ std::string public_network = "10.1.1.0/24";
+ entity_addr_t addr;
+ addr.parse("192.168.1.1", nullptr);
+
+ boost::intrusive_ptr<CephContext> cct(new CephContext(CEPH_ENTITY_TYPE_OSD), false);
+ cct->_conf._clear_safe_to_start_threads();
+ cct->_conf.set_val("ms_bind_ipv4", "true");
+ cct->_conf.set_val("ms_bind_ipv6", "false");
+
+ bool r = is_addr_in_subnet(cct.get(), public_network, addr);
+ ASSERT_EQ(false, r);
+}
+
+// Test for malformed address
+TEST(is_addr_in_subnet, malformed_address)
+{
+ std::string public_network = "10.1.1.0/24";
+ entity_addr_t addr;
+ addr.parse("invalid_address", nullptr);
+
+ boost::intrusive_ptr<CephContext> cct(new CephContext(CEPH_ENTITY_TYPE_OSD), false);
+ cct->_conf._clear_safe_to_start_threads();
+ cct->_conf.set_val("ms_bind_ipv4", "true");
+ cct->_conf.set_val("ms_bind_ipv6", "false");
+
+ // Test with a malformed address
+ bool r = is_addr_in_subnet(cct.get(), public_network, addr);
+ ASSERT_EQ(false, r);
+}
+
+TEST(is_addr_in_subnet, boundary_ipv4)
+{
+ std::string public_network = "10.1.1.0/24";
+ entity_addr_t addr_low;
+ addr_low.parse("10.1.1.0", nullptr);
+ entity_addr_t addr_high;
+ addr_high.parse("10.1.1.255", nullptr);
+ entity_addr_t addr_out;
+ addr_out.parse("10.1.2.0", nullptr);
+
+ boost::intrusive_ptr<CephContext> cct(new CephContext(CEPH_ENTITY_TYPE_OSD), false);
+ cct->_conf._clear_safe_to_start_threads();
+ cct->_conf.set_val("ms_bind_ipv4", "true");
+ cct->_conf.set_val("ms_bind_ipv6", "false");
+
+ ASSERT_TRUE(is_addr_in_subnet(cct.get(), public_network, addr_low));
+ ASSERT_TRUE(is_addr_in_subnet(cct.get(), public_network, addr_high));
+ ASSERT_FALSE(is_addr_in_subnet(cct.get(), public_network, addr_out));
+}
+
+TEST(is_addr_in_subnet, boundary_ipv6)
+{
+ std::string public_network = "2001:db8::/64";
+ entity_addr_t addr_low;
+ addr_low.parse("2001:db8::", nullptr);
+ entity_addr_t addr_high;
+ addr_high.parse("2001:db8:0:0:ffff:ffff:ffff:ffff", nullptr);
+ entity_addr_t addr_out;
+ addr_out.parse("2001:db9::", nullptr);
+
+ boost::intrusive_ptr<CephContext> cct(new CephContext(CEPH_ENTITY_TYPE_OSD), false);
+ cct->_conf._clear_safe_to_start_threads();
+ cct->_conf.set_val("ms_bind_ipv6", "true");
+ cct->_conf.set_val("ms_bind_ipv4", "false");
+
+ ASSERT_TRUE(is_addr_in_subnet(cct.get(), public_network, addr_low));
+ ASSERT_TRUE(is_addr_in_subnet(cct.get(), public_network, addr_high));
+ ASSERT_FALSE(is_addr_in_subnet(cct.get(), public_network, addr_out));
+}
+
+TEST(is_addr_in_subnet, overlapping_subnets)
+{
+ std::string public_network_1 = "10.1.1.0/24";
+ std::string public_network_2 = "10.1.2.0/24";
+ entity_addr_t addr;
+ addr.parse("10.1.1.5", nullptr);
+
+ boost::intrusive_ptr<CephContext> cct(new CephContext(CEPH_ENTITY_TYPE_OSD), false);
+ cct->_conf._clear_safe_to_start_threads();
+ cct->_conf.set_val("ms_bind_ipv4", "true");
+ cct->_conf.set_val("ms_bind_ipv6", "false");
+
+ ASSERT_TRUE(is_addr_in_subnet(cct.get(), public_network_1, addr));
+ ASSERT_FALSE(is_addr_in_subnet(cct.get(), public_network_2, addr));
+}
+
+TEST(is_addr_in_subnet, mismatched_family)
+{
+ std::string public_network_1 = "2001:db8::/64";
+ entity_addr_t addr_1;
+ addr_1.parse("10.1.1.5", nullptr);
+
+ std::string public_network_2 = "10.1.1.0/24";
+ entity_addr_t addr_2;
+ addr_2.parse("2001:db8::1", nullptr);
+
+ boost::intrusive_ptr<CephContext> cct(new CephContext(CEPH_ENTITY_TYPE_OSD), false);
+ cct->_conf._clear_safe_to_start_threads();
+ cct->_conf.set_val("ms_bind_ipv4", "true");
+ cct->_conf.set_val("ms_bind_ipv6", "true");
+
+ ASSERT_FALSE(is_addr_in_subnet(cct.get(), public_network_1, addr_1));
+ ASSERT_FALSE(is_addr_in_subnet(cct.get(), public_network_2, addr_2));
+}
+
+TEST(is_addr_in_subnet, invalid_subnets)
+{
+ std::string public_network_1 = "10.1.1.0/33";
+ std::string public_network_2 = "25.0.0.99/10";
+ entity_addr_t addr;
+ addr.parse("10.1.1.2", nullptr);
+
+ boost::intrusive_ptr<CephContext> cct(new CephContext(CEPH_ENTITY_TYPE_OSD), false);
+ cct->_conf._clear_safe_to_start_threads();
+
+ ASSERT_FALSE(is_addr_in_subnet(cct.get(), public_network_1, addr)); // Invalid prefix
+ ASSERT_FALSE(is_addr_in_subnet(cct.get(), public_network_2, addr)); // Invalid subnet string
+}
+
diff --git a/src/vstart.sh b/src/vstart.sh
index b445e250a00..a992f33c856 100755
--- a/src/vstart.sh
+++ b/src/vstart.sh
@@ -159,6 +159,7 @@ smallmds=0
short=0
crimson=0
ec=0
+cephexporter=0
cephadm=0
parallel=true
restart=1
@@ -233,6 +234,7 @@ options:
-G disable Kerberos/GSSApi authentication
--hitset <pool> <hit_set_type>: enable hitset tracking
-e : create an erasure pool
+ --cephexporter: start the ceph-exporter daemon
-o config add extra config parameters to all sections
--rgw_port specify ceph rgw http listen port
--rgw_frontend specify the rgw frontend configuration
@@ -372,6 +374,9 @@ case $1 in
-e)
ec=1
;;
+ --cephexporter)
+ cephexporter=1
+ ;;
--new | -n)
new=1
;;
@@ -1140,6 +1145,17 @@ EOF
fi
}
+start_cephexporter() {
+ debug echo "Starting Ceph exporter daemon..."
+
+ # Define socket directory for the exporter
+ # Start the exporter daemon
+ prunb ceph-exporter \
+ -c "$conf_fn" \
+ --sock-dir "$CEPH_ASOK_DIR" \
+ --addrs "$IP"
+}
+
start_osd() {
if [ $inc_osd_num -gt 0 ]; then
old_maxosd=$($CEPH_BIN/ceph osd getmaxosd | sed -e 's/max_osd = //' -e 's/ in epoch.*//')
@@ -1738,6 +1754,10 @@ if [ $CEPH_NUM_MDS -gt 0 ]; then
ceph_adm fs authorize \* "client.fs" / rwp >> "$keyring_fn"
fi
+if [ "$cephexporter" -eq 1 ]; then
+ start_cephexporter
+fi
+
# Don't set max_mds until all the daemons are started, otherwise
# the intended standbys might end up in active roles.
if [ "$CEPH_MAX_MDS" -gt 1 ]; then