Diffstat (limited to 'src')
-rw-r--r--  src/auth/Auth.h | 60
-rw-r--r--  src/auth/Crypto.cc | 17
-rw-r--r--  src/auth/Crypto.h | 2
-rw-r--r--  src/auth/cephx/CephxKeyServer.cc | 10
-rw-r--r--  src/auth/cephx/CephxKeyServer.h | 40
-rw-r--r--  src/auth/cephx/CephxProtocol.h | 124
-rw-r--r--  src/ceph-volume/ceph_volume/devices/lvm/listing.py | 4
-rw-r--r--  src/ceph-volume/ceph_volume/util/device.py | 10
-rw-r--r--  src/ceph-volume/ceph_volume/util/disk.py | 2
-rwxr-xr-x  src/cephadm/cephadm.py | 2616
-rw-r--r--  src/cephadm/cephadmlib/container_daemon_form.py | 65
-rw-r--r--  src/cephadm/cephadmlib/daemons/__init__.py | 24
-rw-r--r--  src/cephadm/cephadmlib/daemons/ceph.py | 462
-rw-r--r--  src/cephadm/cephadmlib/daemons/custom.py | 222
-rw-r--r--  src/cephadm/cephadmlib/daemons/ingress.py | 290
-rw-r--r--  src/cephadm/cephadmlib/daemons/iscsi.py | 286
-rw-r--r--  src/cephadm/cephadmlib/daemons/monitoring.py | 377
-rw-r--r--  src/cephadm/cephadmlib/daemons/nfs.py | 225
-rw-r--r--  src/cephadm/cephadmlib/daemons/nvmeof.py | 193
-rw-r--r--  src/cephadm/cephadmlib/daemons/snmp.py | 226
-rw-r--r--  src/cephadm/cephadmlib/daemons/tracing.py | 116
-rw-r--r--  src/cephadm/cephadmlib/deployment_utils.py | 35
-rw-r--r--  src/cephadm/cephadmlib/file_utils.py | 4
-rw-r--r--  src/cephadm/cephadmlib/logging.py | 65
-rw-r--r--  src/cephadm/cephadmlib/runscripts.py | 244
-rw-r--r--  src/cephadm/cephadmlib/systemd_unit.py | 81
-rw-r--r--  src/cephadm/cephadmlib/templates/cephadm.logrotate.config.j2 | 9
-rw-r--r--  src/cephadm/cephadmlib/templates/cluster.logrotate.config.j2 | 13
-rw-r--r--  src/cephadm/cephadmlib/templating.py | 46
-rw-r--r--  src/cephadm/tests/fixtures.py | 82
-rw-r--r--  src/cephadm/tests/test_cephadm.py | 126
-rw-r--r--  src/cephadm/tests/test_container_engine.py | 8
-rw-r--r--  src/cephadm/tests/test_daemon_form.py | 3
-rw-r--r--  src/cephadm/tests/test_deploy.py | 96
-rw-r--r--  src/cephadm/tests/test_logrotate_config.py | 57
-rw-r--r--  src/cephadm/tests/test_nfs.py | 6
-rw-r--r--  src/cephadm/tests/test_unit_file.py | 16
-rw-r--r--  src/cls/2pc_queue/cls_2pc_queue_ops.h | 70
-rw-r--r--  src/cls/2pc_queue/cls_2pc_queue_types.h | 36
-rw-r--r--  src/cls/cas/cls_cas_internal.h | 11
-rw-r--r--  src/cls/fifo/cls_fifo_ops.h | 48
-rw-r--r--  src/cls/fifo/cls_fifo_types.h | 62
-rw-r--r--  src/cls/log/cls_log_ops.h | 56
-rw-r--r--  src/cls/log/cls_log_types.h | 10
-rw-r--r--  src/cls/queue/cls_queue_ops.h | 74
-rw-r--r--  src/cls/queue/cls_queue_types.h | 43
-rw-r--r--  src/cls/rgw/cls_rgw_client.cc | 19
-rw-r--r--  src/cls/rgw/cls_rgw_client.h | 11
-rw-r--r--  src/cls/timeindex/cls_timeindex_ops.h | 38
-rw-r--r--  src/cls/timeindex/cls_timeindex_types.h | 2
-rw-r--r--  src/cls/version/cls_version_ops.h | 55
-rw-r--r--  src/cls/version/cls_version_types.cc | 6
-rw-r--r--  src/cls/version/cls_version_types.h | 18
-rw-r--r--  src/common/RefCountedObj.h | 8
-rw-r--r--  src/common/ceph_json.h | 55
-rw-r--r--  src/common/entity_name.cc | 63
-rw-r--r--  src/common/entity_name.h | 3
-rw-r--r--  src/common/mempool.cc | 30
-rw-r--r--  src/common/options/rgw.yaml.in | 6
-rw-r--r--  src/common/versioned_variant.h | 234
-rw-r--r--  src/crimson/common/shared_lru.h | 10
-rw-r--r--  src/crimson/common/smp_helpers.h | 146
-rw-r--r--  src/crimson/mgr/client.h | 2
-rw-r--r--  src/crimson/net/Connection.h | 35
-rw-r--r--  src/crimson/net/Fwd.h | 2
-rw-r--r--  src/crimson/net/ProtocolV2.cc | 6
-rw-r--r--  src/crimson/net/ProtocolV2.h | 8
-rw-r--r--  src/crimson/net/SocketConnection.cc | 5
-rw-r--r--  src/crimson/net/SocketConnection.h | 2
-rw-r--r--  src/crimson/net/io_handler.cc | 204
-rw-r--r--  src/crimson/net/io_handler.h | 101
-rw-r--r--  src/crimson/osd/object_context_loader.cc | 16
-rw-r--r--  src/crimson/osd/object_context_loader.h | 5
-rw-r--r--  src/crimson/osd/ops_executer.cc | 10
-rw-r--r--  src/crimson/osd/osd.cc | 72
-rw-r--r--  src/crimson/osd/osd.h | 12
-rw-r--r--  src/crimson/osd/osd_connection_priv.h | 73
-rw-r--r--  src/crimson/osd/osd_meta.cc | 36
-rw-r--r--  src/crimson/osd/osd_meta.h | 5
-rw-r--r--  src/crimson/osd/osd_operations/client_request.cc | 46
-rw-r--r--  src/crimson/osd/osd_operations/client_request.h | 10
-rw-r--r--  src/crimson/osd/osd_operations/client_request_common.cc | 46
-rw-r--r--  src/crimson/osd/osd_operations/client_request_common.h | 7
-rw-r--r--  src/crimson/osd/osd_operations/common/pg_pipeline.h | 2
-rw-r--r--  src/crimson/osd/osd_operations/internal_client_request.cc | 2
-rw-r--r--  src/crimson/osd/osd_operations/snaptrim_event.cc | 164
-rw-r--r--  src/crimson/osd/osd_operations/snaptrim_event.h | 37
-rw-r--r--  src/crimson/osd/pg.cc | 29
-rw-r--r--  src/crimson/osd/pg.h | 15
-rw-r--r--  src/crimson/osd/pg_backend.cc | 2
-rw-r--r--  src/crimson/osd/pg_recovery.cc | 149
-rw-r--r--  src/crimson/osd/pg_recovery.h | 2
-rw-r--r--  src/crimson/osd/pg_recovery_listener.h | 2
-rw-r--r--  src/crimson/osd/pg_shard_manager.cc | 9
-rw-r--r--  src/crimson/osd/pg_shard_manager.h | 7
-rw-r--r--  src/crimson/osd/recovery_backend.h | 6
-rw-r--r--  src/crimson/osd/replicated_recovery_backend.cc | 127
-rw-r--r--  src/crimson/osd/replicated_recovery_backend.h | 3
-rw-r--r--  src/crimson/osd/shard_services.cc | 90
-rw-r--r--  src/crimson/osd/shard_services.h | 15
-rw-r--r--  src/include/cephfs/types.h | 2
-rw-r--r--  src/include/frag.h | 14
-rw-r--r--  src/include/fs_types.h | 7
-rw-r--r--  src/include/mempool.h | 92
-rw-r--r--  src/include/object.h | 18
-rw-r--r--  src/include/types.h | 33
-rw-r--r--  src/librados/snap_set_diff.cc | 2
-rw-r--r--  src/librbd/Journal.cc | 86
-rw-r--r--  src/librbd/Journal.h | 23
-rw-r--r--  src/librbd/io/ImageRequest.cc | 54
-rw-r--r--  src/librbd/io/ImageRequest.h | 21
-rw-r--r--  src/librbd/io/ObjectRequest.cc | 15
-rw-r--r--  src/librbd/librbd.cc | 6
-rw-r--r--  src/mds/CDir.cc | 1
-rw-r--r--  src/mds/CInode.cc | 4
-rw-r--r--  src/mds/DamageTable.cc | 28
-rw-r--r--  src/mds/DamageTable.h | 7
-rw-r--r--  src/mds/MDSMap.cc | 5
-rw-r--r--  src/mds/locks.c | 2
-rw-r--r--  src/mds/mdstypes.cc | 15
-rw-r--r--  src/messages/MClientReply.h | 12
-rw-r--r--  src/messages/MClientRequest.h | 31
-rw-r--r--  src/messages/MMDSCacheRejoin.h | 32
-rw-r--r--  src/messages/MMgrReport.h | 21
-rw-r--r--  src/messages/MOSDScrubReserve.h | 2
-rw-r--r--  src/mgr/ActivePyModules.cc | 36
-rw-r--r--  src/mgr/ActivePyModules.h | 1
-rw-r--r--  src/mgr/ClusterState.cc | 8
-rw-r--r--  src/mgr/ClusterState.h | 1
-rw-r--r--  src/mgr/DaemonServer.cc | 19
-rw-r--r--  src/mgr/DaemonServer.h | 2
-rw-r--r--  src/mgr/Mgr.cc | 27
-rw-r--r--  src/mgr/Mgr.h | 3
-rw-r--r--  src/mgr/MgrStandby.cc | 35
-rw-r--r--  src/mgr/MgrStandby.h | 1
-rw-r--r--  src/mgr/PyModuleRegistry.cc | 47
-rw-r--r--  src/mgr/PyModuleRegistry.h | 3
-rw-r--r--  src/os/bluestore/AvlAllocator.cc | 6
-rw-r--r--  src/os/bluestore/BlueFS.cc | 89
-rw-r--r--  src/os/bluestore/BlueFS.h | 106
-rw-r--r--  src/os/bluestore/BlueStore.cc | 10
-rw-r--r--  src/os/bluestore/BlueStore.h | 93
-rw-r--r--  src/os/bluestore/BtreeAllocator.cc | 6
-rw-r--r--  src/os/bluestore/StupidAllocator.cc | 32
-rw-r--r--  src/os/bluestore/StupidAllocator.h | 4
-rw-r--r--  src/os/bluestore/bluestore_types.cc | 9
-rw-r--r--  src/os/bluestore/bluestore_types.h | 1
-rw-r--r--  src/os/bluestore/fastbmap_allocator_impl.cc | 16
-rw-r--r--  src/osd/PG.cc | 26
-rw-r--r--  src/osd/PG.h | 9
-rw-r--r--  src/osd/PeeringState.cc | 2
-rw-r--r--  src/osd/PeeringState.h | 1
-rw-r--r--  src/osd/scrubber/osd_scrub.cc | 8
-rw-r--r--  src/osd/scrubber/osd_scrub.h | 4
-rw-r--r--  src/osd/scrubber/pg_scrubber.cc | 123
-rw-r--r--  src/osd/scrubber/pg_scrubber.h | 41
-rw-r--r--  src/osd/scrubber/scrub_machine.cc | 171
-rw-r--r--  src/osd/scrubber/scrub_machine.h | 213
-rw-r--r--  src/osd/scrubber/scrub_machine_lstnr.h | 9
-rw-r--r--  src/osd/scrubber/scrub_reservations.cc | 17
-rw-r--r--  src/osd/scrubber/scrub_resources.cc | 83
-rw-r--r--  src/osd/scrubber/scrub_resources.h | 14
-rw-r--r--  src/osd/scrubber_common.h | 8
-rw-r--r--  src/pybind/mgr/dashboard/controllers/_crud.py | 14
-rw-r--r--  src/pybind/mgr/dashboard/controllers/ceph_users.py | 3
-rw-r--r--  src/pybind/mgr/dashboard/controllers/cephfs.py | 122
-rw-r--r--  src/pybind/mgr/dashboard/controllers/rgw.py | 87
-rw-r--r--  src/pybind/mgr/dashboard/frontend/cypress/e2e/common/table-helper.feature.po.ts | 13
-rw-r--r--  src/pybind/mgr/dashboard/frontend/cypress/e2e/filesystems/filesystems.e2e-spec.feature | 19
-rw-r--r--  src/pybind/mgr/dashboard/frontend/cypress/e2e/filesystems/snapshots.e2e-spec.feature | 53
-rw-r--r--  src/pybind/mgr/dashboard/frontend/cypress/e2e/filesystems/subvolume-groups.e2e-spec.feature | 4
-rw-r--r--  src/pybind/mgr/dashboard/frontend/cypress/e2e/rgw/roles.e2e-spec.ts | 14
-rw-r--r--  src/pybind/mgr/dashboard/frontend/cypress/e2e/rgw/roles.po.ts | 26
-rw-r--r--  src/pybind/mgr/dashboard/frontend/src/app/app-routing.module.ts | 2
-rw-r--r--  src/pybind/mgr/dashboard/frontend/src/app/ceph/cephfs/cephfs-form/cephfs-form.component.html | 13
-rw-r--r--  src/pybind/mgr/dashboard/frontend/src/app/ceph/cephfs/cephfs-form/cephfs-form.component.spec.ts | 37
-rw-r--r--  src/pybind/mgr/dashboard/frontend/src/app/ceph/cephfs/cephfs-form/cephfs-form.component.ts | 22
-rw-r--r--  src/pybind/mgr/dashboard/frontend/src/app/ceph/cephfs/cephfs-list/cephfs-list.component.ts | 2
-rw-r--r--  src/pybind/mgr/dashboard/frontend/src/app/ceph/cephfs/cephfs-snapshotschedule-list/cephfs-snapshotschedule-list.component.html | 58
-rw-r--r--  src/pybind/mgr/dashboard/frontend/src/app/ceph/cephfs/cephfs-snapshotschedule-list/cephfs-snapshotschedule-list.component.scss | 0
-rw-r--r--  src/pybind/mgr/dashboard/frontend/src/app/ceph/cephfs/cephfs-snapshotschedule-list/cephfs-snapshotschedule-list.component.spec.ts | 30
-rw-r--r--  src/pybind/mgr/dashboard/frontend/src/app/ceph/cephfs/cephfs-snapshotschedule-list/cephfs-snapshotschedule-list.component.ts | 99
-rw-r--r--  src/pybind/mgr/dashboard/frontend/src/app/ceph/cephfs/cephfs-subvolume-group/cephfs-subvolume-group.component.ts | 16
-rw-r--r--  src/pybind/mgr/dashboard/frontend/src/app/ceph/cephfs/cephfs-subvolume-list/cephfs-subvolume-list.component.html | 23
-rw-r--r--  src/pybind/mgr/dashboard/frontend/src/app/ceph/cephfs/cephfs-subvolume-list/cephfs-subvolume-list.component.ts | 54
-rw-r--r--  src/pybind/mgr/dashboard/frontend/src/app/ceph/cephfs/cephfs-subvolume-snapshots-list/cephfs-subvolume-snapshots-list.component.html | 36
-rw-r--r--  src/pybind/mgr/dashboard/frontend/src/app/ceph/cephfs/cephfs-subvolume-snapshots-list/cephfs-subvolume-snapshots-list.component.scss | 0
-rw-r--r--  src/pybind/mgr/dashboard/frontend/src/app/ceph/cephfs/cephfs-subvolume-snapshots-list/cephfs-subvolume-snapshots-list.component.spec.ts | 38
-rw-r--r--  src/pybind/mgr/dashboard/frontend/src/app/ceph/cephfs/cephfs-subvolume-snapshots-list/cephfs-subvolume-snapshots-list.component.ts | 148
-rw-r--r--  src/pybind/mgr/dashboard/frontend/src/app/ceph/cephfs/cephfs-tabs/cephfs-tabs.component.html | 91
-rw-r--r--  src/pybind/mgr/dashboard/frontend/src/app/ceph/cephfs/cephfs.module.ts | 10
-rw-r--r--  src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/bucket-tag-modal/bucket-tag-modal.component.html | 59
-rw-r--r--  src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/bucket-tag-modal/bucket-tag-modal.component.scss | 0
-rw-r--r--  src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/bucket-tag-modal/bucket-tag-modal.component.spec.ts | 27
-rw-r--r--  src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/bucket-tag-modal/bucket-tag-modal.component.ts | 75
-rw-r--r--  src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-bucket-details/rgw-bucket-details.component.html | 15
-rw-r--r--  src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-bucket-form/rgw-bucket-form.component.html | 60
-rw-r--r--  src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-bucket-form/rgw-bucket-form.component.ts | 69
-rw-r--r--  src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw.module.ts | 11
-rw-r--r--  src/pybind/mgr/dashboard/frontend/src/app/core/context/context.component.ts | 12
-rw-r--r--  src/pybind/mgr/dashboard/frontend/src/app/shared/api/cephfs-snapshot-schedule.service.spec.ts | 22
-rw-r--r--  src/pybind/mgr/dashboard/frontend/src/app/shared/api/cephfs-snapshot-schedule.service.ts | 39
-rw-r--r--  src/pybind/mgr/dashboard/frontend/src/app/shared/api/cephfs-subvolume-group.service.ts | 10
-rw-r--r--  src/pybind/mgr/dashboard/frontend/src/app/shared/api/cephfs-subvolume.service.spec.ts | 10
-rw-r--r--  src/pybind/mgr/dashboard/frontend/src/app/shared/api/cephfs-subvolume.service.ts | 30
-rw-r--r--  src/pybind/mgr/dashboard/frontend/src/app/shared/api/rgw-bucket.service.spec.ts | 10
-rw-r--r--  src/pybind/mgr/dashboard/frontend/src/app/shared/api/rgw-bucket.service.ts | 10
-rw-r--r--  src/pybind/mgr/dashboard/frontend/src/app/shared/components/code-block/code-block.component.html | 21
-rw-r--r--  src/pybind/mgr/dashboard/frontend/src/app/shared/components/code-block/code-block.component.scss | 6
-rw-r--r--  src/pybind/mgr/dashboard/frontend/src/app/shared/components/code-block/code-block.component.spec.ts | 38
-rw-r--r--  src/pybind/mgr/dashboard/frontend/src/app/shared/components/code-block/code-block.component.ts | 11
-rw-r--r--  src/pybind/mgr/dashboard/frontend/src/app/shared/components/components.module.ts | 10
-rw-r--r--  src/pybind/mgr/dashboard/frontend/src/app/shared/components/vertical-navigation/vertical-navigation.component.html | 24
-rw-r--r--  src/pybind/mgr/dashboard/frontend/src/app/shared/components/vertical-navigation/vertical-navigation.component.scss | 3
-rw-r--r--  src/pybind/mgr/dashboard/frontend/src/app/shared/components/vertical-navigation/vertical-navigation.component.spec.ts | 60
-rw-r--r--  src/pybind/mgr/dashboard/frontend/src/app/shared/components/vertical-navigation/vertical-navigation.component.ts | 37
-rw-r--r--  src/pybind/mgr/dashboard/frontend/src/app/shared/datatable/crud-table/crud-table.component.ts | 6
-rw-r--r--  src/pybind/mgr/dashboard/frontend/src/app/shared/datatable/datatable.module.ts | 6
-rw-r--r--  src/pybind/mgr/dashboard/frontend/src/app/shared/forms/crud-form/helpers.ts | 10
-rw-r--r--  src/pybind/mgr/dashboard/frontend/src/app/shared/forms/crud-form/validators/rgw-role-validator.ts | 9
-rw-r--r--  src/pybind/mgr/dashboard/frontend/src/app/shared/models/cephfs-subvolume-group.model.ts | 2
-rw-r--r--  src/pybind/mgr/dashboard/frontend/src/app/shared/models/cephfs-subvolume.model.ts | 10
-rw-r--r--  src/pybind/mgr/dashboard/frontend/src/app/shared/models/cephfs-subvolumegroup.model.ts | 13
-rw-r--r--  src/pybind/mgr/dashboard/frontend/src/app/shared/models/crud-table-metadata.ts | 1
-rw-r--r--  src/pybind/mgr/dashboard/frontend/src/app/shared/models/snapshot-schedule.ts | 17
-rw-r--r--  src/pybind/mgr/dashboard/frontend/src/styles/defaults/_bootstrap-defaults.scss | 2
-rw-r--r--  src/pybind/mgr/dashboard/openapi.yaml | 204
-rw-r--r--  src/pybind/mgr/dashboard/services/rgw_client.py | 38
-rw-r--r--  src/pybind/mgr/devicehealth/module.py | 4
-rw-r--r--  src/pybind/mgr/nfs/export.py | 50
-rw-r--r--  src/pybind/mgr/object_format.py | 55
-rw-r--r--  src/pybind/mgr/tests/test_object_format.py | 26
-rw-r--r--  src/pybind/mgr/volumes/fs/async_cloner.py | 43
-rw-r--r--  src/pybind/mgr/volumes/fs/operations/access.py | 2
-rw-r--r--  src/pybind/mgr/volumes/fs/operations/lock.py | 4
-rw-r--r--  src/pybind/mgr/volumes/fs/operations/pin_util.py | 3
-rw-r--r--  src/pybind/mgr/volumes/fs/operations/trash.py | 1
-rw-r--r--  src/pybind/mgr/volumes/fs/operations/versions/__init__.py | 2
-rw-r--r--  src/pybind/mgr/volumes/fs/operations/versions/metadata_manager.py | 1
-rw-r--r--  src/pybind/mgr/volumes/fs/operations/versions/op_sm.py | 2
-rw-r--r--  src/pybind/mgr/volumes/fs/operations/versions/subvolume_base.py | 4
-rw-r--r--  src/pybind/mgr/volumes/fs/operations/versions/subvolume_v1.py | 39
-rw-r--r--  src/pybind/mgr/volumes/fs/operations/versions/subvolume_v2.py | 25
-rw-r--r--  src/pybind/mgr/volumes/fs/operations/volume.py | 4
-rw-r--r--  src/pybind/mgr/volumes/fs/volume.py | 10
-rwxr-xr-x  src/pybind/rbd/setup.py | 14
-rw-r--r--  src/python-common/ceph/rgw/types.py | 1
-rw-r--r--  src/rgw/CMakeLists.txt | 1
-rw-r--r--  src/rgw/driver/daos/rgw_sal_daos.cc | 4
-rw-r--r--  src/rgw/driver/daos/rgw_sal_daos.h | 4
-rw-r--r--  src/rgw/driver/dbstore/common/dbstore.h | 3
-rw-r--r--  src/rgw/driver/motr/rgw_sal_motr.cc | 4
-rw-r--r--  src/rgw/driver/motr/rgw_sal_motr.h | 4
-rw-r--r--  src/rgw/driver/posix/rgw_sal_posix.cc | 4
-rw-r--r--  src/rgw/driver/posix/rgw_sal_posix.h | 2
-rw-r--r--  src/rgw/driver/rados/rgw_bucket.cc | 21
-rw-r--r--  src/rgw/driver/rados/rgw_bucket.h | 1
-rw-r--r--  src/rgw/driver/rados/rgw_cr_rados.cc | 39
-rw-r--r--  src/rgw/driver/rados/rgw_cr_rados.h | 13
-rw-r--r--  src/rgw/driver/rados/rgw_d3n_datacache.cc | 2
-rw-r--r--  src/rgw/driver/rados/rgw_d3n_datacache.h | 20
-rw-r--r--  src/rgw/driver/rados/rgw_data_sync.cc | 41
-rw-r--r--  src/rgw/driver/rados/rgw_lc_tier.cc | 1
-rw-r--r--  src/rgw/driver/rados/rgw_object_expirer_core.cc | 34
-rw-r--r--  src/rgw/driver/rados/rgw_object_expirer_core.h | 9
-rw-r--r--  src/rgw/driver/rados/rgw_period.cc | 2
-rw-r--r--  src/rgw/driver/rados/rgw_pubsub_push.cc | 169
-rw-r--r--  src/rgw/driver/rados/rgw_putobj_processor.cc | 18
-rw-r--r--  src/rgw/driver/rados/rgw_putobj_processor.h | 3
-rw-r--r--  src/rgw/driver/rados/rgw_rados.cc | 327
-rw-r--r--  src/rgw/driver/rados/rgw_rados.h | 17
-rw-r--r--  src/rgw/driver/rados/rgw_sal_rados.cc | 35
-rw-r--r--  src/rgw/driver/rados/rgw_sal_rados.h | 4
-rw-r--r--  src/rgw/driver/rados/rgw_service.cc | 53
-rw-r--r--  src/rgw/driver/rados/rgw_service.h | 25
-rw-r--r--  src/rgw/driver/rados/rgw_sync_error_repo.cc | 33
-rw-r--r--  src/rgw/driver/rados/rgw_sync_error_repo.h | 5
-rw-r--r--  src/rgw/driver/rados/rgw_sync_module_aws.cc | 1
-rw-r--r--  src/rgw/driver/rados/rgw_sync_module_es.cc | 13
-rw-r--r--  src/rgw/driver/rados/rgw_sync_module_es_rest.cc | 2
-rw-r--r--  src/rgw/driver/rados/rgw_tools.cc | 107
-rw-r--r--  src/rgw/driver/rados/rgw_tools.h | 71
-rw-r--r--  src/rgw/driver/rados/rgw_trim_bilog.cc | 32
-rw-r--r--  src/rgw/driver/rados/rgw_trim_datalog.cc | 2
-rw-r--r--  src/rgw/driver/rados/rgw_trim_mdlog.cc | 6
-rw-r--r--  src/rgw/driver/rados/rgw_user.cc | 16
-rw-r--r--  src/rgw/driver/rados/rgw_user.h | 2
-rw-r--r--  src/rgw/driver/rados/sync_fairness.cc | 24
-rw-r--r--  src/rgw/rgw_acl.cc | 155
-rw-r--r--  src/rgw/rgw_acl.h | 290
-rw-r--r--  src/rgw/rgw_acl_s3.cc | 596
-rw-r--r--  src/rgw/rgw_acl_s3.h | 120
-rw-r--r--  src/rgw/rgw_acl_swift.cc | 333
-rw-r--r--  src/rgw/rgw_acl_swift.h | 94
-rw-r--r--  src/rgw/rgw_acl_types.h | 17
-rw-r--r--  src/rgw/rgw_admin.cc | 16
-rw-r--r--  src/rgw/rgw_common.cc | 140
-rw-r--r--  src/rgw/rgw_common.h | 46
-rw-r--r--  src/rgw/rgw_file.cc | 2
-rw-r--r--  src/rgw/rgw_file_int.h | 32
-rw-r--r--  src/rgw/rgw_kafka.cc | 10
-rw-r--r--  src/rgw/rgw_lc.cc | 48
-rw-r--r--  src/rgw/rgw_lib.cc | 13
-rw-r--r--  src/rgw/rgw_log.cc | 11
-rw-r--r--  src/rgw/rgw_lua_request.cc | 28
-rw-r--r--  src/rgw/rgw_main.cc | 7
-rw-r--r--  src/rgw/rgw_mdlog.h | 9
-rw-r--r--  src/rgw/rgw_multipart_meta_filter.cc | 8
-rw-r--r--  src/rgw/rgw_op.cc | 303
-rw-r--r--  src/rgw/rgw_op.h | 45
-rw-r--r--  src/rgw/rgw_quota.cc | 225
-rw-r--r--  src/rgw/rgw_rest_client.cc | 42
-rw-r--r--  src/rgw/rgw_rest_client.h | 2
-rw-r--r--  src/rgw/rgw_rest_metadata.cc | 4
-rw-r--r--  src/rgw/rgw_rest_pubsub.cc | 32
-rw-r--r--  src/rgw/rgw_rest_s3.cc | 80
-rw-r--r--  src/rgw/rgw_rest_s3.h | 3
-rw-r--r--  src/rgw/rgw_rest_swift.cc | 60
-rw-r--r--  src/rgw/rgw_sal.cc | 9
-rw-r--r--  src/rgw/rgw_sal.h | 45
-rw-r--r--  src/rgw/rgw_sal_dbstore.cc | 14
-rw-r--r--  src/rgw/rgw_sal_dbstore.h | 4
-rw-r--r--  src/rgw/rgw_sal_filter.cc | 4
-rw-r--r--  src/rgw/rgw_sal_filter.h | 4
-rw-r--r--  src/rgw/rgw_sal_fwd.h | 18
-rw-r--r--  src/rgw/rgw_user_types.h | 45
-rw-r--r--  src/rgw/rgw_zone.cc | 23
-rw-r--r--  src/rgw/services/svc_bi_rados.cc | 66
-rw-r--r--  src/rgw/services/svc_bi_rados.h | 24
-rw-r--r--  src/rgw/services/svc_bilog_rados.cc | 18
-rw-r--r--  src/rgw/services/svc_bilog_rados.h | 6
-rw-r--r--  src/rgw/services/svc_cls.cc | 106
-rw-r--r--  src/rgw/services/svc_cls.h | 32
-rw-r--r--  src/rgw/services/svc_config_key_rados.cc | 21
-rw-r--r--  src/rgw/services/svc_config_key_rados.h | 14
-rw-r--r--  src/rgw/services/svc_mdlog.cc | 34
-rw-r--r--  src/rgw/services/svc_mdlog.h | 10
-rw-r--r--  src/rgw/services/svc_notify.cc | 30
-rw-r--r--  src/rgw/services/svc_notify.h | 18
-rw-r--r--  src/rgw/services/svc_rados.cc | 445
-rw-r--r--  src/rgw/services/svc_rados.h | 252
-rw-r--r--  src/rgw/services/svc_sys_obj.cc | 1
-rw-r--r--  src/rgw/services/svc_sys_obj.h | 7
-rw-r--r--  src/rgw/services/svc_sys_obj_cache.cc | 1
-rw-r--r--  src/rgw/services/svc_sys_obj_cache.h | 10
-rw-r--r--  src/rgw/services/svc_sys_obj_core.cc | 78
-rw-r--r--  src/rgw/services/svc_sys_obj_core.h | 11
-rw-r--r--  src/rgw/services/svc_sys_obj_core_types.h | 18
-rw-r--r--  src/rgw/services/svc_tier_rados.cc | 6
-rw-r--r--  src/rgw/services/svc_tier_rados.h | 25
-rw-r--r--  src/rgw/services/svc_user.h | 5
-rw-r--r--  src/rgw/services/svc_user_rados.cc | 67
-rw-r--r--  src/rgw/services/svc_user_rados.h | 9
-rw-r--r--  src/rgw/services/svc_zone.cc | 11
-rw-r--r--  src/rgw/services/svc_zone.h | 5
-rw-r--r--  src/rgw/services/svc_zone_utils.cc | 29
-rw-r--r--  src/rgw/services/svc_zone_utils.h | 7
-rw-r--r--  src/test/common/CMakeLists.txt | 4
-rw-r--r--  src/test/common/test_versioned_variant.cc | 341
-rw-r--r--  src/test/librbd/CMakeLists.txt | 3
-rw-r--r--  src/test/librbd/io/test_mock_ImageRequest.cc | 31
-rw-r--r--  src/test/librbd/io/test_mock_ObjectRequest.cc | 115
-rw-r--r--  src/test/librbd/journal/test_Entries.cc | 63
-rw-r--r--  src/test/librbd/journal/test_Stress.cc | 121
-rw-r--r--  src/test/librbd/test_librbd.cc | 365
-rw-r--r--  src/test/librbd/test_main.cc | 2
-rw-r--r--  src/test/librbd/test_mock_Journal.cc | 2
-rw-r--r--  src/test/objectstore/Allocator_test.cc | 3
-rw-r--r--  src/test/objectstore/fastbmap_allocator_test.cc | 181
-rw-r--r--  src/test/objectstore/store_test.cc | 208
-rw-r--r--  src/test/pybind/test_rbd.py | 22
-rw-r--r--  src/test/rgw/test_rgw_lc.cc | 237
-rw-r--r--  src/test/rgw/test_rgw_lua.cc | 53
-rw-r--r--  src/test/test_c2c.cc | 2
-rw-r--r--  src/test/test_mempool.cc | 2
-rw-r--r--  src/test/test_rgw_admin_meta.cc | 2
-rw-r--r--  src/tools/ceph-dencoder/common_types.h | 170
-rwxr-xr-x  src/vstart.sh | 48
378 files changed, 12567 insertions, 7753 deletions
diff --git a/src/auth/Auth.h b/src/auth/Auth.h
index 5521c8d3fcf..83e23b34dbe 100644
--- a/src/auth/Auth.h
+++ b/src/auth/Auth.h
@@ -16,6 +16,7 @@
#define CEPH_AUTHTYPES_H
#include "Crypto.h"
+#include "common/ceph_json.h"
#include "common/entity_name.h"
// The _MAX values are a bit wonky here because we are overloading the first
@@ -59,6 +60,14 @@ struct EntityAuth {
decode(pending_key, bl);
}
}
+ void dump(ceph::Formatter *f) const {
+ f->dump_object("key", key);
+ encode_json("caps", caps, f);
+ f->dump_object("pending_key", pending_key);
+ }
+ static void generate_test_instances(std::list<EntityAuth*>& ls) {
+ ls.push_back(new EntityAuth);
+ }
};
WRITE_CLASS_ENCODER(EntityAuth)
@@ -95,6 +104,19 @@ struct AuthCapsInfo {
allow_all = (bool)a;
decode(caps, bl);
}
+ void dump(ceph::Formatter *f) const {
+ f->dump_bool("allow_all", allow_all);
+ encode_json("caps", caps, f);
+ f->dump_unsigned("caps_len", caps.length());
+ }
+ static void generate_test_instances(std::list<AuthCapsInfo*>& ls) {
+ ls.push_back(new AuthCapsInfo);
+ ls.push_back(new AuthCapsInfo);
+ ls.back()->allow_all = true;
+ ls.push_back(new AuthCapsInfo);
+ ls.back()->caps.append("foo");
+ ls.back()->caps.append("bar");
+ }
};
WRITE_CLASS_ENCODER(AuthCapsInfo)
@@ -147,6 +169,25 @@ struct AuthTicket {
decode(caps, bl);
decode(flags, bl);
}
+ void dump(ceph::Formatter *f) const {
+ f->dump_object("name", name);
+ f->dump_unsigned("global_id", global_id);
+ f->dump_stream("created") << created;
+ f->dump_stream("renew_after") << renew_after;
+ f->dump_stream("expires") << expires;
+ f->dump_object("caps", caps);
+ f->dump_unsigned("flags", flags);
+ }
+ static void generate_test_instances(std::list<AuthTicket*>& ls) {
+ ls.push_back(new AuthTicket);
+ ls.push_back(new AuthTicket);
+ ls.back()->name.set_id("client.123");
+ ls.back()->global_id = 123;
+ ls.back()->init_timestamps(utime_t(123, 456), 7);
+ ls.back()->caps.caps.append("foo");
+ ls.back()->caps.caps.append("bar");
+ ls.back()->flags = 0x12345678;
+ }
};
WRITE_CLASS_ENCODER(AuthTicket)
@@ -231,6 +272,16 @@ struct ExpiringCryptoKey {
decode(key, bl);
decode(expiration, bl);
}
+ void dump(ceph::Formatter *f) const {
+ f->dump_object("key", key);
+ f->dump_stream("expiration") << expiration;
+ }
+ static void generate_test_instances(std::list<ExpiringCryptoKey*>& ls) {
+ ls.push_back(new ExpiringCryptoKey);
+ ls.push_back(new ExpiringCryptoKey);
+ ls.back()->key.set_secret(
+ CEPH_CRYPTO_AES, bufferptr("1234567890123456", 16), utime_t(123, 456));
+ }
};
WRITE_CLASS_ENCODER(ExpiringCryptoKey)
@@ -295,6 +346,15 @@ struct RotatingSecrets {
}
void dump();
+ void dump(ceph::Formatter *f) const {
+ encode_json("secrets", secrets, f);
+ }
+ static void generate_test_instances(std::list<RotatingSecrets*>& ls) {
+ ls.push_back(new RotatingSecrets);
+ ls.push_back(new RotatingSecrets);
+ auto eck = new ExpiringCryptoKey;
+ ls.back()->add(*eck);
+ }
};
WRITE_CLASS_ENCODER(RotatingSecrets)
diff --git a/src/auth/Crypto.cc b/src/auth/Crypto.cc
index ce666e8bdc8..5d68d3470bc 100644
--- a/src/auth/Crypto.cc
+++ b/src/auth/Crypto.cc
@@ -511,6 +511,23 @@ void CryptoKey::decode(bufferlist::const_iterator& bl)
throw ceph::buffer::malformed_input("malformed secret");
}
+void CryptoKey::dump(Formatter *f) const
+{
+ f->dump_int("type", type);
+ f->dump_stream("created") << created;
+ f->dump_int("secret.length", secret.length());
+}
+
+void CryptoKey::generate_test_instances(std::list<CryptoKey*>& ls)
+{
+ ls.push_back(new CryptoKey);
+ ls.push_back(new CryptoKey);
+ ls.back()->type = CEPH_CRYPTO_AES;
+ ls.back()->set_secret(
+ CEPH_CRYPTO_AES, bufferptr("1234567890123456", 16), utime_t(123, 456));
+ ls.back()->created = utime_t(123, 456);
+}
+
int CryptoKey::set_secret(int type, const bufferptr& s, utime_t c)
{
int r = _set_secret(type, s);
diff --git a/src/auth/Crypto.h b/src/auth/Crypto.h
index a29ac1abd81..3ce655a1256 100644
--- a/src/auth/Crypto.h
+++ b/src/auth/Crypto.h
@@ -111,6 +111,8 @@ public:
void encode(ceph::buffer::list& bl) const;
void decode(ceph::buffer::list::const_iterator& bl);
+ void dump(ceph::Formatter *f) const;
+ static void generate_test_instances(std::list<CryptoKey*>& ls);
void clear() {
*this = CryptoKey();
diff --git a/src/auth/cephx/CephxKeyServer.cc b/src/auth/cephx/CephxKeyServer.cc
index 236ac451add..a5ad1f2b797 100644
--- a/src/auth/cephx/CephxKeyServer.cc
+++ b/src/auth/cephx/CephxKeyServer.cc
@@ -257,6 +257,16 @@ std::map<EntityName,CryptoKey> KeyServer::get_used_pending_keys()
return ret;
}
+void KeyServer::dump(Formatter *f) const
+{
+ f->dump_object("data", data);
+}
+
+void KeyServer::generate_test_instances(std::list<KeyServer*>& ls)
+{
+ ls.push_back(new KeyServer(nullptr, nullptr));
+}
+
bool KeyServer::generate_secret(CryptoKey& secret)
{
bufferptr bp;
diff --git a/src/auth/cephx/CephxKeyServer.h b/src/auth/cephx/CephxKeyServer.h
index 64915c8ce4a..d147dd441ad 100644
--- a/src/auth/cephx/CephxKeyServer.h
+++ b/src/auth/cephx/CephxKeyServer.h
@@ -21,15 +21,16 @@
#include "include/common_fwd.h"
struct KeyServerData {
- version_t version;
+ version_t version{0};
/* for each entity */
std::map<EntityName, EntityAuth> secrets;
- KeyRing *extra_secrets;
+ KeyRing *extra_secrets = nullptr;
/* for each service type */
- version_t rotating_ver;
+ version_t rotating_ver{0};
std::map<uint32_t, RotatingSecrets> rotating_secrets;
+ KeyServerData() {}
explicit KeyServerData(KeyRing *extra)
: version(0),
@@ -70,7 +71,17 @@ struct KeyServerData {
decode(rotating_ver, iter);
decode(rotating_secrets, iter);
}
-
+ void dump(ceph::Formatter *f) const {
+ f->dump_unsigned("version", version);
+ f->dump_unsigned("rotating_version", rotating_ver);
+ encode_json("secrets", secrets, f);
+ encode_json("rotating_secrets", rotating_secrets, f);
+ }
+ static void generate_test_instances(std::list<KeyServerData*>& ls) {
+ ls.push_back(new KeyServerData);
+ ls.push_back(new KeyServerData);
+ ls.back()->version = 1;
+ }
bool contains(const EntityName& name) const {
return (secrets.find(name) != secrets.end());
}
@@ -159,8 +170,21 @@ struct KeyServerData {
decode(auth, bl);
}
}
+ void dump(ceph::Formatter *f) const {
+ f->dump_unsigned("op", op);
+ f->dump_object("name", name);
+ f->dump_object("auth", auth);
+ }
+ static void generate_test_instances(std::list<Incremental*>& ls) {
+ ls.push_back(new Incremental);
+ ls.back()->op = AUTH_INC_DEL;
+ ls.push_back(new Incremental);
+ ls.back()->op = AUTH_INC_ADD;
+ ls.push_back(new Incremental);
+ ls.back()->op = AUTH_INC_SET_ROTATING;
+ }
};
-
+
void apply_incremental(Incremental& inc) {
switch (inc.op) {
case AUTH_INC_ADD:
@@ -188,8 +212,6 @@ WRITE_CLASS_ENCODER(KeyServerData)
WRITE_CLASS_ENCODER(KeyServerData::Incremental)
-
-
class KeyServer : public KeyStore {
CephContext *cct;
KeyServerData data;
@@ -205,7 +227,9 @@ class KeyServer : public KeyStore {
bool _get_service_caps(const EntityName& name, uint32_t service_id,
AuthCapsInfo& caps) const;
public:
+ KeyServer() : lock{ceph::make_mutex("KeyServer::lock")} {}
KeyServer(CephContext *cct_, KeyRing *extra_secrets);
+ KeyServer& operator=(const KeyServer&) = delete;
bool generate_secret(CryptoKey& secret);
bool get_secret(const EntityName& name, CryptoKey& secret) const override;
@@ -248,6 +272,8 @@ public:
using ceph::decode;
decode(data, bl);
}
+ void dump(ceph::Formatter *f) const;
+ static void generate_test_instances(std::list<KeyServer*>& ls);
bool contains(const EntityName& name) const;
int encode_secrets(ceph::Formatter *f, std::stringstream *ds) const;
void encode_formatted(std::string label, ceph::Formatter *f, ceph::buffer::list &bl);
diff --git a/src/auth/cephx/CephxProtocol.h b/src/auth/cephx/CephxProtocol.h
index aabfaaad10c..260cb13ff5a 100644
--- a/src/auth/cephx/CephxProtocol.h
+++ b/src/auth/cephx/CephxProtocol.h
@@ -55,6 +55,13 @@ struct CephXServerChallenge {
decode(struct_v, bl);
decode(server_challenge, bl);
}
+ void dump(ceph::Formatter *f) const {
+ f->dump_unsigned("server_challenge", server_challenge);
+ }
+ static void generate_test_instances(std::list<CephXServerChallenge*>& ls) {
+ ls.push_back(new CephXServerChallenge);
+ ls.back()->server_challenge = 1;
+ }
};
WRITE_CLASS_ENCODER(CephXServerChallenge)
@@ -72,6 +79,13 @@ struct CephXRequestHeader {
using ceph::decode;
decode(request_type, bl);
}
+ void dump(ceph::Formatter *f) const {
+ f->dump_unsigned("request_type", request_type);
+ }
+ static void generate_test_instances(std::list<CephXRequestHeader*>& ls) {
+ ls.push_back(new CephXRequestHeader);
+ ls.back()->request_type = 1;
+ }
};
WRITE_CLASS_ENCODER(CephXRequestHeader)
@@ -89,6 +103,15 @@ struct CephXResponseHeader {
decode(request_type, bl);
decode(status, bl);
}
+ void dump(ceph::Formatter *f) const {
+ f->dump_unsigned("request_type", request_type);
+ f->dump_int("status", status);
+ }
+ static void generate_test_instances(std::list<CephXResponseHeader*>& ls) {
+ ls.push_back(new CephXResponseHeader);
+ ls.back()->request_type = 1;
+ ls.back()->status = 0;
+ }
};
WRITE_CLASS_ENCODER(CephXResponseHeader)
@@ -113,6 +136,17 @@ struct CephXTicketBlob {
decode(secret_id, bl);
decode(blob, bl);
}
+
+ void dump(ceph::Formatter *f) const {
+ f->dump_unsigned("secret_id", secret_id);
+ f->dump_unsigned("blob_len", blob.length());
+ }
+
+ static void generate_test_instances(std::list<CephXTicketBlob*>& ls) {
+ ls.push_back(new CephXTicketBlob);
+ ls.back()->secret_id = 123;
+ ls.back()->blob.append(std::string_view("this is a blob"));
+ }
};
WRITE_CLASS_ENCODER(CephXTicketBlob)
@@ -152,6 +186,25 @@ struct CephXAuthenticate {
// old_ticket both on reconnects and renewals
old_ticket_may_be_omitted = struct_v < 3;
}
+ void dump(ceph::Formatter *f) const {
+ f->dump_unsigned("client_challenge", client_challenge);
+ f->dump_unsigned("key", key);
+ f->open_object_section("old_ticket");
+ old_ticket.dump(f);
+ f->close_section();
+ f->dump_unsigned("other_keys", other_keys);
+ }
+ static void generate_test_instances(std::list<CephXAuthenticate*>& ls) {
+ ls.push_back(new CephXAuthenticate);
+ ls.back()->client_challenge = 0;
+ ls.back()->key = 0;
+ ls.push_back(new CephXAuthenticate);
+ ls.back()->client_challenge = 1;
+ ls.back()->key = 2;
+ ls.back()->old_ticket.secret_id = 3;
+ ls.back()->old_ticket.blob.append(std::string_view("this is a blob"));
+ ls.back()->other_keys = 4;
+ }
};
WRITE_CLASS_ENCODER(CephXAuthenticate)
@@ -168,6 +221,15 @@ struct CephXChallengeBlob {
decode(server_challenge, bl);
decode(client_challenge, bl);
}
+ void dump(ceph::Formatter *f) const {
+ f->dump_unsigned("server_challenge", server_challenge);
+ f->dump_unsigned("client_challenge", client_challenge);
+ }
+ static void generate_test_instances(std::list<CephXChallengeBlob*>& ls) {
+ ls.push_back(new CephXChallengeBlob);
+ ls.back()->server_challenge = 123;
+ ls.back()->client_challenge = 456;
+ }
};
WRITE_CLASS_ENCODER(CephXChallengeBlob)
@@ -218,6 +280,15 @@ struct CephXServiceTicketRequest {
decode(struct_v, bl);
decode(keys, bl);
}
+
+ void dump(ceph::Formatter *f) const {
+ f->dump_unsigned("keys", keys);
+ }
+
+ static void generate_test_instances(std::list<CephXServiceTicketRequest*>& ls) {
+ ls.push_back(new CephXServiceTicketRequest);
+ ls.back()->keys = 123;
+ }
};
WRITE_CLASS_ENCODER(CephXServiceTicketRequest)
@@ -251,6 +322,17 @@ struct CephXAuthorizeReply {
decode(connection_secret, bl);
}
}
+ void dump(ceph::Formatter *f) const {
+ f->dump_unsigned("nonce_plus_one", nonce_plus_one);
+ f->dump_string("connection_secret", connection_secret);
+ }
+ static void generate_test_instances(std::list<CephXAuthorizeReply*>& ls) {
+ ls.push_back(new CephXAuthorizeReply);
+ ls.back()->nonce_plus_one = 0;
+ ls.push_back(new CephXAuthorizeReply);
+ ls.back()->nonce_plus_one = 123;
+ ls.back()->connection_secret = "secret";
+ }
};
WRITE_CLASS_ENCODER(CephXAuthorizeReply)
@@ -353,6 +435,17 @@ struct CephXServiceTicket {
decode(session_key, bl);
decode(validity, bl);
}
+ void dump(ceph::Formatter *f) const {
+ session_key.dump(f);
+ validity.dump(f);
+ }
+ static void generate_test_instances(std::list<CephXServiceTicket*>& ls) {
+ ls.push_back(new CephXServiceTicket);
+ ls.push_back(new CephXServiceTicket);
+ ls.back()->session_key.set_secret(
+ CEPH_CRYPTO_AES, bufferptr("1234567890123456", 16), utime_t(123, 456));
+ ls.back()->validity = utime_t(123, 456);
+ }
};
WRITE_CLASS_ENCODER(CephXServiceTicket)
@@ -375,6 +468,18 @@ struct CephXServiceTicketInfo {
decode(ticket, bl);
decode(session_key, bl);
}
+ void dump(ceph::Formatter *f) const {
+ ticket.dump(f);
+ session_key.dump(f);
+ }
+ static void generate_test_instances(std::list<CephXServiceTicketInfo*>& ls) {
+ ls.push_back(new CephXServiceTicketInfo);
+ ls.push_back(new CephXServiceTicketInfo);
+ ls.back()->ticket.global_id = 1234;
+ ls.back()->ticket.init_timestamps(utime_t(123, 456), utime_t(123, 456));
+ ls.back()->session_key.set_secret(
+ CEPH_CRYPTO_AES, bufferptr("1234567890123456", 16), utime_t(123, 456));
+ }
};
WRITE_CLASS_ENCODER(CephXServiceTicketInfo)
@@ -392,6 +497,13 @@ struct CephXAuthorizeChallenge : public AuthAuthorizerChallenge {
decode(struct_v, bl);
decode(server_challenge, bl);
}
+ void dump(ceph::Formatter *f) const {
+ f->dump_unsigned("server_challenge", server_challenge);
+ }
+ static void generate_test_instances(std::list<CephXAuthorizeChallenge*>& ls) {
+ ls.push_back(new CephXAuthorizeChallenge);
+ ls.back()->server_challenge = 1234;
+ }
};
WRITE_CLASS_ENCODER(CephXAuthorizeChallenge)
@@ -417,6 +529,18 @@ struct CephXAuthorize {
decode(server_challenge_plus_one, bl);
}
}
+ void dump(ceph::Formatter *f) const {
+ f->dump_unsigned("nonce", nonce);
+ f->dump_unsigned("have_challenge", have_challenge);
+ f->dump_unsigned("server_challenge_plus_one", server_challenge_plus_one);
+ }
+ static void generate_test_instances(std::list<CephXAuthorize*>& ls) {
+ ls.push_back(new CephXAuthorize);
+ ls.push_back(new CephXAuthorize);
+ ls.back()->nonce = 1234;
+ ls.back()->have_challenge = true;
+ ls.back()->server_challenge_plus_one = 1234;
+ }
};
WRITE_CLASS_ENCODER(CephXAuthorize)
diff --git a/src/ceph-volume/ceph_volume/devices/lvm/listing.py b/src/ceph-volume/ceph_volume/devices/lvm/listing.py
index c16afdaa767..8fb9d8ddcf8 100644
--- a/src/ceph-volume/ceph_volume/devices/lvm/listing.py
+++ b/src/ceph-volume/ceph_volume/devices/lvm/listing.py
@@ -153,7 +153,9 @@ class List(object):
elif arg[0] == '/':
lv = api.get_lvs_from_path(arg)
else:
- lv = [api.get_single_lv(filters={'lv_name': arg.split('/')[1]})]
+ vg_name, lv_name = arg.split('/')
+ lv = [api.get_single_lv(filters={'lv_name': lv_name,
+ 'vg_name': vg_name})]
report = self.create_report(lv)
diff --git a/src/ceph-volume/ceph_volume/util/device.py b/src/ceph-volume/ceph_volume/util/device.py
index bb806292f2c..c3de2a97fd9 100644
--- a/src/ceph-volume/ceph_volume/util/device.py
+++ b/src/ceph-volume/ceph_volume/util/device.py
@@ -460,11 +460,11 @@ class Device(object):
def device_type(self):
self.load_blkid_api()
if 'type' in self.sys_api:
- return self.sys_api['type']
+ return self.sys_api.get('type')
elif self.disk_api:
- return self.disk_api['TYPE']
+ return self.disk_api.get('TYPE')
elif self.blkid_api:
- return self.blkid_api['TYPE']
+ return self.blkid_api.get('TYPE')
@property
def is_mpath(self):
@@ -478,9 +478,9 @@ class Device(object):
def is_partition(self):
self.load_blkid_api()
if self.disk_api:
- return self.disk_api['TYPE'] == 'part'
+ return self.disk_api.get('TYPE') == 'part'
elif self.blkid_api:
- return self.blkid_api['TYPE'] == 'part'
+ return self.blkid_api.get('TYPE') == 'part'
return False
@property
diff --git a/src/ceph-volume/ceph_volume/util/disk.py b/src/ceph-volume/ceph_volume/util/disk.py
index 04104936985..0bbd5336d5a 100644
--- a/src/ceph-volume/ceph_volume/util/disk.py
+++ b/src/ceph-volume/ceph_volume/util/disk.py
@@ -802,7 +802,7 @@ def get_partitions(_sys_dev_block_path ='/sys/dev/block'):
result = dict()
for device in devices:
device_path = os.path.join(_sys_dev_block_path, device)
- is_partition = get_file_contents(os.path.join(device_path, 'partition')) == "1"
+ is_partition = int(get_file_contents(os.path.join(device_path, 'partition'), '0')) > 0
if not is_partition:
continue
diff --git a/src/cephadm/cephadm.py b/src/cephadm/cephadm.py
index 348f581f9e6..ec90db6305b 100755
--- a/src/cephadm/cephadm.py
+++ b/src/cephadm/cephadm.py
@@ -18,7 +18,7 @@ import tempfile
import time
import errno
import ssl
-from typing import Dict, List, Tuple, Optional, Union, Any, Callable, IO, Sequence, TypeVar, cast, Iterable, TextIO
+from typing import Dict, List, Tuple, Optional, Union, Any, Callable, Sequence, TypeVar, cast, Iterable
import re
import uuid
@@ -28,29 +28,14 @@ from functools import wraps
from glob import glob
from io import StringIO
from threading import Thread, Event
-from urllib.error import HTTPError, URLError
from urllib.request import urlopen, Request
from pathlib import Path
from cephadmlib.constants import (
# default images
- DEFAULT_ALERT_MANAGER_IMAGE,
- DEFAULT_ELASTICSEARCH_IMAGE,
- DEFAULT_GRAFANA_IMAGE,
- DEFAULT_HAPROXY_IMAGE,
DEFAULT_IMAGE,
DEFAULT_IMAGE_IS_MAIN,
DEFAULT_IMAGE_RELEASE,
- DEFAULT_JAEGER_AGENT_IMAGE,
- DEFAULT_JAEGER_COLLECTOR_IMAGE,
- DEFAULT_JAEGER_QUERY_IMAGE,
- DEFAULT_KEEPALIVED_IMAGE,
- DEFAULT_LOKI_IMAGE,
- DEFAULT_NODE_EXPORTER_IMAGE,
- DEFAULT_NVMEOF_IMAGE,
- DEFAULT_PROMETHEUS_IMAGE,
- DEFAULT_PROMTAIL_IMAGE,
- DEFAULT_SNMP_GATEWAY_IMAGE,
# other constant values
CEPH_CONF,
CEPH_CONF_DIR,
@@ -82,7 +67,6 @@ from cephadmlib.context_getters import (
get_config_and_keyring,
get_parm,
read_configuration_source,
- should_log_to_journald,
)
from cephadmlib.exceptions import (
ClusterAlreadyExists,
@@ -99,7 +83,6 @@ from cephadmlib.call_wrappers import (
concurrent_tasks,
)
from cephadmlib.container_engines import (
- Docker,
Podman,
check_container_engine,
find_container_engine,
@@ -107,7 +90,6 @@ from cephadmlib.container_engines import (
registry_login,
)
from cephadmlib.data_utils import (
- dict_get,
dict_get_join,
get_legacy_config_fsid,
is_fsid,
@@ -120,7 +102,6 @@ from cephadmlib.file_utils import (
get_file_timestamp,
makedirs,
pathify,
- populate_files,
read_file,
recursive_chown,
touch,
@@ -134,7 +115,6 @@ from cephadmlib.net_utils import (
check_subnet,
get_fqdn,
get_hostname,
- get_ip_addresses,
get_short_hostname,
ip_in_subnets,
is_ipv6,
@@ -145,10 +125,16 @@ from cephadmlib.net_utils import (
wrap_ipv6,
)
from cephadmlib.locking import FileLock
-from cephadmlib.daemon_identity import DaemonIdentity, DaemonSubIdentity
+from cephadmlib.daemon_identity import DaemonIdentity
from cephadmlib.packagers import create_packager, Packager
-from cephadmlib.logging import cephadm_init_logging, Highlight, LogDestination
+from cephadmlib.logging import (
+ cephadm_init_logging,
+ Highlight,
+ LogDestination,
+)
from cephadmlib.systemd import check_unit, check_units
+from cephadmlib import systemd_unit
+from cephadmlib import runscripts
from cephadmlib.container_types import (
CephContainer,
InitContainer,
@@ -169,10 +155,26 @@ from cephadmlib.daemon_form import (
register as register_daemon_form,
)
from cephadmlib.deploy import DeploymentType
-from cephadmlib.container_daemon_form import ContainerDaemonForm
+from cephadmlib.container_daemon_form import (
+ ContainerDaemonForm,
+ daemon_to_container,
+)
from cephadmlib.sysctl import install_sysctl, migrate_sysctl_dir
from cephadmlib.firewalld import Firewalld, update_firewalld
from cephadmlib import templating
+from cephadmlib.daemons.ceph import get_ceph_mounts_for_type, ceph_daemons
+from cephadmlib.daemons import (
+ Ceph,
+ CephIscsi,
+ CephNvmeof,
+ CustomContainer,
+ HAproxy,
+ Keepalived,
+ Monitoring,
+ NFSGanesha,
+ SNMPGateway,
+ Tracing,
+)
FuncT = TypeVar('FuncT', bound=Callable)
@@ -208,1980 +210,6 @@ class ContainerInfo:
##################################
-@register_daemon_form
-class Ceph(ContainerDaemonForm):
- _daemons = ('mon', 'mgr', 'osd', 'mds', 'rgw', 'rbd-mirror',
- 'crash', 'cephfs-mirror')
-
- @classmethod
- def for_daemon_type(cls, daemon_type: str) -> bool:
- # TODO: figure out a way to un-special-case osd
- return daemon_type in cls._daemons and daemon_type != 'osd'
-
- def __init__(self, ctx: CephadmContext, ident: DaemonIdentity) -> None:
- self.ctx = ctx
- self._identity = ident
- self.user_supplied_config = False
-
- @classmethod
- def create(cls, ctx: CephadmContext, ident: DaemonIdentity) -> 'Ceph':
- return cls(ctx, ident)
-
- @property
- def identity(self) -> DaemonIdentity:
- return self._identity
-
- def firewall_service_name(self) -> str:
- if self.identity.daemon_type == 'mon':
- return 'ceph-mon'
- elif self.identity.daemon_type in ['mgr', 'mds']:
- return 'ceph'
- return ''
-
- def container(self, ctx: CephadmContext) -> CephContainer:
- # previous to being a ContainerDaemonForm, this make_var_run
- # call was hard coded in the deploy path. Eventually, it would be
- # good to move this somwhere cleaner and avoid needing to know the
- # uid/gid here.
- uid, gid = self.uid_gid(ctx)
- make_var_run(ctx, ctx.fsid, uid, gid)
-
- ctr = get_container(ctx, self.identity)
- ctr = to_deployment_container(ctx, ctr)
- config_json = fetch_configs(ctx)
- if self.identity.daemon_type == 'mon' and config_json is not None:
- if 'crush_location' in config_json:
- c_loc = config_json['crush_location']
- # was originally "c.args.extend(['--set-crush-location', c_loc])"
- # but that doesn't seem to persist in the object after it's passed
- # in further function calls
- ctr.args = ctr.args + ['--set-crush-location', c_loc]
- return ctr
-
- _uid_gid: Optional[Tuple[int, int]] = None
-
- def uid_gid(self, ctx: CephadmContext) -> Tuple[int, int]:
- if self._uid_gid is None:
- self._uid_gid = extract_uid_gid(ctx)
- return self._uid_gid
-
- def config_and_keyring(
- self, ctx: CephadmContext
- ) -> Tuple[Optional[str], Optional[str]]:
- return get_config_and_keyring(ctx)
-
- def get_daemon_args(self) -> List[str]:
- if self.identity.daemon_type == 'crash':
- return []
- r = [
- '--setuser', 'ceph',
- '--setgroup', 'ceph',
- '--default-log-to-file=false',
- ]
- log_to_journald = should_log_to_journald(self.ctx)
- if log_to_journald:
- r += [
- '--default-log-to-journald=true',
- '--default-log-to-stderr=false',
- ]
- else:
- r += [
- '--default-log-to-stderr=true',
- '--default-log-stderr-prefix=debug ',
- ]
- if self.identity.daemon_type == 'mon':
- r += [
- '--default-mon-cluster-log-to-file=false',
- ]
- if log_to_journald:
- r += [
- '--default-mon-cluster-log-to-journald=true',
- '--default-mon-cluster-log-to-stderr=false',
- ]
- else:
- r += ['--default-mon-cluster-log-to-stderr=true']
- return r
-
- @staticmethod
- def get_ceph_mounts(
- ctx: CephadmContext,
- ident: DaemonIdentity,
- no_config: bool = False,
- ) -> Dict[str, str]:
- # Warning: This is a hack done for more expedient refactoring
- mounts = _get_container_mounts_for_type(
- ctx, ident.fsid, ident.daemon_type
- )
- data_dir = ident.data_dir(ctx.data_dir)
- if ident.daemon_type == 'rgw':
- cdata_dir = '/var/lib/ceph/radosgw/ceph-rgw.%s' % (
- ident.daemon_id
- )
- else:
- cdata_dir = '/var/lib/ceph/%s/ceph-%s' % (
- ident.daemon_type,
- ident.daemon_id,
- )
- if ident.daemon_type != 'crash':
- mounts[data_dir] = cdata_dir + ':z'
- if not no_config:
- mounts[data_dir + '/config'] = '/etc/ceph/ceph.conf:z'
- if ident.daemon_type in [
- 'rbd-mirror',
- 'cephfs-mirror',
- 'crash',
- 'ceph-exporter',
- ]:
- # these do not search for their keyrings in a data directory
- mounts[
- data_dir + '/keyring'
- ] = '/etc/ceph/ceph.client.%s.%s.keyring' % (
- ident.daemon_type,
- ident.daemon_id,
- )
- return mounts
-
- def customize_container_mounts(
- self, ctx: CephadmContext, mounts: Dict[str, str]
- ) -> None:
- cm = self.get_ceph_mounts(
- ctx,
- self.identity,
- no_config=self.ctx.config and self.user_supplied_config,
- )
- mounts.update(cm)
-
- def customize_container_args(
- self, ctx: CephadmContext, args: List[str]
- ) -> None:
- args.append(ctx.container_engine.unlimited_pids_option)
-
- def customize_process_args(
- self, ctx: CephadmContext, args: List[str]
- ) -> None:
- ident = self.identity
- if ident.daemon_type == 'rgw':
- name = 'client.rgw.%s' % ident.daemon_id
- elif ident.daemon_type == 'rbd-mirror':
- name = 'client.rbd-mirror.%s' % ident.daemon_id
- elif ident.daemon_type == 'cephfs-mirror':
- name = 'client.cephfs-mirror.%s' % ident.daemon_id
- elif ident.daemon_type == 'crash':
- name = 'client.crash.%s' % ident.daemon_id
- elif ident.daemon_type in ['mon', 'mgr', 'mds', 'osd']:
- name = ident.daemon_name
- else:
- raise ValueError(ident)
- args.extend(['-n', name])
- if ident.daemon_type != 'crash':
- args.append('-f')
- args.extend(self.get_daemon_args())
-
- def customize_container_envs(
- self, ctx: CephadmContext, envs: List[str]
- ) -> None:
- envs.append('TCMALLOC_MAX_TOTAL_THREAD_CACHE_BYTES=134217728')
-
- def default_entrypoint(self) -> str:
- ep = {
- 'rgw': '/usr/bin/radosgw',
- 'rbd-mirror': '/usr/bin/rbd-mirror',
- 'cephfs-mirror': '/usr/bin/cephfs-mirror',
- }
- daemon_type = self.identity.daemon_type
- return ep.get(daemon_type) or f'/usr/bin/ceph-{daemon_type}'
-
-##################################
-
-
-@register_daemon_form
-class OSD(Ceph):
- @classmethod
- def for_daemon_type(cls, daemon_type: str) -> bool:
- # TODO: figure out a way to un-special-case osd
- return daemon_type == 'osd'
-
- def __init__(
- self,
- ctx: CephadmContext,
- ident: DaemonIdentity,
- osd_fsid: Optional[str] = None,
- ) -> None:
- super().__init__(ctx, ident)
- self._osd_fsid = osd_fsid
-
- @classmethod
- def create(cls, ctx: CephadmContext, ident: DaemonIdentity) -> 'OSD':
- osd_fsid = getattr(ctx, 'osd_fsid', None)
- if osd_fsid is None:
- logger.info(
- 'Creating an OSD daemon form without an OSD FSID value'
- )
- return cls(ctx, ident, osd_fsid)
-
- @staticmethod
- def get_sysctl_settings() -> List[str]:
- return [
- '# allow a large number of OSDs',
- 'fs.aio-max-nr = 1048576',
- 'kernel.pid_max = 4194304',
- ]
-
- def firewall_service_name(self) -> str:
- return 'ceph'
-
- @property
- def osd_fsid(self) -> Optional[str]:
- return self._osd_fsid
-
-
-##################################
-
-
-@register_daemon_form
-class SNMPGateway(ContainerDaemonForm):
- """Defines an SNMP gateway between Prometheus and SNMP monitoring Frameworks"""
- daemon_type = 'snmp-gateway'
- SUPPORTED_VERSIONS = ['V2c', 'V3']
- default_image = DEFAULT_SNMP_GATEWAY_IMAGE
- DEFAULT_PORT = 9464
- env_filename = 'snmp-gateway.conf'
-
- @classmethod
- def for_daemon_type(cls, daemon_type: str) -> bool:
- return cls.daemon_type == daemon_type
-
- def __init__(self,
- ctx: CephadmContext,
- fsid: str,
- daemon_id: Union[int, str],
- config_json: Dict[str, Any],
- image: Optional[str] = None) -> None:
- self.ctx = ctx
- self.fsid = fsid
- self.daemon_id = daemon_id
- self.image = image or SNMPGateway.default_image
-
- self.uid = config_json.get('uid', 0)
- self.gid = config_json.get('gid', 0)
-
- self.destination = config_json.get('destination', '')
- self.snmp_version = config_json.get('snmp_version', 'V2c')
- self.snmp_community = config_json.get('snmp_community', 'public')
- self.log_level = config_json.get('log_level', 'info')
- self.snmp_v3_auth_username = config_json.get('snmp_v3_auth_username', '')
- self.snmp_v3_auth_password = config_json.get('snmp_v3_auth_password', '')
- self.snmp_v3_auth_protocol = config_json.get('snmp_v3_auth_protocol', '')
- self.snmp_v3_priv_protocol = config_json.get('snmp_v3_priv_protocol', '')
- self.snmp_v3_priv_password = config_json.get('snmp_v3_priv_password', '')
- self.snmp_v3_engine_id = config_json.get('snmp_v3_engine_id', '')
-
- self.validate()
-
- @classmethod
- def init(cls, ctx: CephadmContext, fsid: str,
- daemon_id: Union[int, str]) -> 'SNMPGateway':
- cfgs = fetch_configs(ctx)
- assert cfgs # assert some config data was found
- return cls(ctx, fsid, daemon_id, cfgs, ctx.image)
-
- @classmethod
- def create(cls, ctx: CephadmContext, ident: DaemonIdentity) -> 'SNMPGateway':
- return cls.init(ctx, ident.fsid, ident.daemon_id)
-
- @property
- def identity(self) -> DaemonIdentity:
- return DaemonIdentity(self.fsid, self.daemon_type, self.daemon_id)
-
- @staticmethod
- def get_version(ctx: CephadmContext, fsid: str, daemon_id: str) -> Optional[str]:
- """Return the version of the notifier from it's http endpoint"""
- path = os.path.join(ctx.data_dir, fsid, f'snmp-gateway.{daemon_id}', 'unit.meta')
- try:
- with open(path, 'r') as env:
- metadata = json.loads(env.read())
- except (OSError, json.JSONDecodeError):
- return None
-
- ports = metadata.get('ports', [])
- if not ports:
- return None
-
- try:
- with urlopen(f'http://127.0.0.1:{ports[0]}/') as r:
- html = r.read().decode('utf-8').split('\n')
- except (HTTPError, URLError):
- return None
-
- for h in html:
- stripped = h.strip()
- if stripped.startswith(('<pre>', '<PRE>')) and \
- stripped.endswith(('</pre>', '</PRE>')):
- # <pre>(version=1.2.1, branch=HEAD, revision=7...
- return stripped.split(',')[0].split('version=')[1]
-
- return None
-
- @property
- def port(self) -> int:
- endpoints = fetch_endpoints(self.ctx)
- if not endpoints:
- return self.DEFAULT_PORT
- return endpoints[0].port
-
- def get_daemon_args(self) -> List[str]:
- v3_args = []
- base_args = [
- f'--web.listen-address=:{self.port}',
- f'--snmp.destination={self.destination}',
- f'--snmp.version={self.snmp_version}',
- f'--log.level={self.log_level}',
- '--snmp.trap-description-template=/etc/snmp_notifier/description-template.tpl'
- ]
-
- if self.snmp_version == 'V3':
- # common auth settings
- v3_args.extend([
- '--snmp.authentication-enabled',
- f'--snmp.authentication-protocol={self.snmp_v3_auth_protocol}',
- f'--snmp.security-engine-id={self.snmp_v3_engine_id}'
- ])
- # authPriv setting is applied if we have a privacy protocol setting
- if self.snmp_v3_priv_protocol:
- v3_args.extend([
- '--snmp.private-enabled',
- f'--snmp.private-protocol={self.snmp_v3_priv_protocol}'
- ])
-
- return base_args + v3_args
-
- @property
- def data_dir(self) -> str:
- return os.path.join(self.ctx.data_dir, self.ctx.fsid, f'{self.daemon_type}.{self.daemon_id}')
-
- @property
- def conf_file_path(self) -> str:
- return os.path.join(self.data_dir, self.env_filename)
-
- def create_daemon_conf(self) -> None:
- """Creates the environment file holding 'secrets' passed to the snmp-notifier daemon"""
- with write_new(self.conf_file_path) as f:
- if self.snmp_version == 'V2c':
- f.write(f'SNMP_NOTIFIER_COMMUNITY={self.snmp_community}\n')
- else:
- f.write(f'SNMP_NOTIFIER_AUTH_USERNAME={self.snmp_v3_auth_username}\n')
- f.write(f'SNMP_NOTIFIER_AUTH_PASSWORD={self.snmp_v3_auth_password}\n')
- if self.snmp_v3_priv_password:
- f.write(f'SNMP_NOTIFIER_PRIV_PASSWORD={self.snmp_v3_priv_password}\n')
-
- def validate(self) -> None:
- """Validate the settings
-
- Raises:
- Error: if the fsid doesn't look like an fsid
- Error: if the snmp version is not supported
- Error: destination IP and port address missing
- """
- if not is_fsid(self.fsid):
- raise Error(f'not a valid fsid: {self.fsid}')
-
- if self.snmp_version not in SNMPGateway.SUPPORTED_VERSIONS:
- raise Error(f'not a valid snmp version: {self.snmp_version}')
-
- if not self.destination:
- raise Error('config is missing destination attribute(<ip>:<port>) of the target SNMP listener')
-
- def container(self, ctx: CephadmContext) -> CephContainer:
- ctr = get_container(ctx, self.identity)
- return to_deployment_container(ctx, ctr)
-
- def uid_gid(self, ctx: CephadmContext) -> Tuple[int, int]:
- return self.uid, self.gid
-
- def customize_container_args(
- self, ctx: CephadmContext, args: List[str]
- ) -> None:
- args.append(f'--env-file={self.conf_file_path}')
-
- def customize_process_args(
- self, ctx: CephadmContext, args: List[str]
- ) -> None:
- args.extend(self.get_daemon_args())
-
-
-##################################
-@register_daemon_form
-class Monitoring(ContainerDaemonForm):
- """Define the configs for the monitoring containers"""
-
- port_map = {
- 'prometheus': [9095], # Avoid default 9090, due to conflict with cockpit UI
- 'node-exporter': [9100],
- 'grafana': [3000],
- 'alertmanager': [9093, 9094],
- 'loki': [3100],
- 'promtail': [9080]
- }
-
- components = {
- 'prometheus': {
- 'image': DEFAULT_PROMETHEUS_IMAGE,
- 'cpus': '2',
- 'memory': '4GB',
- 'args': [
- '--config.file=/etc/prometheus/prometheus.yml',
- '--storage.tsdb.path=/prometheus',
- ],
- 'config-json-files': [
- 'prometheus.yml',
- ],
- },
- 'loki': {
- 'image': DEFAULT_LOKI_IMAGE,
- 'cpus': '1',
- 'memory': '1GB',
- 'args': [
- '--config.file=/etc/loki/loki.yml',
- ],
- 'config-json-files': [
- 'loki.yml'
- ],
- },
- 'promtail': {
- 'image': DEFAULT_PROMTAIL_IMAGE,
- 'cpus': '1',
- 'memory': '1GB',
- 'args': [
- '--config.file=/etc/promtail/promtail.yml',
- ],
- 'config-json-files': [
- 'promtail.yml',
- ],
- },
- 'node-exporter': {
- 'image': DEFAULT_NODE_EXPORTER_IMAGE,
- 'cpus': '1',
- 'memory': '1GB',
- 'args': [
- '--no-collector.timex'
- ],
- },
- 'grafana': {
- 'image': DEFAULT_GRAFANA_IMAGE,
- 'cpus': '2',
- 'memory': '4GB',
- 'args': [],
- 'config-json-files': [
- 'grafana.ini',
- 'provisioning/datasources/ceph-dashboard.yml',
- 'certs/cert_file',
- 'certs/cert_key',
- ],
- },
- 'alertmanager': {
- 'image': DEFAULT_ALERT_MANAGER_IMAGE,
- 'cpus': '2',
- 'memory': '2GB',
- 'args': [
- '--cluster.listen-address=:{}'.format(port_map['alertmanager'][1]),
- ],
- 'config-json-files': [
- 'alertmanager.yml',
- ],
- 'config-json-args': [
- 'peers',
- ],
- },
- } # type: ignore
-
- @classmethod
- def for_daemon_type(cls, daemon_type: str) -> bool:
- return daemon_type in cls.components
-
- @staticmethod
- def get_version(ctx, container_id, daemon_type):
- # type: (CephadmContext, str, str) -> str
- """
- :param: daemon_type Either "prometheus", "alertmanager", "loki", "promtail" or "node-exporter"
- """
- assert daemon_type in ('prometheus', 'alertmanager', 'node-exporter', 'loki', 'promtail')
- cmd = daemon_type.replace('-', '_')
- code = -1
- err = ''
- out = ''
- version = ''
- if daemon_type == 'alertmanager':
- for cmd in ['alertmanager', 'prometheus-alertmanager']:
- out, err, code = call(ctx, [
- ctx.container_engine.path, 'exec', container_id, cmd,
- '--version'
- ], verbosity=CallVerbosity.QUIET)
- if code == 0:
- break
- cmd = 'alertmanager' # reset cmd for version extraction
- else:
- out, err, code = call(ctx, [
- ctx.container_engine.path, 'exec', container_id, cmd, '--version'
- ], verbosity=CallVerbosity.QUIET)
- if code == 0:
- if err.startswith('%s, version ' % cmd):
- version = err.split(' ')[2]
- elif out.startswith('%s, version ' % cmd):
- version = out.split(' ')[2]
- return version
-
- @staticmethod
- def extract_uid_gid(
- ctx: CephadmContext, daemon_type: str
- ) -> Tuple[int, int]:
- if daemon_type == 'prometheus':
- uid, gid = extract_uid_gid(ctx, file_path='/etc/prometheus')
- elif daemon_type == 'node-exporter':
- uid, gid = 65534, 65534
- elif daemon_type == 'grafana':
- uid, gid = extract_uid_gid(ctx, file_path='/var/lib/grafana')
- elif daemon_type == 'loki':
- uid, gid = extract_uid_gid(ctx, file_path='/etc/loki')
- elif daemon_type == 'promtail':
- uid, gid = extract_uid_gid(ctx, file_path='/etc/promtail')
- elif daemon_type == 'alertmanager':
- uid, gid = extract_uid_gid(
- ctx, file_path=['/etc/alertmanager', '/etc/prometheus']
- )
- else:
- raise Error('{} not implemented yet'.format(daemon_type))
- return uid, gid
-
- def __init__(self, ctx: CephadmContext, ident: DaemonIdentity) -> None:
- self.ctx = ctx
- self._identity = ident
-
- @classmethod
- def create(cls, ctx: CephadmContext, ident: DaemonIdentity) -> 'Monitoring':
- return cls(ctx, ident)
-
- @property
- def identity(self) -> DaemonIdentity:
- return self._identity
-
- def container(self, ctx: CephadmContext) -> CephContainer:
- self._prevalidate(ctx)
- ctr = get_container(ctx, self.identity)
- return to_deployment_container(ctx, ctr)
-
- def uid_gid(self, ctx: CephadmContext) -> Tuple[int, int]:
- return self.extract_uid_gid(ctx, self.identity.daemon_type)
-
- def _prevalidate(self, ctx: CephadmContext) -> None:
-        # Before being refactored into a ContainerDaemonForm, these checks were
-        # done inside the deploy function. This was the only "family" of daemons
-        # that performed these checks in that location.
- daemon_type = self.identity.daemon_type
- config = fetch_configs(ctx) # type: ignore
- required_files = self.components[daemon_type].get(
- 'config-json-files', list()
- )
- required_args = self.components[daemon_type].get(
- 'config-json-args', list()
- )
- if required_files:
- if not config or not all(c in config.get('files', {}).keys() for c in required_files): # type: ignore
- raise Error(
- '{} deployment requires config-json which must '
- 'contain file content for {}'.format(
- daemon_type.capitalize(), ', '.join(required_files)
- )
- )
- if required_args:
- if not config or not all(c in config.keys() for c in required_args): # type: ignore
- raise Error(
- '{} deployment requires config-json which must '
- 'contain arg for {}'.format(
- daemon_type.capitalize(), ', '.join(required_args)
- )
- )
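For illustration only (not part of this change): the shape of config-json that the checks above expect for an alertmanager deployment. The required file and arg names come from the components table; the values below are made-up placeholders.

example_config = {
    'files': {'alertmanager.yml': 'global: {}\n'},  # covers 'config-json-files'
    'peers': ['host1:9094', 'host2:9094'],          # covers 'config-json-args'
}
required_files = ['alertmanager.yml']
required_args = ['peers']
assert all(c in example_config.get('files', {}) for c in required_files)
assert all(c in example_config for c in required_args)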
-
- def get_daemon_args(self) -> List[str]:
- ctx = self.ctx
- daemon_type = self.identity.daemon_type
- metadata = self.components[daemon_type]
- r = list(metadata.get('args', []))
-        # set the ip and port to bind to for node-exporter, alertmanager and prometheus
- if daemon_type not in ['grafana', 'loki', 'promtail']:
- ip = ''
- port = self.port_map[daemon_type][0]
- meta = fetch_meta(ctx)
- if meta:
- if 'ip' in meta and meta['ip']:
- ip = meta['ip']
- if 'ports' in meta and meta['ports']:
- port = meta['ports'][0]
- r += [f'--web.listen-address={ip}:{port}']
- if daemon_type == 'prometheus':
- config = fetch_configs(ctx)
- retention_time = config.get('retention_time', '15d')
- retention_size = config.get('retention_size', '0') # default to disabled
- r += [f'--storage.tsdb.retention.time={retention_time}']
- r += [f'--storage.tsdb.retention.size={retention_size}']
- scheme = 'http'
- host = get_fqdn()
-            # if the host is not an FQDN, use the IP instead to
-            # avoid producing a broken web.external-url link
- if '.' not in host:
- ipv4_addrs, ipv6_addrs = get_ip_addresses(get_hostname())
- # use the first ipv4 (if any) otherwise use the first ipv6
- addr = next(iter(ipv4_addrs or ipv6_addrs), None)
- host = wrap_ipv6(addr) if addr else host
- r += [f'--web.external-url={scheme}://{host}:{port}']
- if daemon_type == 'alertmanager':
- config = fetch_configs(ctx)
- peers = config.get('peers', list()) # type: ignore
- for peer in peers:
- r += ['--cluster.peer={}'.format(peer)]
- try:
- r += [f'--web.config.file={config["web_config"]}']
- except KeyError:
- pass
-            # some alertmanager versions, by default, look elsewhere for their config
- r += ['--config.file=/etc/alertmanager/alertmanager.yml']
- if daemon_type == 'promtail':
- r += ['--config.expand-env']
- if daemon_type == 'prometheus':
- config = fetch_configs(ctx)
- try:
- r += [f'--web.config.file={config["web_config"]}']
- except KeyError:
- pass
- if daemon_type == 'node-exporter':
- config = fetch_configs(ctx)
- try:
- r += [f'--web.config.file={config["web_config"]}']
- except KeyError:
- pass
- r += ['--path.procfs=/host/proc',
- '--path.sysfs=/host/sys',
- '--path.rootfs=/rootfs']
- return r
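Illustrative sketch (not from the patch) of the FQDN fallback used above when building the prometheus --web.external-url: prefer the first IPv4 address, else the first IPv6. It assumes wrap_ipv6 brackets IPv6 literals; the addresses and port are placeholders.

ipv4_addrs, ipv6_addrs = ['10.0.0.5'], ['fd00::5']  # placeholder addresses
host, port = 'node1', 9095                          # no '.', so not an FQDN
if '.' not in host:
    addr = next(iter(ipv4_addrs or ipv6_addrs), None)
    if addr:
        host = '[%s]' % addr if ':' in addr else addr
external_url = 'http://%s:%s' % (host, port)        # -> 'http://10.0.0.5:9095'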
-
- def _get_container_mounts(self, data_dir: str) -> Dict[str, str]:
- ctx = self.ctx
- daemon_type = self.identity.daemon_type
- mounts: Dict[str, str] = {}
- log_dir = get_log_dir(self.identity.fsid, ctx.log_dir)
- if daemon_type == 'prometheus':
- mounts[
- os.path.join(data_dir, 'etc/prometheus')
- ] = '/etc/prometheus:Z'
- mounts[os.path.join(data_dir, 'data')] = '/prometheus:Z'
- elif daemon_type == 'loki':
- mounts[os.path.join(data_dir, 'etc/loki')] = '/etc/loki:Z'
- mounts[os.path.join(data_dir, 'data')] = '/loki:Z'
- elif daemon_type == 'promtail':
- mounts[os.path.join(data_dir, 'etc/promtail')] = '/etc/promtail:Z'
- mounts[log_dir] = '/var/log/ceph:z'
- mounts[os.path.join(data_dir, 'data')] = '/promtail:Z'
- elif daemon_type == 'node-exporter':
- mounts[
- os.path.join(data_dir, 'etc/node-exporter')
- ] = '/etc/node-exporter:Z'
- mounts['/proc'] = '/host/proc:ro'
- mounts['/sys'] = '/host/sys:ro'
- mounts['/'] = '/rootfs:ro'
- elif daemon_type == 'grafana':
- mounts[
- os.path.join(data_dir, 'etc/grafana/grafana.ini')
- ] = '/etc/grafana/grafana.ini:Z'
- mounts[
- os.path.join(data_dir, 'etc/grafana/provisioning/datasources')
- ] = '/etc/grafana/provisioning/datasources:Z'
- mounts[
- os.path.join(data_dir, 'etc/grafana/certs')
- ] = '/etc/grafana/certs:Z'
- mounts[
- os.path.join(data_dir, 'data/grafana.db')
- ] = '/var/lib/grafana/grafana.db:Z'
- elif daemon_type == 'alertmanager':
- mounts[
- os.path.join(data_dir, 'etc/alertmanager')
- ] = '/etc/alertmanager:Z'
- return mounts
-
- def customize_container_mounts(
- self, ctx: CephadmContext, mounts: Dict[str, str]
- ) -> None:
- data_dir = self.identity.data_dir(ctx.data_dir)
- mounts.update(self._get_container_mounts(data_dir))
-
- def customize_container_args(
- self, ctx: CephadmContext, args: List[str]
- ) -> None:
- uid, _ = self.uid_gid(ctx)
- monitoring_args = [
- '--user',
- str(uid),
- # FIXME: disable cpu/memory limits for the time being (not supported
- # by ubuntu 18.04 kernel!)
- ]
- args.extend(monitoring_args)
- if self.identity.daemon_type == 'node-exporter':
-            # in order to support setting '--path.procfs=/host/proc', '--path.sysfs=/host/sys'
-            # and '--path.rootfs=/rootfs' for node-exporter, we need to disable SELinux separation
-            # between the node-exporter container and the host to avoid SELinux denials
- args.extend(['--security-opt', 'label=disable'])
-
- def customize_process_args(
- self, ctx: CephadmContext, args: List[str]
- ) -> None:
- args.extend(self.get_daemon_args())
-
- def default_entrypoint(self) -> str:
- return ''
-
-##################################
-
-
-@register_daemon_form
-class NFSGanesha(ContainerDaemonForm):
- """Defines a NFS-Ganesha container"""
-
- daemon_type = 'nfs'
- entrypoint = '/usr/bin/ganesha.nfsd'
- daemon_args = ['-F', '-L', 'STDERR']
-
- required_files = ['ganesha.conf']
-
- port_map = {
- 'nfs': 2049,
- }
-
- @classmethod
- def for_daemon_type(cls, daemon_type: str) -> bool:
- return cls.daemon_type == daemon_type
-
- def __init__(self,
- ctx,
- fsid,
- daemon_id,
- config_json,
- image=DEFAULT_IMAGE):
- # type: (CephadmContext, str, Union[int, str], Dict, str) -> None
- self.ctx = ctx
- self.fsid = fsid
- self.daemon_id = daemon_id
- self.image = image
-
- # config-json options
- self.pool = dict_get(config_json, 'pool', require=True)
- self.namespace = dict_get(config_json, 'namespace')
- self.userid = dict_get(config_json, 'userid')
- self.extra_args = dict_get(config_json, 'extra_args', [])
- self.files = dict_get(config_json, 'files', {})
- self.rgw = dict_get(config_json, 'rgw', {})
-
- # validate the supplied args
- self.validate()
-
- @classmethod
- def init(cls, ctx, fsid, daemon_id):
- # type: (CephadmContext, str, Union[int, str]) -> NFSGanesha
- return cls(ctx, fsid, daemon_id, fetch_configs(ctx), ctx.image)
-
- @classmethod
- def create(cls, ctx: CephadmContext, ident: DaemonIdentity) -> 'NFSGanesha':
- return cls.init(ctx, ident.fsid, ident.daemon_id)
-
- @property
- def identity(self) -> DaemonIdentity:
- return DaemonIdentity(self.fsid, self.daemon_type, self.daemon_id)
-
- def _get_container_mounts(self, data_dir):
- # type: (str) -> Dict[str, str]
- mounts = dict()
- mounts[os.path.join(data_dir, 'config')] = '/etc/ceph/ceph.conf:z'
- mounts[os.path.join(data_dir, 'keyring')] = '/etc/ceph/keyring:z'
- mounts[os.path.join(data_dir, 'etc/ganesha')] = '/etc/ganesha:z'
- if self.rgw:
- cluster = self.rgw.get('cluster', 'ceph')
- rgw_user = self.rgw.get('user', 'admin')
- mounts[os.path.join(data_dir, 'keyring.rgw')] = \
- '/var/lib/ceph/radosgw/%s-%s/keyring:z' % (cluster, rgw_user)
- return mounts
-
- def customize_container_mounts(
- self, ctx: CephadmContext, mounts: Dict[str, str]
- ) -> None:
- data_dir = self.identity.data_dir(ctx.data_dir)
- mounts.update(self._get_container_mounts(data_dir))
-
- @staticmethod
- def get_container_envs():
- # type: () -> List[str]
- envs = [
- 'CEPH_CONF=%s' % (CEPH_DEFAULT_CONF)
- ]
- return envs
-
- @staticmethod
- def get_version(ctx, container_id):
- # type: (CephadmContext, str) -> Optional[str]
- version = None
- out, err, code = call(ctx,
- [ctx.container_engine.path, 'exec', container_id,
- NFSGanesha.entrypoint, '-v'],
- verbosity=CallVerbosity.QUIET)
- if code == 0:
- match = re.search(r'NFS-Ganesha Release\s*=\s*[V]*([\d.]+)', out)
- if match:
- version = match.group(1)
- return version
-
- def validate(self):
- # type: () -> None
- if not is_fsid(self.fsid):
- raise Error('not an fsid: %s' % self.fsid)
- if not self.daemon_id:
- raise Error('invalid daemon_id: %s' % self.daemon_id)
- if not self.image:
- raise Error('invalid image: %s' % self.image)
-
- # check for the required files
- if self.required_files:
- for fname in self.required_files:
- if fname not in self.files:
- raise Error('required file missing from config-json: %s' % fname)
-
- # check for an RGW config
- if self.rgw:
- if not self.rgw.get('keyring'):
- raise Error('RGW keyring is missing')
- if not self.rgw.get('user'):
- raise Error('RGW user is missing')
-
- def get_daemon_name(self):
- # type: () -> str
- return '%s.%s' % (self.daemon_type, self.daemon_id)
-
- def get_container_name(self, desc=None):
- # type: (Optional[str]) -> str
- cname = 'ceph-%s-%s' % (self.fsid, self.get_daemon_name())
- if desc:
- cname = '%s-%s' % (cname, desc)
- return cname
-
- def get_daemon_args(self):
- # type: () -> List[str]
- return self.daemon_args + self.extra_args
-
- def create_daemon_dirs(self, data_dir, uid, gid):
- # type: (str, int, int) -> None
- """Create files under the container data dir"""
- if not os.path.isdir(data_dir):
- raise OSError('data_dir is not a directory: %s' % (data_dir))
-
- logger.info('Creating ganesha config...')
-
- # create the ganesha conf dir
- config_dir = os.path.join(data_dir, 'etc/ganesha')
- makedirs(config_dir, uid, gid, 0o755)
-
- # populate files from the config-json
- populate_files(config_dir, self.files, uid, gid)
-
- # write the RGW keyring
- if self.rgw:
- keyring_path = os.path.join(data_dir, 'keyring.rgw')
- with write_new(keyring_path, owner=(uid, gid)) as f:
- f.write(self.rgw.get('keyring', ''))
-
- def firewall_service_name(self) -> str:
- return 'nfs'
-
- def container(self, ctx: CephadmContext) -> CephContainer:
- ctr = get_container(ctx, self.identity)
- return to_deployment_container(ctx, ctr)
-
- def customize_container_endpoints(
- self, endpoints: List[EndPoint], deployment_type: DeploymentType
- ) -> None:
- if deployment_type == DeploymentType.DEFAULT and not endpoints:
- nfs_ports = list(NFSGanesha.port_map.values())
- endpoints.extend([EndPoint('0.0.0.0', p) for p in nfs_ports])
-
- def uid_gid(self, ctx: CephadmContext) -> Tuple[int, int]:
- # TODO: extract ganesha uid/gid (997, 994) ?
- return extract_uid_gid(ctx)
-
- def config_and_keyring(
- self, ctx: CephadmContext
- ) -> Tuple[Optional[str], Optional[str]]:
- return get_config_and_keyring(ctx)
-
- def customize_container_envs(
- self, ctx: CephadmContext, envs: List[str]
- ) -> None:
- envs.extend(self.get_container_envs())
-
- def customize_process_args(
- self, ctx: CephadmContext, args: List[str]
- ) -> None:
- args.extend(self.get_daemon_args())
-
- def customize_container_args(
- self, ctx: CephadmContext, args: List[str]
- ) -> None:
- args.append(ctx.container_engine.unlimited_pids_option)
-
- def default_entrypoint(self) -> str:
- return self.entrypoint
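A minimal sketch (not part of the diff) of the config-json an NFS-Ganesha deployment carries, matching the dict_get() calls in __init__ and the validate() checks above; every value is a placeholder.

nfs_config = {
    'pool': 'nfs-ganesha',                               # required (require=True)
    'namespace': 'ganesha',                              # optional
    'userid': 'nfs.foo',                                 # optional
    'files': {'ganesha.conf': 'EXPORT {\n  ...\n}\n'},   # must cover required_files
    'rgw': {'cluster': 'ceph', 'user': 'admin', 'keyring': '[client.rgw]\n...'},
}
assert all(fname in nfs_config['files'] for fname in ['ganesha.conf'])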
-
-##################################
-
-
-@register_daemon_form
-class CephIscsi(ContainerDaemonForm):
- """Defines a Ceph-Iscsi container"""
-
- daemon_type = 'iscsi'
- entrypoint = '/usr/bin/rbd-target-api'
-
- required_files = ['iscsi-gateway.cfg']
-
- @classmethod
- def for_daemon_type(cls, daemon_type: str) -> bool:
- return cls.daemon_type == daemon_type
-
- def __init__(self,
- ctx,
- fsid,
- daemon_id,
- config_json,
- image=DEFAULT_IMAGE):
- # type: (CephadmContext, str, Union[int, str], Dict, str) -> None
- self.ctx = ctx
- self.fsid = fsid
- self.daemon_id = daemon_id
- self.image = image
-
- # config-json options
- self.files = dict_get(config_json, 'files', {})
-
- # validate the supplied args
- self.validate()
-
- @classmethod
- def init(cls, ctx, fsid, daemon_id):
- # type: (CephadmContext, str, Union[int, str]) -> CephIscsi
- return cls(ctx, fsid, daemon_id,
- fetch_configs(ctx), ctx.image)
-
- @classmethod
- def create(cls, ctx: CephadmContext, ident: DaemonIdentity) -> 'CephIscsi':
- return cls.init(ctx, ident.fsid, ident.daemon_id)
-
- @property
- def identity(self) -> DaemonIdentity:
- return DaemonIdentity(self.fsid, self.daemon_type, self.daemon_id)
-
- @staticmethod
- def _get_container_mounts(data_dir, log_dir):
- # type: (str, str) -> Dict[str, str]
- mounts = dict()
- mounts[os.path.join(data_dir, 'config')] = '/etc/ceph/ceph.conf:z'
- mounts[os.path.join(data_dir, 'keyring')] = '/etc/ceph/keyring:z'
- mounts[os.path.join(data_dir, 'iscsi-gateway.cfg')] = '/etc/ceph/iscsi-gateway.cfg:z'
- mounts[os.path.join(data_dir, 'configfs')] = '/sys/kernel/config'
- mounts[os.path.join(data_dir, 'tcmu-runner-entrypoint.sh')] = '/usr/local/scripts/tcmu-runner-entrypoint.sh'
- mounts[log_dir] = '/var/log:z'
- mounts['/dev'] = '/dev'
- return mounts
-
- def customize_container_mounts(
- self, ctx: CephadmContext, mounts: Dict[str, str]
- ) -> None:
- data_dir = self.identity.data_dir(ctx.data_dir)
- # Removes ending ".tcmu" from data_dir a tcmu-runner uses the same
- # data_dir as rbd-runner-api
- if data_dir.endswith('.tcmu'):
- data_dir = re.sub(r'\.tcmu$', '', data_dir)
- log_dir = get_log_dir(self.identity.fsid, ctx.log_dir)
- mounts.update(CephIscsi._get_container_mounts(data_dir, log_dir))
-
- def customize_container_binds(
- self, ctx: CephadmContext, binds: List[List[str]]
- ) -> None:
- lib_modules = [
- 'type=bind',
- 'source=/lib/modules',
- 'destination=/lib/modules',
- 'ro=true',
- ]
- binds.append(lib_modules)
-
- @staticmethod
- def get_version(ctx, container_id):
- # type: (CephadmContext, str) -> Optional[str]
- version = None
- out, err, code = call(ctx,
- [ctx.container_engine.path, 'exec', container_id,
- '/usr/bin/python3', '-c',
- "import pkg_resources; print(pkg_resources.require('ceph_iscsi')[0].version)"],
- verbosity=CallVerbosity.QUIET)
- if code == 0:
- version = out.strip()
- return version
-
- def validate(self):
- # type: () -> None
- if not is_fsid(self.fsid):
- raise Error('not an fsid: %s' % self.fsid)
- if not self.daemon_id:
- raise Error('invalid daemon_id: %s' % self.daemon_id)
- if not self.image:
- raise Error('invalid image: %s' % self.image)
-
- # check for the required files
- if self.required_files:
- for fname in self.required_files:
- if fname not in self.files:
- raise Error('required file missing from config-json: %s' % fname)
-
- def get_daemon_name(self):
- # type: () -> str
- return '%s.%s' % (self.daemon_type, self.daemon_id)
-
- def get_container_name(self, desc=None):
- # type: (Optional[str]) -> str
- cname = 'ceph-%s-%s' % (self.fsid, self.get_daemon_name())
- if desc:
- cname = '%s-%s' % (cname, desc)
- return cname
-
- def create_daemon_dirs(self, data_dir, uid, gid):
- # type: (str, int, int) -> None
- """Create files under the container data dir"""
- if not os.path.isdir(data_dir):
- raise OSError('data_dir is not a directory: %s' % (data_dir))
-
- logger.info('Creating ceph-iscsi config...')
- configfs_dir = os.path.join(data_dir, 'configfs')
- makedirs(configfs_dir, uid, gid, 0o755)
-
- # set up the tcmu-runner entrypoint script
- # to be mounted into the container. For more info
- # on why we need this script, see the
- # tcmu_runner_entrypoint_script function
- self.files['tcmu-runner-entrypoint.sh'] = self.tcmu_runner_entrypoint_script()
-
- # populate files from the config-json
- populate_files(data_dir, self.files, uid, gid)
-
- # we want the tcmu runner entrypoint script to be executable
- # populate_files will give it 0o600 by default
- os.chmod(os.path.join(data_dir, 'tcmu-runner-entrypoint.sh'), 0o700)
-
- @staticmethod
- def configfs_mount_umount(data_dir, mount=True):
- # type: (str, bool) -> List[str]
- mount_path = os.path.join(data_dir, 'configfs')
- if mount:
- cmd = 'if ! grep -qs {0} /proc/mounts; then ' \
- 'mount -t configfs none {0}; fi'.format(mount_path)
- else:
- cmd = 'if grep -qs {0} /proc/mounts; then ' \
- 'umount {0}; fi'.format(mount_path)
- return cmd.split()
-
- @staticmethod
- def tcmu_runner_entrypoint_script() -> str:
-        # Since tcmu-runner runs as a background process in its systemd unit
-        # (rbd-target-api being the main process), systemd will not restart it
-        # when it fails. To work around that for now, we mount a script into
-        # the container that attempts to do the restarting for us. This script
-        # then becomes the entrypoint for the tcmu-runner container.
-
-        # This is intended to be dropped in favor of a better solution
-        # from the squid release onward.
- return """#!/bin/bash
-RUN_DIR=/var/run/tcmu-runner
-
-if [ ! -d "${RUN_DIR}" ] ; then
- mkdir -p "${RUN_DIR}"
-fi
-
-rm -rf "${RUN_DIR}"/*
-
-while true
-do
- touch "${RUN_DIR}"/start-up-$(date -Ins)
- /usr/bin/tcmu-runner
-
- # If we got around 3 kills/segfaults in the last minute,
- # don't start anymore
- if [ $(find "${RUN_DIR}" -type f -cmin -1 | wc -l) -ge 3 ] ; then
- exit 0
- fi
-
- sleep 1
-done
-"""
-
- def get_tcmu_runner_container(self):
- # type: () -> CephContainer
-        # The daemon_id is used to generate the cid and pid files used by podman, but as both
-        # tcmu-runner and rbd-target-api have the same daemon_id, they conflict and prevent the
-        # second container from starting. '.tcmu' is appended to the daemon_id to fix that.
- subident = DaemonSubIdentity(
- self.fsid, self.daemon_type, self.daemon_id, 'tcmu'
- )
- tcmu_container = to_deployment_container(
- self.ctx, get_container(self.ctx, subident)
- )
-        # TODO: Eventually we don't want to run tcmu-runner through this script.
-        # This is intended as a workaround backported to older releases
-        # and should eventually be removed from the squid release onward.
- tcmu_container.entrypoint = '/usr/local/scripts/tcmu-runner-entrypoint.sh'
- tcmu_container.cname = self.get_container_name(desc='tcmu')
- return tcmu_container
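For illustration (not part of the diff): how the two containers of one iscsi deployment end up with distinct names via get_container_name() above; the fsid and daemon_id are invented.

fsid, daemon_id = '01234567-89ab-cdef-0123-456789abcdef', 'gw1'
daemon_name = 'iscsi.%s' % daemon_id
api_cname = 'ceph-%s-%s' % (fsid, daemon_name)   # rbd-target-api container
tcmu_cname = '%s-%s' % (api_cname, 'tcmu')       # tcmu-runner sidecar
assert api_cname != tcmu_cname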
-
- def container(self, ctx: CephadmContext) -> CephContainer:
- ctr = get_container(ctx, self.identity)
- return to_deployment_container(ctx, ctr)
-
- def config_and_keyring(
- self, ctx: CephadmContext
- ) -> Tuple[Optional[str], Optional[str]]:
- return get_config_and_keyring(ctx)
-
- def uid_gid(self, ctx: CephadmContext) -> Tuple[int, int]:
- return extract_uid_gid(ctx)
-
- def default_entrypoint(self) -> str:
- return self.entrypoint
-
- def customize_container_args(
- self, ctx: CephadmContext, args: List[str]
- ) -> None:
- args.append(ctx.container_engine.unlimited_pids_option)
-
-##################################
-
-
-@register_daemon_form
-class CephNvmeof(ContainerDaemonForm):
- """Defines a Ceph-Nvmeof container"""
-
- daemon_type = 'nvmeof'
- required_files = ['ceph-nvmeof.conf']
- default_image = DEFAULT_NVMEOF_IMAGE
-
- @classmethod
- def for_daemon_type(cls, daemon_type: str) -> bool:
- return cls.daemon_type == daemon_type
-
- def __init__(self,
- ctx,
- fsid,
- daemon_id,
- config_json,
- image=DEFAULT_NVMEOF_IMAGE):
- # type: (CephadmContext, str, Union[int, str], Dict, str) -> None
- self.ctx = ctx
- self.fsid = fsid
- self.daemon_id = daemon_id
- self.image = image
-
- # config-json options
- self.files = dict_get(config_json, 'files', {})
-
- # validate the supplied args
- self.validate()
-
- @classmethod
- def init(cls, ctx, fsid, daemon_id):
- # type: (CephadmContext, str, Union[int, str]) -> CephNvmeof
- return cls(ctx, fsid, daemon_id,
- fetch_configs(ctx), ctx.image)
-
- @classmethod
- def create(cls, ctx: CephadmContext, ident: DaemonIdentity) -> 'CephNvmeof':
- return cls.init(ctx, ident.fsid, ident.daemon_id)
-
- @property
- def identity(self) -> DaemonIdentity:
- return DaemonIdentity(self.fsid, self.daemon_type, self.daemon_id)
-
- @staticmethod
- def _get_container_mounts(data_dir: str) -> Dict[str, str]:
- mounts = dict()
- mounts[os.path.join(data_dir, 'config')] = '/etc/ceph/ceph.conf:z'
- mounts[os.path.join(data_dir, 'keyring')] = '/etc/ceph/keyring:z'
- mounts[os.path.join(data_dir, 'ceph-nvmeof.conf')] = '/src/ceph-nvmeof.conf:z'
- mounts[os.path.join(data_dir, 'configfs')] = '/sys/kernel/config'
- mounts['/dev/hugepages'] = '/dev/hugepages'
- mounts['/dev/vfio/vfio'] = '/dev/vfio/vfio'
- return mounts
-
- def customize_container_mounts(
- self, ctx: CephadmContext, mounts: Dict[str, str]
- ) -> None:
- data_dir = self.identity.data_dir(ctx.data_dir)
- mounts.update(self._get_container_mounts(data_dir))
-
- def customize_container_binds(
- self, ctx: CephadmContext, binds: List[List[str]]
- ) -> None:
- lib_modules = [
- 'type=bind',
- 'source=/lib/modules',
- 'destination=/lib/modules',
- 'ro=true',
- ]
- binds.append(lib_modules)
-
- @staticmethod
- def get_version(ctx: CephadmContext, container_id: str) -> Optional[str]:
- out, err, ret = call(ctx,
- [ctx.container_engine.path, 'inspect',
- '--format', '{{index .Config.Labels "io.ceph.version"}}',
- ctx.image])
- version = None
- if ret == 0:
- version = out.strip()
- return version
-
- def validate(self):
- # type: () -> None
- if not is_fsid(self.fsid):
- raise Error('not an fsid: %s' % self.fsid)
- if not self.daemon_id:
- raise Error('invalid daemon_id: %s' % self.daemon_id)
- if not self.image:
- raise Error('invalid image: %s' % self.image)
-
- # check for the required files
- if self.required_files:
- for fname in self.required_files:
- if fname not in self.files:
- raise Error('required file missing from config-json: %s' % fname)
-
- def get_daemon_name(self):
- # type: () -> str
- return '%s.%s' % (self.daemon_type, self.daemon_id)
-
- def get_container_name(self, desc=None):
- # type: (Optional[str]) -> str
- cname = '%s-%s' % (self.fsid, self.get_daemon_name())
- if desc:
- cname = '%s-%s' % (cname, desc)
- return cname
-
- def create_daemon_dirs(self, data_dir, uid, gid):
- # type: (str, int, int) -> None
- """Create files under the container data dir"""
- if not os.path.isdir(data_dir):
- raise OSError('data_dir is not a directory: %s' % (data_dir))
-
- logger.info('Creating ceph-nvmeof config...')
- configfs_dir = os.path.join(data_dir, 'configfs')
- makedirs(configfs_dir, uid, gid, 0o755)
-
- # populate files from the config-json
- populate_files(data_dir, self.files, uid, gid)
-
- @staticmethod
- def configfs_mount_umount(data_dir, mount=True):
- # type: (str, bool) -> List[str]
- mount_path = os.path.join(data_dir, 'configfs')
- if mount:
- cmd = 'if ! grep -qs {0} /proc/mounts; then ' \
- 'mount -t configfs none {0}; fi'.format(mount_path)
- else:
- cmd = 'if grep -qs {0} /proc/mounts; then ' \
- 'umount {0}; fi'.format(mount_path)
- return cmd.split()
-
- @staticmethod
- def get_sysctl_settings() -> List[str]:
- return [
- 'vm.nr_hugepages = 4096',
- ]
-
- def container(self, ctx: CephadmContext) -> CephContainer:
- ctr = get_container(ctx, self.identity)
- return to_deployment_container(ctx, ctr)
-
- def uid_gid(self, ctx: CephadmContext) -> Tuple[int, int]:
-        return 167, 167  # TODO: need a proper way to get the uid/gid
-
- def config_and_keyring(
- self, ctx: CephadmContext
- ) -> Tuple[Optional[str], Optional[str]]:
- return get_config_and_keyring(ctx)
-
- def customize_container_args(
- self, ctx: CephadmContext, args: List[str]
- ) -> None:
- args.append(ctx.container_engine.unlimited_pids_option)
- args.extend(['--ulimit', 'memlock=-1:-1'])
- args.extend(['--ulimit', 'nofile=10240'])
- args.extend(['--cap-add=SYS_ADMIN', '--cap-add=CAP_SYS_NICE'])
-
-
-##################################
-
-
-@register_daemon_form
-class CephExporter(ContainerDaemonForm):
- """Defines a Ceph exporter container"""
-
- daemon_type = 'ceph-exporter'
- entrypoint = '/usr/bin/ceph-exporter'
- DEFAULT_PORT = 9926
- port_map = {
- 'ceph-exporter': DEFAULT_PORT,
- }
-
- @classmethod
- def for_daemon_type(cls, daemon_type: str) -> bool:
- return cls.daemon_type == daemon_type
-
- def __init__(self,
- ctx: CephadmContext,
- fsid: str, daemon_id: Union[int, str],
- config_json: Dict[str, Any],
- image: str = DEFAULT_IMAGE) -> None:
- self.ctx = ctx
- self.fsid = fsid
- self.daemon_id = daemon_id
- self.image = image
-
- self.sock_dir = config_json.get('sock-dir', '/var/run/ceph/')
- ipv4_addrs, _ = get_ip_addresses(get_hostname())
- addrs = '0.0.0.0' if ipv4_addrs else '::'
- self.addrs = config_json.get('addrs', addrs)
- self.port = config_json.get('port', self.DEFAULT_PORT)
- self.prio_limit = config_json.get('prio-limit', 5)
- self.stats_period = config_json.get('stats-period', 5)
-
- self.validate()
-
- @classmethod
- def init(cls, ctx: CephadmContext, fsid: str,
- daemon_id: Union[int, str]) -> 'CephExporter':
- return cls(ctx, fsid, daemon_id,
- fetch_configs(ctx), ctx.image)
-
- @classmethod
- def create(cls, ctx: CephadmContext, ident: DaemonIdentity) -> 'CephExporter':
- return cls.init(ctx, ident.fsid, ident.daemon_id)
-
- @property
- def identity(self) -> DaemonIdentity:
- return DaemonIdentity(self.fsid, self.daemon_type, self.daemon_id)
-
- def get_daemon_args(self) -> List[str]:
- args = [
- f'--sock-dir={self.sock_dir}',
- f'--addrs={self.addrs}',
- f'--port={self.port}',
- f'--prio-limit={self.prio_limit}',
- f'--stats-period={self.stats_period}',
- ]
- return args
-
- def validate(self) -> None:
- if not os.path.isdir(self.sock_dir):
- raise Error(f'Directory does not exist. Got: {self.sock_dir}')
-
- def container(self, ctx: CephadmContext) -> CephContainer:
- ctr = get_container(ctx, self.identity)
- return to_deployment_container(ctx, ctr)
-
- def uid_gid(self, ctx: CephadmContext) -> Tuple[int, int]:
- return extract_uid_gid(ctx)
-
- def config_and_keyring(
- self, ctx: CephadmContext
- ) -> Tuple[Optional[str], Optional[str]]:
- return get_config_and_keyring(ctx)
-
- def customize_container_mounts(
- self, ctx: CephadmContext, mounts: Dict[str, str]
- ) -> None:
- cm = Ceph.get_ceph_mounts(ctx, self.identity)
- mounts.update(cm)
-
- def customize_process_args(
- self, ctx: CephadmContext, args: List[str]
- ) -> None:
- name = 'client.ceph-exporter.%s' % self.identity.daemon_id
- args.extend(['-n', name, '-f'])
- args.extend(self.get_daemon_args())
-
- def customize_container_args(
- self, ctx: CephadmContext, args: List[str]
- ) -> None:
- args.append(ctx.container_engine.unlimited_pids_option)
-
- def customize_container_envs(
- self, ctx: CephadmContext, envs: List[str]
- ) -> None:
- envs.append('TCMALLOC_MAX_TOTAL_THREAD_CACHE_BYTES=134217728')
-
- def default_entrypoint(self) -> str:
- return self.entrypoint
-
-
-##################################
-
-
-@register_daemon_form
-class HAproxy(ContainerDaemonForm):
- """Defines an HAproxy container"""
- daemon_type = 'haproxy'
- required_files = ['haproxy.cfg']
- default_image = DEFAULT_HAPROXY_IMAGE
-
- @classmethod
- def for_daemon_type(cls, daemon_type: str) -> bool:
- return cls.daemon_type == daemon_type
-
- def __init__(self,
- ctx: CephadmContext,
- fsid: str, daemon_id: Union[int, str],
- config_json: Dict, image: str) -> None:
- self.ctx = ctx
- self.fsid = fsid
- self.daemon_id = daemon_id
- self.image = image
-
- # config-json options
- self.files = dict_get(config_json, 'files', {})
-
- self.validate()
-
- @classmethod
- def init(cls, ctx: CephadmContext,
- fsid: str, daemon_id: Union[int, str]) -> 'HAproxy':
- return cls(ctx, fsid, daemon_id, fetch_configs(ctx),
- ctx.image)
-
- @classmethod
- def create(cls, ctx: CephadmContext, ident: DaemonIdentity) -> 'HAproxy':
- return cls.init(ctx, ident.fsid, ident.daemon_id)
-
- @property
- def identity(self) -> DaemonIdentity:
- return DaemonIdentity(self.fsid, self.daemon_type, self.daemon_id)
-
- def create_daemon_dirs(self, data_dir: str, uid: int, gid: int) -> None:
- """Create files under the container data dir"""
- if not os.path.isdir(data_dir):
- raise OSError('data_dir is not a directory: %s' % (data_dir))
-
- # create additional directories in data dir for HAproxy to use
- if not os.path.isdir(os.path.join(data_dir, 'haproxy')):
- makedirs(os.path.join(data_dir, 'haproxy'), uid, gid, DATA_DIR_MODE)
-
- data_dir = os.path.join(data_dir, 'haproxy')
- populate_files(data_dir, self.files, uid, gid)
-
- def get_daemon_args(self) -> List[str]:
- return ['haproxy', '-f', '/var/lib/haproxy/haproxy.cfg']
-
- def validate(self):
- # type: () -> None
- if not is_fsid(self.fsid):
- raise Error('not an fsid: %s' % self.fsid)
- if not self.daemon_id:
- raise Error('invalid daemon_id: %s' % self.daemon_id)
- if not self.image:
- raise Error('invalid image: %s' % self.image)
-
- # check for the required files
- if self.required_files:
- for fname in self.required_files:
- if fname not in self.files:
- raise Error('required file missing from config-json: %s' % fname)
-
- def get_daemon_name(self):
- # type: () -> str
- return '%s.%s' % (self.daemon_type, self.daemon_id)
-
- def get_container_name(self, desc=None):
- # type: (Optional[str]) -> str
- cname = 'ceph-%s-%s' % (self.fsid, self.get_daemon_name())
- if desc:
- cname = '%s-%s' % (cname, desc)
- return cname
-
- def uid_gid(self, ctx: CephadmContext) -> Tuple[int, int]:
- # better directory for this?
- return extract_uid_gid(self.ctx, file_path='/var/lib')
-
- @staticmethod
- def _get_container_mounts(data_dir: str) -> Dict[str, str]:
- mounts = dict()
- mounts[os.path.join(data_dir, 'haproxy')] = '/var/lib/haproxy'
- return mounts
-
- def customize_container_mounts(
- self, ctx: CephadmContext, mounts: Dict[str, str]
- ) -> None:
- data_dir = self.identity.data_dir(ctx.data_dir)
- mounts.update(self._get_container_mounts(data_dir))
-
- @staticmethod
- def get_sysctl_settings() -> List[str]:
- return [
- '# IP forwarding and non-local bind',
- 'net.ipv4.ip_forward = 1',
- 'net.ipv4.ip_nonlocal_bind = 1',
- ]
-
- def container(self, ctx: CephadmContext) -> CephContainer:
- ctr = get_container(ctx, self.identity)
- return to_deployment_container(ctx, ctr)
-
- def customize_container_args(
- self, ctx: CephadmContext, args: List[str]
- ) -> None:
- args.extend(
- ['--user=root']
- ) # haproxy 2.4 defaults to a different user
-
- def customize_process_args(
- self, ctx: CephadmContext, args: List[str]
- ) -> None:
- args.extend(self.get_daemon_args())
-
-
-##################################
-
-
-@register_daemon_form
-class Keepalived(ContainerDaemonForm):
- """Defines an Keepalived container"""
- daemon_type = 'keepalived'
- required_files = ['keepalived.conf']
- default_image = DEFAULT_KEEPALIVED_IMAGE
-
- @classmethod
- def for_daemon_type(cls, daemon_type: str) -> bool:
- return cls.daemon_type == daemon_type
-
- def __init__(self,
- ctx: CephadmContext,
- fsid: str, daemon_id: Union[int, str],
- config_json: Dict, image: str) -> None:
- self.ctx = ctx
- self.fsid = fsid
- self.daemon_id = daemon_id
- self.image = image
-
- # config-json options
- self.files = dict_get(config_json, 'files', {})
-
- self.validate()
-
- @classmethod
- def init(cls, ctx: CephadmContext, fsid: str,
- daemon_id: Union[int, str]) -> 'Keepalived':
- return cls(ctx, fsid, daemon_id,
- fetch_configs(ctx), ctx.image)
-
- @classmethod
- def create(cls, ctx: CephadmContext, ident: DaemonIdentity) -> 'Keepalived':
- return cls.init(ctx, ident.fsid, ident.daemon_id)
-
- @property
- def identity(self) -> DaemonIdentity:
- return DaemonIdentity(self.fsid, self.daemon_type, self.daemon_id)
-
- def create_daemon_dirs(self, data_dir: str, uid: int, gid: int) -> None:
- """Create files under the container data dir"""
- if not os.path.isdir(data_dir):
- raise OSError('data_dir is not a directory: %s' % (data_dir))
-
- # create additional directories in data dir for keepalived to use
- if not os.path.isdir(os.path.join(data_dir, 'keepalived')):
- makedirs(os.path.join(data_dir, 'keepalived'), uid, gid, DATA_DIR_MODE)
-
- # populate files from the config-json
- populate_files(data_dir, self.files, uid, gid)
-
- def validate(self):
- # type: () -> None
- if not is_fsid(self.fsid):
- raise Error('not an fsid: %s' % self.fsid)
- if not self.daemon_id:
- raise Error('invalid daemon_id: %s' % self.daemon_id)
- if not self.image:
- raise Error('invalid image: %s' % self.image)
-
- # check for the required files
- if self.required_files:
- for fname in self.required_files:
- if fname not in self.files:
- raise Error('required file missing from config-json: %s' % fname)
-
- def get_daemon_name(self):
- # type: () -> str
- return '%s.%s' % (self.daemon_type, self.daemon_id)
-
- def get_container_name(self, desc=None):
- # type: (Optional[str]) -> str
- cname = 'ceph-%s-%s' % (self.fsid, self.get_daemon_name())
- if desc:
- cname = '%s-%s' % (cname, desc)
- return cname
-
- @staticmethod
- def get_container_envs():
- # type: () -> List[str]
- envs = [
- 'KEEPALIVED_AUTOCONF=false',
- 'KEEPALIVED_CONF=/etc/keepalived/keepalived.conf',
- 'KEEPALIVED_CMD=/usr/sbin/keepalived -n -l -f /etc/keepalived/keepalived.conf',
- 'KEEPALIVED_DEBUG=false'
- ]
- return envs
-
- @staticmethod
- def get_sysctl_settings() -> List[str]:
- return [
- '# IP forwarding and non-local bind',
- 'net.ipv4.ip_forward = 1',
- 'net.ipv4.ip_nonlocal_bind = 1',
- ]
-
- def uid_gid(self, ctx: CephadmContext) -> Tuple[int, int]:
- # better directory for this?
- return extract_uid_gid(self.ctx, file_path='/var/lib')
-
- @staticmethod
- def _get_container_mounts(data_dir: str) -> Dict[str, str]:
- mounts = dict()
- mounts[os.path.join(data_dir, 'keepalived.conf')] = '/etc/keepalived/keepalived.conf'
- return mounts
-
- def customize_container_mounts(
- self, ctx: CephadmContext, mounts: Dict[str, str]
- ) -> None:
- data_dir = self.identity.data_dir(ctx.data_dir)
- mounts.update(self._get_container_mounts(data_dir))
-
- def container(self, ctx: CephadmContext) -> CephContainer:
- ctr = get_container(ctx, self.identity)
- return to_deployment_container(ctx, ctr)
-
- def customize_container_envs(
- self, ctx: CephadmContext, envs: List[str]
- ) -> None:
- envs.extend(self.get_container_envs())
-
- def customize_container_args(
- self, ctx: CephadmContext, args: List[str]
- ) -> None:
- args.extend(['--cap-add=NET_ADMIN', '--cap-add=NET_RAW'])
-
-
-##################################
-
-
-@register_daemon_form
-class Tracing(ContainerDaemonForm):
- """Define the configs for the jaeger tracing containers"""
-
- components: Dict[str, Dict[str, Any]] = {
- 'elasticsearch': {
- 'image': DEFAULT_ELASTICSEARCH_IMAGE,
- 'envs': ['discovery.type=single-node']
- },
- 'jaeger-agent': {
- 'image': DEFAULT_JAEGER_AGENT_IMAGE,
- },
- 'jaeger-collector': {
- 'image': DEFAULT_JAEGER_COLLECTOR_IMAGE,
- },
- 'jaeger-query': {
- 'image': DEFAULT_JAEGER_QUERY_IMAGE,
- },
- } # type: ignore
-
- @classmethod
- def for_daemon_type(cls, daemon_type: str) -> bool:
- return daemon_type in cls.components
-
- @staticmethod
- def set_configuration(config: Dict[str, str], daemon_type: str) -> None:
- if daemon_type in ['jaeger-collector', 'jaeger-query']:
- assert 'elasticsearch_nodes' in config
- Tracing.components[daemon_type]['envs'] = [
- 'SPAN_STORAGE_TYPE=elasticsearch',
- f'ES_SERVER_URLS={config["elasticsearch_nodes"]}']
- if daemon_type == 'jaeger-agent':
- assert 'collector_nodes' in config
- Tracing.components[daemon_type]['daemon_args'] = [
- f'--reporter.grpc.host-port={config["collector_nodes"]}',
- '--processor.jaeger-compact.server-host-port=6799'
- ]
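A small sketch (only the collector address is invented) of what set_configuration() above stores for a jaeger-agent whose config-json carries 'collector_nodes'.

config = {'collector_nodes': 'collector-host:14250'}  # placeholder address
daemon_args = [
    '--reporter.grpc.host-port=%s' % config['collector_nodes'],
    '--processor.jaeger-compact.server-host-port=6799',
]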
-
- def __init__(self, ident: DaemonIdentity) -> None:
- self._identity = ident
- self._configured = False
-
- def _configure(self, ctx: CephadmContext) -> None:
- if self._configured:
- return
- config = fetch_configs(ctx)
-        # Currently, this method side-effects the class attribute, and that
-        # is unpleasant. In the future it would be nice to move all of
-        # set_configuration into _configure and only modify each class's data
-        # independently.
- self.set_configuration(config, self.identity.daemon_type)
- self._configured = True
-
- @classmethod
- def create(cls, ctx: CephadmContext, ident: DaemonIdentity) -> 'Tracing':
- return cls(ident)
-
- @property
- def identity(self) -> DaemonIdentity:
- return self._identity
-
- def container(self, ctx: CephadmContext) -> CephContainer:
- ctr = get_container(ctx, self.identity)
- return to_deployment_container(ctx, ctr)
-
- def uid_gid(self, ctx: CephadmContext) -> Tuple[int, int]:
- return 65534, 65534
-
- def get_daemon_args(self) -> List[str]:
- return self.components[self.identity.daemon_type].get(
- 'daemon_args', []
- )
-
- def customize_process_args(
- self, ctx: CephadmContext, args: List[str]
- ) -> None:
- self._configure(ctx)
-        # Earlier code explicitly checked whether the daemon type was jaeger-agent
-        # and only called get_daemon_args if that was true. However, since
-        # the function only returns a non-empty list for jaeger-agent,
-        # that check is unnecessary and was not brought over.
- args.extend(self.get_daemon_args())
-
- def customize_container_envs(
- self, ctx: CephadmContext, envs: List[str]
- ) -> None:
- self._configure(ctx)
- envs.extend(
- self.components[self.identity.daemon_type].get('envs', [])
- )
-
- def default_entrypoint(self) -> str:
- return ''
-
-
-##################################
-
-
-@register_daemon_form
-class CustomContainer(ContainerDaemonForm):
- """Defines a custom container"""
- daemon_type = 'container'
-
- @classmethod
- def for_daemon_type(cls, daemon_type: str) -> bool:
- return cls.daemon_type == daemon_type
-
- def __init__(self,
- fsid: str, daemon_id: Union[int, str],
- config_json: Dict, image: str) -> None:
- self.fsid = fsid
- self.daemon_id = daemon_id
- self.image = image
-
- # config-json options
- self.entrypoint = dict_get(config_json, 'entrypoint')
- self.uid = dict_get(config_json, 'uid', 65534) # nobody
- self.gid = dict_get(config_json, 'gid', 65534) # nobody
- self.volume_mounts = dict_get(config_json, 'volume_mounts', {})
- self.args = dict_get(config_json, 'args', [])
- self.envs = dict_get(config_json, 'envs', [])
- self.privileged = dict_get(config_json, 'privileged', False)
- self.bind_mounts = dict_get(config_json, 'bind_mounts', [])
- self.ports = dict_get(config_json, 'ports', [])
- self.dirs = dict_get(config_json, 'dirs', [])
- self.files = dict_get(config_json, 'files', {})
-
- @classmethod
- def init(cls, ctx: CephadmContext,
- fsid: str, daemon_id: Union[int, str]) -> 'CustomContainer':
- return cls(fsid, daemon_id,
- fetch_configs(ctx), ctx.image)
-
- @classmethod
- def create(cls, ctx: CephadmContext, ident: DaemonIdentity) -> 'CustomContainer':
- return cls.init(ctx, ident.fsid, ident.daemon_id)
-
- @property
- def identity(self) -> DaemonIdentity:
- return DaemonIdentity(self.fsid, self.daemon_type, self.daemon_id)
-
- def create_daemon_dirs(self, data_dir: str, uid: int, gid: int) -> None:
- """
- Create dirs/files below the container data directory.
- """
- logger.info('Creating custom container configuration '
- 'dirs/files in {} ...'.format(data_dir))
-
- if not os.path.isdir(data_dir):
- raise OSError('data_dir is not a directory: %s' % data_dir)
-
- for dir_path in self.dirs:
- logger.info('Creating directory: {}'.format(dir_path))
- dir_path = os.path.join(data_dir, dir_path.strip('/'))
- makedirs(dir_path, uid, gid, 0o755)
-
- for file_path in self.files:
- logger.info('Creating file: {}'.format(file_path))
- content = dict_get_join(self.files, file_path)
- file_path = os.path.join(data_dir, file_path.strip('/'))
- with write_new(file_path, owner=(uid, gid), encoding='utf-8') as f:
- f.write(content)
-
- def get_daemon_args(self) -> List[str]:
- return []
-
- def get_container_args(self) -> List[str]:
- return self.args
-
- def get_container_envs(self) -> List[str]:
- return self.envs
-
- def _get_container_mounts(self, data_dir: str) -> Dict[str, str]:
- """
- Get the volume mounts. Relative source paths will be located below
- `/var/lib/ceph/<cluster-fsid>/<daemon-name>`.
-
- Example:
- {
- /foo/conf: /conf
- foo/conf: /conf
- }
- becomes
- {
- /foo/conf: /conf
- /var/lib/ceph/<cluster-fsid>/<daemon-name>/foo/conf: /conf
- }
- """
- mounts = {}
- for source, destination in self.volume_mounts.items():
- source = os.path.join(data_dir, source)
- mounts[source] = destination
- return mounts
-
- def customize_container_mounts(
- self, ctx: CephadmContext, mounts: Dict[str, str]
- ) -> None:
- data_dir = self.identity.data_dir(ctx.data_dir)
- mounts.update(self._get_container_mounts(data_dir))
-
- def _get_container_binds(self, data_dir: str) -> List[List[str]]:
- """
- Get the bind mounts. Relative `source=...` paths will be located below
- `/var/lib/ceph/<cluster-fsid>/<daemon-name>`.
-
- Example:
- [
- 'type=bind',
- 'source=lib/modules',
- 'destination=/lib/modules',
- 'ro=true'
- ]
- becomes
- [
- ...
- 'source=/var/lib/ceph/<cluster-fsid>/<daemon-name>/lib/modules',
- ...
- ]
- """
- binds = self.bind_mounts.copy()
- for bind in binds:
- for index, value in enumerate(bind):
- match = re.match(r'^source=(.+)$', value)
- if match:
- bind[index] = 'source={}'.format(os.path.join(
- data_dir, match.group(1)))
- return binds
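Illustrative sketch (not in the patch) of the 'source=' rewriting performed above for a relative bind mount; data_dir is a placeholder path.

import os
import re

data_dir = '/var/lib/ceph/FSID/container.foo'  # placeholder
bind = ['type=bind', 'source=lib/modules', 'destination=/lib/modules', 'ro=true']
for index, value in enumerate(bind):
    match = re.match(r'^source=(.+)$', value)
    if match:
        bind[index] = 'source={}'.format(os.path.join(data_dir, match.group(1)))
assert bind[1] == 'source=/var/lib/ceph/FSID/container.foo/lib/modules'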
-
- def customize_container_binds(
- self, ctx: CephadmContext, binds: List[List[str]]
- ) -> None:
- data_dir = self.identity.data_dir(ctx.data_dir)
- binds.extend(self._get_container_binds(data_dir))
-
-    # Cache the container so we don't need to rebuild it when calling
-    # into init_containers
- _container: Optional[CephContainer] = None
-
- def container(self, ctx: CephadmContext) -> CephContainer:
- if self._container is None:
- ctr = get_container(
- ctx,
- self.identity,
- privileged=self.privileged,
- ptrace=ctx.allow_ptrace,
- )
- self._container = to_deployment_container(ctx, ctr)
- return self._container
-
- def init_containers(self, ctx: CephadmContext) -> List[InitContainer]:
- primary = self.container(ctx)
- init_containers: List[Dict[str, Any]] = getattr(
- ctx, 'init_containers', []
- )
- return [
- InitContainer.from_primary_and_opts(ctx, primary, ic_opts)
- for ic_opts in init_containers
- ]
-
- def customize_container_endpoints(
- self, endpoints: List[EndPoint], deployment_type: DeploymentType
- ) -> None:
- if deployment_type == DeploymentType.DEFAULT:
- endpoints.extend([EndPoint('0.0.0.0', p) for p in self.ports])
-
- def customize_container_envs(
- self, ctx: CephadmContext, envs: List[str]
- ) -> None:
- envs.extend(self.get_container_envs())
-
- def customize_container_args(
- self, ctx: CephadmContext, args: List[str]
- ) -> None:
- args.extend(self.get_container_args())
-
- def customize_process_args(
- self, ctx: CephadmContext, args: List[str]
- ) -> None:
- args.extend(self.get_daemon_args())
-
- def default_entrypoint(self) -> str:
- return self.entrypoint or ''
-
- def uid_gid(self, ctx: CephadmContext) -> Tuple[int, int]:
- return self.uid, self.gid
-
-
-##################################
-
-
def get_supported_daemons():
# type: () -> List[str]
supported_daemons = ceph_daemons()
@@ -2198,15 +226,6 @@ def get_supported_daemons():
assert len(supported_daemons) == len(set(supported_daemons))
return supported_daemons
-
-def ceph_daemons() -> List[str]:
- cds = list(Ceph._daemons)
- cds.append(CephExporter.daemon_type)
- return cds
-
-##################################
-
-
##################################
@@ -2829,76 +848,11 @@ def get_container_mounts_for_type(
"""Return a dictionary mapping container-external paths to container-internal
paths given an fsid and daemon_type.
"""
- mounts = _get_container_mounts_for_type(ctx, fsid, daemon_type)
+ mounts = get_ceph_mounts_for_type(ctx, fsid, daemon_type)
_update_podman_mounts(ctx, mounts)
return mounts
-def _get_container_mounts_for_type(
- ctx: CephadmContext, fsid: str, daemon_type: str
-) -> Dict[str, str]:
- """The main implementation of get_container_mounts_for_type minus the call
- to _update_podman_mounts so that this can be called from
- get_container_mounts.
- """
- mounts = dict()
-
- if daemon_type in ceph_daemons():
- if fsid:
- run_path = os.path.join('/var/run/ceph', fsid)
- if os.path.exists(run_path):
- mounts[run_path] = '/var/run/ceph:z'
- log_dir = get_log_dir(fsid, ctx.log_dir)
- mounts[log_dir] = '/var/log/ceph:z'
- crash_dir = '/var/lib/ceph/%s/crash' % fsid
- if os.path.exists(crash_dir):
- mounts[crash_dir] = '/var/lib/ceph/crash:z'
- if daemon_type != 'crash' and should_log_to_journald(ctx):
- journald_sock_dir = '/run/systemd/journal'
- mounts[journald_sock_dir] = journald_sock_dir
-
- if daemon_type in ['mon', 'osd', 'clusterless-ceph-volume']:
- mounts['/dev'] = '/dev' # FIXME: narrow this down?
- mounts['/run/udev'] = '/run/udev'
- if daemon_type in ['osd', 'clusterless-ceph-volume']:
- mounts['/sys'] = '/sys' # for numa.cc, pick_address, cgroups, ...
- mounts['/run/lvm'] = '/run/lvm'
- mounts['/run/lock/lvm'] = '/run/lock/lvm'
- if daemon_type == 'osd':
- # selinux-policy in the container may not match the host.
- if HostFacts(ctx).selinux_enabled:
- cluster_dir = f'{ctx.data_dir}/{fsid}'
- selinux_folder = f'{cluster_dir}/selinux'
- if os.path.exists(cluster_dir):
- if not os.path.exists(selinux_folder):
- os.makedirs(selinux_folder, mode=0o755)
- mounts[selinux_folder] = '/sys/fs/selinux:ro'
- else:
-                    logger.error(f'Cluster directory {cluster_dir} does not exist.')
- mounts['/'] = '/rootfs'
-
- try:
-        if ctx.shared_ceph_folder:  # makes manager module/ceph-volume development easier
- ceph_folder = pathify(ctx.shared_ceph_folder)
- if os.path.exists(ceph_folder):
- cephadm_binary = ceph_folder + '/src/cephadm/cephadm'
- if not os.path.exists(pathify(cephadm_binary)):
- raise Error("cephadm binary does not exist. Please run './build.sh cephadm' from ceph/src/cephadm/ directory.")
- mounts[cephadm_binary] = '/usr/sbin/cephadm'
- mounts[ceph_folder + '/src/ceph-volume/ceph_volume'] = '/usr/lib/python3.6/site-packages/ceph_volume'
- mounts[ceph_folder + '/src/pybind/mgr'] = '/usr/share/ceph/mgr'
- mounts[ceph_folder + '/src/python-common/ceph'] = '/usr/lib/python3.6/site-packages/ceph'
- mounts[ceph_folder + '/monitoring/ceph-mixin/dashboards_out'] = '/etc/grafana/dashboards/ceph-dashboard'
- mounts[ceph_folder + '/monitoring/ceph-mixin/prometheus_alerts.yml'] = '/etc/prometheus/ceph/ceph_default_alerts.yml'
- else:
- logger.error(
- 'Ceph shared source folder does not exist.',
- extra=Highlight.FAILURE.extra())
- except AttributeError:
- pass
- return mounts
-
-
def get_container_mounts(
ctx: CephadmContext, ident: 'DaemonIdentity', no_config: bool = False
) -> Dict[str, str]:
@@ -2961,107 +915,13 @@ def get_ceph_volume_container(ctx: CephadmContext,
def get_container(
ctx: CephadmContext,
ident: 'DaemonIdentity',
- privileged: bool = False,
- ptrace: bool = False,
- container_args: Optional[List[str]] = None,
) -> 'CephContainer':
- entrypoint: str = ''
- d_args: List[str] = []
- envs: List[str] = []
- host_network: bool = True
- binds: List[List[str]] = []
- mounts: Dict[str, str] = {}
-
- daemon_type = ident.daemon_type
- if container_args is None:
- container_args = []
- if Ceph.for_daemon_type(daemon_type) or OSD.for_daemon_type(daemon_type):
- ceph_daemon = daemon_form_create(ctx, ident)
- assert isinstance(ceph_daemon, ContainerDaemonForm)
- entrypoint = ceph_daemon.default_entrypoint()
- ceph_daemon.customize_container_envs(ctx, envs)
- ceph_daemon.customize_container_args(ctx, container_args)
- ceph_daemon.customize_process_args(ctx, d_args)
- mounts = get_container_mounts(ctx, ident)
- if daemon_type in ['mon', 'osd']:
- # mon and osd need privileged in order for libudev to query devices
- privileged = True
- if daemon_type in Monitoring.components:
- monitoring = Monitoring.create(ctx, ident)
- entrypoint = monitoring.default_entrypoint()
- monitoring.customize_container_args(ctx, container_args)
- monitoring.customize_process_args(ctx, d_args)
- mounts = get_container_mounts(ctx, ident)
- elif daemon_type in Tracing.components:
- tracing = Tracing.create(ctx, ident)
- entrypoint = tracing.default_entrypoint()
- tracing.customize_container_envs(ctx, envs)
- tracing.customize_process_args(ctx, d_args)
- elif daemon_type == NFSGanesha.daemon_type:
- nfs_ganesha = NFSGanesha.create(ctx, ident)
- entrypoint = nfs_ganesha.default_entrypoint()
- nfs_ganesha.customize_container_envs(ctx, envs)
- nfs_ganesha.customize_container_args(ctx, container_args)
- nfs_ganesha.customize_process_args(ctx, d_args)
- mounts = get_container_mounts(ctx, ident)
- elif daemon_type == CephExporter.daemon_type:
- ceph_exporter = CephExporter.create(ctx, ident)
- entrypoint = ceph_exporter.default_entrypoint()
- ceph_exporter.customize_container_envs(ctx, envs)
- ceph_exporter.customize_container_args(ctx, container_args)
- ceph_exporter.customize_process_args(ctx, d_args)
- mounts = get_container_mounts(ctx, ident)
- elif daemon_type == HAproxy.daemon_type:
- haproxy = HAproxy.create(ctx, ident)
- haproxy.customize_container_args(ctx, container_args)
- haproxy.customize_process_args(ctx, d_args)
- mounts = get_container_mounts(ctx, ident)
- elif daemon_type == Keepalived.daemon_type:
- keepalived = Keepalived.create(ctx, ident)
- keepalived.customize_container_envs(ctx, envs)
- keepalived.customize_container_args(ctx, container_args)
- mounts = get_container_mounts(ctx, ident)
- elif daemon_type == CephNvmeof.daemon_type:
- nvmeof = CephNvmeof.create(ctx, ident)
- nvmeof.customize_container_args(ctx, container_args)
- binds = get_container_binds(ctx, ident)
- mounts = get_container_mounts(ctx, ident)
- elif daemon_type == CephIscsi.daemon_type:
- iscsi = CephIscsi.create(ctx, ident)
- entrypoint = iscsi.default_entrypoint()
- iscsi.customize_container_args(ctx, container_args)
- # So the container can modprobe iscsi_target_mod and have write perms
- # to configfs we need to make this a privileged container.
- privileged = True
- binds = get_container_binds(ctx, ident)
- mounts = get_container_mounts(ctx, ident)
- elif daemon_type == CustomContainer.daemon_type:
- cc = CustomContainer.init(ctx, ident.fsid, ident.daemon_id)
- entrypoint = cc.default_entrypoint()
- host_network = False
- cc.customize_container_envs(ctx, envs)
- cc.customize_container_args(ctx, container_args)
- cc.customize_process_args(ctx, d_args)
- binds = get_container_binds(ctx, ident)
- mounts = get_container_mounts(ctx, ident)
- elif daemon_type == SNMPGateway.daemon_type:
- sg = SNMPGateway.create(ctx, ident)
- sg.customize_container_args(ctx, container_args)
- sg.customize_process_args(ctx, d_args)
-
- _update_container_args_for_podman(ctx, ident, container_args)
- return CephContainer.for_daemon(
- ctx,
- ident=ident,
- entrypoint=entrypoint,
- args=d_args,
- container_args=container_args,
- volume_mounts=mounts,
- bind_mounts=binds,
- envs=envs,
- privileged=privileged,
- ptrace=ptrace,
- host_network=host_network,
+ daemon = daemon_form_create(ctx, ident)
+ assert isinstance(daemon, ContainerDaemonForm)
+ privileged = ident.daemon_type in {'mon', 'osd', CephIscsi.daemon_type}
+ host_network = ident.daemon_type != CustomContainer.daemon_type
+ return daemon_to_container(
+ ctx, daemon, privileged=privileged, host_network=host_network
)
@@ -3196,97 +1056,6 @@ def deploy_daemon(
call_throws(ctx, ['systemctl', 'restart', ident.unit_name])
-def _bash_cmd(
- fh: IO[str],
- cmd: List[str],
- check: bool = True,
- background: bool = False,
- stderr: bool = True,
-) -> None:
- line = ' '.join(shlex.quote(arg) for arg in cmd)
- if not check:
- line = f'! {line}'
- if not stderr:
- line = f'{line} 2> /dev/null'
- if background:
- line = f'{line} &'
- fh.write(line)
- fh.write('\n')
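For illustration (not part of the change): the line _bash_cmd() above would emit for a cleanup command with check=False and stderr=False; the engine path and container name are placeholders.

import io
import shlex

fh = io.StringIO()
cmd = ['/usr/bin/podman', 'rm', 'ceph-FSID-osd.0']      # placeholder command
line = ' '.join(shlex.quote(arg) for arg in cmd)
line = '! ' + line                                       # check=False
line = line + ' 2> /dev/null'                            # stderr=False
fh.write(line + '\n')
assert fh.getvalue() == '! /usr/bin/podman rm ceph-FSID-osd.0 2> /dev/null\n'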
-
-
-def _write_container_cmd_to_bash(
- ctx: CephadmContext,
- file_obj: IO[str],
- container: 'CephContainer',
- comment: Optional[str] = None,
- background: Optional[bool] = False,
-) -> None:
- if comment:
- # Sometimes adding a comment, especially if there are multiple containers in one
- # unit file, makes it easier to read and grok.
- assert '\n' not in comment
- file_obj.write(f'# {comment}\n')
- # Sometimes, adding `--rm` to a run_cmd doesn't work. Let's remove the container manually
- _bash_cmd(
- file_obj, container.rm_cmd(old_cname=True), check=False, stderr=False
- )
- _bash_cmd(file_obj, container.rm_cmd(), check=False, stderr=False)
-
- # Sometimes, `podman rm` doesn't find the container. Then you'll have to add `--storage`
- if isinstance(ctx.container_engine, Podman):
- _bash_cmd(
- file_obj,
- container.rm_cmd(storage=True),
- check=False,
- stderr=False,
- )
- _bash_cmd(
- file_obj,
- container.rm_cmd(old_cname=True, storage=True),
- check=False,
- stderr=False,
- )
-
- # container run command
- _bash_cmd(file_obj, container.run_cmd(), background=bool(background))
-
-
-def _write_init_container_cmds(
- ctx: CephadmContext,
- file_obj: IO[str],
- index: int,
- init_container: 'InitContainer',
-) -> None:
- file_obj.write(f'# init container {index}: {init_container.cname}\n')
- _bash_cmd(file_obj, init_container.run_cmd())
- _write_init_container_cmds_clean(ctx, file_obj, init_container, comment='')
-
-
-def _write_init_container_cmds_clean(
- ctx: CephadmContext,
- file_obj: IO[str],
- init_container: 'InitContainer',
- comment: str = 'init container cleanup',
-) -> None:
- if comment:
- assert '\n' not in comment
- file_obj.write(f'# {comment}\n')
- _bash_cmd(
- file_obj,
- init_container.rm_cmd(),
- check=False,
- stderr=False,
- )
- # Sometimes, `podman rm` doesn't find the container. Then you'll have to add `--storage`
- if isinstance(ctx.container_engine, Podman):
- _bash_cmd(
- file_obj,
- init_container.rm_cmd(storage=True),
- check=False,
- stderr=False,
- )
-
-
def clean_cgroup(ctx: CephadmContext, fsid: str, unit_name: str) -> None:
 # systemd may fail to clean up cgroups from a previously stopped unit, which will cause the next "systemctl start" to fail.
# see https://tracker.ceph.com/issues/50998
@@ -3324,85 +1093,44 @@ def deploy_daemon_units(
endpoints: Optional[List[EndPoint]] = None,
init_containers: Optional[List['InitContainer']] = None,
) -> None:
- # cmd
-
- # unpack values from ident because they're used very frequently
- fsid = ident.fsid
- daemon_type = ident.daemon_type
- daemon_id = ident.daemon_id
-
data_dir = ident.data_dir(ctx.data_dir)
- run_file_path = data_dir + '/unit.run'
- meta_file_path = data_dir + '/unit.meta'
- with write_new(run_file_path) as f, write_new(meta_file_path) as metaf:
-
- f.write('set -e\n')
-
- if daemon_type in ceph_daemons():
- install_path = find_program('install')
- f.write('{install_path} -d -m0770 -o {uid} -g {gid} /var/run/ceph/{fsid}\n'.format(install_path=install_path, fsid=fsid, uid=uid, gid=gid))
+ pre_start_commands: List[runscripts.Command] = []
+ post_stop_commands: List[runscripts.Command] = []
+
+ if ident.daemon_type in ceph_daemons():
+ install_path = find_program('install')
+ pre_start_commands.append('{install_path} -d -m0770 -o {uid} -g {gid} /var/run/ceph/{fsid}\n'.format(install_path=install_path, fsid=ident.fsid, uid=uid, gid=gid))
+ if ident.daemon_type == 'osd':
+ assert osd_fsid
+ pre_start_commands.extend(_osd_unit_run_commands(
+ ctx, ident, osd_fsid, data_dir, uid, gid
+ ))
+ post_stop_commands.extend(
+ _osd_unit_poststop_commands(ctx, ident, osd_fsid)
+ )
+ if ident.daemon_type == CephIscsi.daemon_type:
+ pre_start_commands.extend(_iscsi_unit_run_commands(ctx, ident, data_dir))
+ post_stop_commands.extend(_iscsi_unit_poststop_commands(ctx, ident, data_dir))
- # pre-start cmd(s)
- if daemon_type == 'osd':
- assert osd_fsid
- _write_osd_unit_run_commands(
- ctx, f, ident, osd_fsid, data_dir, uid, gid
- )
- elif daemon_type == CephIscsi.daemon_type:
- _write_iscsi_unit_run_commands(ctx, f, ident, data_dir)
- init_containers = init_containers or []
- if init_containers:
- _write_init_container_cmds_clean(ctx, f, init_containers[0])
- for idx, ic in enumerate(init_containers):
- _write_init_container_cmds(ctx, f, idx, ic)
-
- _write_container_cmd_to_bash(ctx, f, container, '%s.%s' % (daemon_type, str(daemon_id)))
-
- # some metadata about the deploy
- meta: Dict[str, Any] = fetch_meta(ctx)
- meta.update({
- 'memory_request': int(ctx.memory_request) if ctx.memory_request else None,
- 'memory_limit': int(ctx.memory_limit) if ctx.memory_limit else None,
- })
- if not meta.get('ports'):
- if endpoints:
- meta['ports'] = [e.port for e in endpoints]
- else:
- meta['ports'] = []
- metaf.write(json.dumps(meta, indent=4) + '\n')
-
- timeout = 30 if daemon_type == 'osd' else None
- # post-stop command(s)
- with write_new(data_dir + '/unit.poststop') as f:
-        # This is a fallback to eventually stop any underlying container that was not stopped properly by unit.stop;
-        # this could happen in very slow setups, as described in https://tracker.ceph.com/issues/58242.
- _write_stop_actions(ctx, cast(TextIO, f), container, timeout)
- if daemon_type == 'osd':
- assert osd_fsid
- _write_osd_unit_poststop_commands(ctx, f, ident, osd_fsid)
- elif daemon_type == CephIscsi.daemon_type:
- _write_iscsi_unit_poststop_commands(ctx, f, ident, data_dir)
-
- # post-stop command(s)
- with write_new(data_dir + '/unit.stop') as f:
- _write_stop_actions(ctx, cast(TextIO, f), container, timeout)
-
- if container:
- with write_new(data_dir + '/unit.image') as f:
- f.write(container.image + '\n')
+ runscripts.write_service_scripts(
+ ctx,
+ ident,
+ container=container,
+ init_containers=init_containers,
+ endpoints=endpoints,
+ pre_start_commands=pre_start_commands,
+ post_stop_commands=post_stop_commands,
+ timeout=30 if ident.daemon_type == 'osd' else None,
+ )
# sysctl
- install_sysctl(ctx, fsid, daemon_form_create(ctx, ident))
+ install_sysctl(ctx, ident.fsid, daemon_form_create(ctx, ident))
# systemd
- install_base_units(ctx, fsid)
- unit = get_unit_file(ctx, fsid)
- unit_file = 'ceph-%s@.service' % (fsid)
- with write_new(ctx.unit_dir + '/' + unit_file, perms=None) as f:
- f.write(unit)
+ systemd_unit.update_files(ctx, ident)
call_throws(ctx, ['systemctl', 'daemon-reload'])
- unit_name = get_unit_name(fsid, daemon_type, daemon_id)
+ unit_name = get_unit_name(ident.fsid, ident.daemon_type, ident.daemon_id)
call(ctx, ['systemctl', 'stop', unit_name],
verbosity=CallVerbosity.DEBUG)
call(ctx, ['systemctl', 'reset-failed', unit_name],
@@ -3410,38 +1138,27 @@ def deploy_daemon_units(
if enable:
call_throws(ctx, ['systemctl', 'enable', unit_name])
if start:
- clean_cgroup(ctx, fsid, unit_name)
+ clean_cgroup(ctx, ident.fsid, unit_name)
call_throws(ctx, ['systemctl', 'start', unit_name])
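For orientation, the refactor above replaces the hand-written unit.run/unit.poststop generation with lists of commands handed to runscripts.write_service_scripts. A minimal sketch of that pattern, assuming a prepared CephadmContext ctx, DaemonIdentity ident, CephContainer ctr, and a deactivation container deactivate_ctr (all placeholders):

    pre_start: List[runscripts.Command] = ['# ensure the runtime dir exists\n']
    post_stop: List[runscripts.Command] = [
        runscripts.ContainerCommand(deactivate_ctr, comment='deactivate osd'),
    ]
    runscripts.write_service_scripts(
        ctx,
        ident,
        container=ctr,
        endpoints=None,
        init_containers=None,
        pre_start_commands=pre_start,
        post_stop_commands=post_stop,
        timeout=None,
    )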
-def _write_stop_actions(
- ctx: CephadmContext, f: TextIO, container: 'CephContainer', timeout: Optional[int]
-) -> None:
- # following generated script basically checks if the container exists
- # before stopping it. Exit code will be success either if it doesn't
- # exist or if it exists and is stopped successfully.
- container_exists = f'{ctx.container_engine.path} inspect %s &>/dev/null'
- f.write(f'! {container_exists % container.old_cname} || {" ".join(container.stop_cmd(old_cname=True, timeout=timeout))} \n')
- f.write(f'! {container_exists % container.cname} || {" ".join(container.stop_cmd(timeout=timeout))} \n')
-
-
-def _write_osd_unit_run_commands(
+def _osd_unit_run_commands(
ctx: CephadmContext,
- f: IO,
ident: 'DaemonIdentity',
osd_fsid: str,
data_dir: str,
uid: int,
gid: int,
-) -> None:
+) -> List[runscripts.Command]:
+ cmds: List[runscripts.Command] = []
# osds have a pre-start step
simple_fn = os.path.join('/etc/ceph/osd',
'%s-%s.json.adopted-by-cephadm' % (ident.daemon_id, osd_fsid))
if os.path.exists(simple_fn):
- f.write('# Simple OSDs need chown on startup:\n')
+ cmds.append('# Simple OSDs need chown on startup:\n')
for n in ['block', 'block.db', 'block.wal']:
p = os.path.join(data_dir, n)
- f.write('[ ! -L {p} ] || chown {uid}:{gid} {p}\n'.format(p=p, uid=uid, gid=gid))
+ cmds.append('[ ! -L {p} ] || chown {uid}:{gid} {p}\n'.format(p=p, uid=uid, gid=gid))
else:
# if ceph-volume does not support 'ceph-volume activate', we must
# do 'ceph-volume lvm activate'.
@@ -3481,21 +1198,24 @@ def _write_osd_unit_run_commands(
bind_mounts=get_container_binds(ctx, ident),
cname='ceph-%s-%s.%s-activate' % (fsid, daemon_type, daemon_id),
)
- _write_container_cmd_to_bash(ctx, f, prestart, 'LVM OSDs use ceph-volume lvm activate')
+ cmds.append(runscripts.ContainerCommand(prestart, comment='LVM OSDs use ceph-volume lvm activate'))
+ return cmds
-def _write_iscsi_unit_run_commands(
- ctx: CephadmContext, f: IO, ident: 'DaemonIdentity', data_dir: str
-) -> None:
- f.write(' '.join(CephIscsi.configfs_mount_umount(data_dir, mount=True)) + '\n')
+def _iscsi_unit_run_commands(
+ ctx: CephadmContext, ident: 'DaemonIdentity', data_dir: str
+) -> List[runscripts.Command]:
+ cmds: List[runscripts.Command] = []
+ cmds.append(' '.join(CephIscsi.configfs_mount_umount(data_dir, mount=True)) + '\n')
ceph_iscsi = CephIscsi.init(ctx, ident.fsid, ident.daemon_id)
tcmu_container = ceph_iscsi.get_tcmu_runner_container()
- _write_container_cmd_to_bash(ctx, f, tcmu_container, 'iscsi tcmu-runner container', background=True)
+ cmds.append(runscripts.ContainerCommand(tcmu_container, comment='iscsi tcmu-runner container', background=True))
+ return cmds
-def _write_osd_unit_poststop_commands(
- ctx: CephadmContext, f: IO, ident: 'DaemonIdentity', osd_fsid: str
-) -> None:
+def _osd_unit_poststop_commands(
+ ctx: CephadmContext, ident: 'DaemonIdentity', osd_fsid: str
+) -> List[runscripts.Command]:
poststop = get_ceph_volume_container(
ctx,
args=[
@@ -3506,118 +1226,22 @@ def _write_osd_unit_poststop_commands(
bind_mounts=get_container_binds(ctx, ident),
cname='ceph-%s-%s.%s-deactivate' % (ident.fsid, ident.daemon_type, ident.daemon_id),
)
- _write_container_cmd_to_bash(ctx, f, poststop, 'deactivate osd')
+ return [runscripts.ContainerCommand(poststop, comment='deactivate osd')]
-def _write_iscsi_unit_poststop_commands(
- ctx: CephadmContext, f: IO, ident: 'DaemonIdentity', data_dir: str
-) -> None:
+def _iscsi_unit_poststop_commands(
+ ctx: CephadmContext, ident: 'DaemonIdentity', data_dir: str
+) -> List[runscripts.Command]:
# make sure we also stop the tcmu container
+ cmds: List[runscripts.Command] = []
runtime_dir = '/run'
ceph_iscsi = CephIscsi.init(ctx, ident.fsid, ident.daemon_id)
tcmu_container = ceph_iscsi.get_tcmu_runner_container()
- f.write('! ' + ' '.join(tcmu_container.stop_cmd()) + '\n')
- f.write('! ' + 'rm ' + runtime_dir + '/ceph-%s@%s.%s.service-pid' % (ident.fsid, ident.daemon_type, ident.daemon_id + '.tcmu') + '\n')
- f.write('! ' + 'rm ' + runtime_dir + '/ceph-%s@%s.%s.service-cid' % (ident.fsid, ident.daemon_type, ident.daemon_id + '.tcmu') + '\n')
- f.write(' '.join(CephIscsi.configfs_mount_umount(data_dir, mount=False)) + '\n')
-
-
-def install_base_units(ctx, fsid):
- # type: (CephadmContext, str) -> None
- """
- Set up ceph.target and ceph-$fsid.target units.
- """
- # global unit
- existed = os.path.exists(ctx.unit_dir + '/ceph.target')
- with write_new(ctx.unit_dir + '/ceph.target', perms=None) as f:
- f.write('[Unit]\n'
- 'Description=All Ceph clusters and services\n'
- '\n'
- '[Install]\n'
- 'WantedBy=multi-user.target\n')
- if not existed:
- # we disable before enable in case a different ceph.target
- # (from the traditional package) is present; while newer
- # systemd is smart enough to disable the old
- # (/lib/systemd/...) and enable the new (/etc/systemd/...),
- # some older versions of systemd error out with EEXIST.
- call_throws(ctx, ['systemctl', 'disable', 'ceph.target'])
- call_throws(ctx, ['systemctl', 'enable', 'ceph.target'])
- call_throws(ctx, ['systemctl', 'start', 'ceph.target'])
-
- # cluster unit
- existed = os.path.exists(ctx.unit_dir + '/ceph-%s.target' % fsid)
- with write_new(ctx.unit_dir + f'/ceph-{fsid}.target', perms=None) as f:
- f.write(
- '[Unit]\n'
- 'Description=Ceph cluster {fsid}\n'
- 'PartOf=ceph.target\n'
- 'Before=ceph.target\n'
- '\n'
- '[Install]\n'
- 'WantedBy=multi-user.target ceph.target\n'.format(
- fsid=fsid)
- )
- if not existed:
- call_throws(ctx, ['systemctl', 'enable', 'ceph-%s.target' % fsid])
- call_throws(ctx, ['systemctl', 'start', 'ceph-%s.target' % fsid])
-
- # don't overwrite file in order to allow users to manipulate it
- if os.path.exists(ctx.logrotate_dir + f'/ceph-{fsid}'):
- return
-
- # logrotate for the cluster
- with write_new(ctx.logrotate_dir + f'/ceph-{fsid}', perms=None) as f:
- """
- This is a bit sloppy in that the killall/pkill will touch all ceph daemons
- in all containers, but I don't see an elegant way to send SIGHUP *just* to
- the daemons for this cluster. (1) systemd kill -s will get the signal to
- podman, but podman will exit. (2) podman kill will get the signal to the
- first child (bash), but that isn't the ceph daemon. This is simpler and
- should be harmless.
- """
- targets: List[str] = [
- 'ceph-mon',
- 'ceph-mgr',
- 'ceph-mds',
- 'ceph-osd',
- 'ceph-fuse',
- 'radosgw',
- 'rbd-mirror',
- 'cephfs-mirror',
- 'tcmu-runner'
- ]
-
- f.write("""# created by cephadm
-/var/log/ceph/%s/*.log {
- rotate 7
- daily
- compress
- sharedscripts
- postrotate
- killall -q -1 %s || pkill -1 -x '%s' || true
- endscript
- missingok
- notifempty
- su root root
-}
-""" % (fsid, ' '.join(targets), '|'.join(targets)))
-
-
-def get_unit_file(ctx: CephadmContext, fsid: str) -> str:
- has_docker_engine = isinstance(ctx.container_engine, Docker)
- has_podman_engine = isinstance(ctx.container_engine, Podman)
- has_podman_split_version = (
- has_podman_engine and ctx.container_engine.supports_split_cgroups
- )
- return templating.render(
- ctx,
- templating.Templates.ceph_service,
- fsid=fsid,
- has_docker_engine=has_docker_engine,
- has_podman_engine=has_podman_engine,
- has_podman_split_version=has_podman_split_version,
- )
+ cmds.append('! ' + ' '.join(tcmu_container.stop_cmd()) + '\n')
+ cmds.append('! ' + 'rm ' + runtime_dir + '/ceph-%s@%s.%s.service-pid' % (ident.fsid, ident.daemon_type, ident.daemon_id + '.tcmu') + '\n')
+ cmds.append('! ' + 'rm ' + runtime_dir + '/ceph-%s@%s.%s.service-cid' % (ident.fsid, ident.daemon_type, ident.daemon_id + '.tcmu') + '\n')
+ cmds.append(' '.join(CephIscsi.configfs_mount_umount(data_dir, mount=False)) + '\n')
+ return cmds
##################################
@@ -3949,7 +1573,7 @@ class CephadmAgent(DaemonForm):
)
name_id_mapping: Dict[str, str] = self._parse_container_id_name(code, out)
for i in os.listdir(data_dir):
- if i in ['mon', 'osd', 'mds', 'mgr']:
+ if i in ['mon', 'osd', 'mds', 'mgr', 'rgw']:
daemon_type = i
for j in os.listdir(os.path.join(data_dir, i)):
if '-' not in j:
@@ -5336,36 +2960,6 @@ def command_registry_login(ctx: CephadmContext) -> int:
##################################
-def to_deployment_container(
- ctx: CephadmContext, ctr: CephContainer
-) -> CephContainer:
- """Given a standard ceph container instance return a CephContainer
- prepared for a deployment as a daemon, having the extra args and
- custom configurations added.
- NOTE: The `ctr` object is mutated before being returned.
- """
- if 'extra_container_args' in ctx and ctx.extra_container_args:
- ctr.container_args.extend(ctx.extra_container_args)
- if 'extra_entrypoint_args' in ctx and ctx.extra_entrypoint_args:
- ctr.args.extend(ctx.extra_entrypoint_args)
- ccfiles = fetch_custom_config_files(ctx)
- if ccfiles:
- mandatory_keys = ['mount_path', 'content']
- for conf in ccfiles:
- if all(k in conf for k in mandatory_keys):
- mount_path = conf['mount_path']
- assert ctr.identity
- file_path = os.path.join(
- ctx.data_dir,
- ctr.identity.fsid,
- 'custom_config_files',
- ctr.identity.daemon_name,
- os.path.basename(mount_path)
- )
- ctr.volume_mounts[file_path] = mount_path
- return ctr
-
-
def get_deployment_type(
ctx: CephadmContext, ident: 'DaemonIdentity',
) -> DeploymentType:
@@ -5700,21 +3294,16 @@ def command_ceph_volume(ctx):
##################################
+@infer_fsid
def command_unit_install(ctx):
# type: (CephadmContext) -> int
- if not ctx.fsid:
+ if not getattr(ctx, 'fsid', None):
raise Error('must pass --fsid to specify cluster')
-
- fsid = ctx.fsid
- install_base_units(ctx, fsid)
- unit = get_unit_file(ctx, fsid)
- unit_file = 'ceph-%s@.service' % (fsid)
- with open(ctx.unit_dir + '/' + unit_file + '.new', 'w') as f:
- f.write(unit)
- os.rename(ctx.unit_dir + '/' + unit_file + '.new',
- ctx.unit_dir + '/' + unit_file)
+ if not getattr(ctx, 'name', None):
+ raise Error('daemon name required')
+ ident = DaemonIdentity.from_context(ctx)
+ systemd_unit.update_files(ctx, ident)
call_throws(ctx, ['systemctl', 'daemon-reload'])
-
return 0
@@ -5813,7 +3402,7 @@ def list_daemons(ctx, detail=True, legacy_dir=None):
# /var/lib/ceph
if os.path.exists(data_dir):
for i in os.listdir(data_dir):
- if i in ['mon', 'osd', 'mds', 'mgr']:
+ if i in ['mon', 'osd', 'mds', 'mgr', 'rgw']:
daemon_type = i
for j in os.listdir(os.path.join(data_dir, i)):
if '-' not in j:
@@ -7460,6 +5049,13 @@ def _get_parser():
parser_unit_install = subparsers.add_parser(
'unit-install', help="Install the daemon's systemd unit")
parser_unit_install.set_defaults(func=command_unit_install)
+ parser_unit_install.add_argument(
+ '--fsid',
+ help='cluster FSID')
+ parser_unit_install.add_argument(
+ '--name', '-n',
+ required=True,
+ help='daemon name (type.id)')
parser_logs = subparsers.add_parser(
'logs', help='print journald logs for a daemon container')
diff --git a/src/cephadm/cephadmlib/container_daemon_form.py b/src/cephadm/cephadmlib/container_daemon_form.py
index ae8b6afe3c6..693b0cf8df0 100644
--- a/src/cephadm/cephadmlib/container_daemon_form.py
+++ b/src/cephadm/cephadmlib/container_daemon_form.py
@@ -4,6 +4,7 @@ import abc
from typing import List, Tuple, Optional, Dict
+from .container_engines import Podman
from .container_types import CephContainer, InitContainer
from .context import CephadmContext
from .daemon_form import DaemonForm
@@ -110,3 +111,67 @@ class ContainerDaemonForm(DaemonForm):
in a container.
"""
return ''
+
+
+def daemon_to_container(
+ ctx: CephadmContext,
+ daemon: ContainerDaemonForm,
+ *,
+ privileged: bool = False,
+ ptrace: bool = False,
+ host_network: bool = True,
+ entrypoint: Optional[str] = None,
+ container_args: Optional[List[str]] = None,
+ container_mounts: Optional[Dict[str, str]] = None,
+ container_binds: Optional[List[List[str]]] = None,
+ envs: Optional[List[str]] = None,
+ args: Optional[List[str]] = None,
+ auto_podman_args: bool = True,
+ auto_podman_mounts: bool = True,
+) -> CephContainer:
+ """daemon_to_container is a utility function that serves to create
+ CephContainer instances from a container daemon form's customize and
+ entrypoint methods.
+ Most of the parameters (like mounts, container_args, etc) can be passed in
+ to "pre customize" the values.
+ The auto_podman_args argument enables adding default arguments expected on
+ all podman daemons (true by default).
+ The auto_podman_mounts argument enables adding mounts expected on all
+ daemons running on podman (true by default).
+ """
+ container_args = container_args if container_args else []
+ container_mounts = container_mounts if container_mounts else {}
+ container_binds = container_binds if container_binds else []
+ envs = envs if envs else []
+ args = args if args else []
+
+ if entrypoint is None:
+ entrypoint = daemon.default_entrypoint()
+ daemon.customize_container_args(ctx, container_args)
+ daemon.customize_container_mounts(ctx, container_mounts)
+ daemon.customize_container_binds(ctx, container_binds)
+ daemon.customize_container_envs(ctx, envs)
+ daemon.customize_process_args(ctx, args)
+
+ _is_podman = isinstance(ctx.container_engine, Podman)
+ if auto_podman_mounts and _is_podman:
+ ctx.container_engine.update_mounts(ctx, container_mounts)
+ if auto_podman_args and _is_podman:
+ service_name = f'{daemon.identity.unit_name}.service'
+ container_args.extend(
+ ctx.container_engine.service_args(ctx, service_name)
+ )
+
+ return CephContainer.for_daemon(
+ ctx,
+ ident=daemon.identity,
+ entrypoint=entrypoint,
+ args=args,
+ container_args=container_args,
+ volume_mounts=container_mounts,
+ bind_mounts=container_binds,
+ envs=envs,
+ privileged=privileged,
+ ptrace=ptrace,
+ host_network=host_network,
+ )
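As a rough usage sketch (the class and its hooks are placeholders, not part of this change), a ContainerDaemonForm subclass is expected to route container construction through this helper so its customize_* hooks are applied consistently:

    class ExampleDaemon(ContainerDaemonForm):
        # identity property, customize_* hooks and default_entrypoint()
        # are assumed to be defined as in the other daemon modules

        def container(self, ctx: CephadmContext) -> CephContainer:
            # daemon_to_container calls back into the hooks defined above
            return daemon_to_container(ctx, self, privileged=False)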
diff --git a/src/cephadm/cephadmlib/daemons/__init__.py b/src/cephadm/cephadmlib/daemons/__init__.py
new file mode 100644
index 00000000000..cf572d487c9
--- /dev/null
+++ b/src/cephadm/cephadmlib/daemons/__init__.py
@@ -0,0 +1,24 @@
+from .ceph import Ceph, OSD, CephExporter
+from .custom import CustomContainer
+from .ingress import HAproxy, Keepalived
+from .iscsi import CephIscsi
+from .monitoring import Monitoring
+from .nfs import NFSGanesha
+from .nvmeof import CephNvmeof
+from .snmp import SNMPGateway
+from .tracing import Tracing
+
+__all__ = [
+ 'Ceph',
+ 'CephExporter',
+ 'CephIscsi',
+ 'CephNvmeof',
+ 'CustomContainer',
+ 'HAproxy',
+ 'Keepalived',
+ 'Monitoring',
+ 'NFSGanesha',
+ 'OSD',
+ 'SNMPGateway',
+ 'Tracing',
+]
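A short, illustrative import against the new package layout (assuming cephadmlib is on the import path):

    from cephadmlib.daemons import Ceph, NFSGanesha, SNMPGateway

    # importing the package registers each class as a daemon form via the
    # register_daemon_form decorator applied in the individual modules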
diff --git a/src/cephadm/cephadmlib/daemons/ceph.py b/src/cephadm/cephadmlib/daemons/ceph.py
new file mode 100644
index 00000000000..0afb8f734af
--- /dev/null
+++ b/src/cephadm/cephadmlib/daemons/ceph.py
@@ -0,0 +1,462 @@
+import logging
+import os
+
+from typing import Any, Dict, List, Optional, Tuple, Union
+
+from ..container_daemon_form import ContainerDaemonForm, daemon_to_container
+from ..container_types import CephContainer, extract_uid_gid
+from ..context_getters import (
+ fetch_configs,
+ get_config_and_keyring,
+ should_log_to_journald,
+)
+from ..daemon_form import register as register_daemon_form
+from ..daemon_identity import DaemonIdentity
+from ..constants import DEFAULT_IMAGE
+from ..context import CephadmContext
+from ..deployment_utils import to_deployment_container
+from ..exceptions import Error
+from ..file_utils import make_run_dir, pathify
+from ..host_facts import HostFacts
+from ..logging import Highlight
+from ..net_utils import get_hostname, get_ip_addresses
+
+
+logger = logging.getLogger()
+
+
+@register_daemon_form
+class Ceph(ContainerDaemonForm):
+ _daemons = (
+ 'mon',
+ 'mgr',
+ 'osd',
+ 'mds',
+ 'rgw',
+ 'rbd-mirror',
+ 'crash',
+ 'cephfs-mirror',
+ )
+
+ @classmethod
+ def for_daemon_type(cls, daemon_type: str) -> bool:
+ # TODO: figure out a way to un-special-case osd
+ return daemon_type in cls._daemons and daemon_type != 'osd'
+
+ def __init__(self, ctx: CephadmContext, ident: DaemonIdentity) -> None:
+ self.ctx = ctx
+ self._identity = ident
+ self.user_supplied_config = False
+
+ @classmethod
+ def create(cls, ctx: CephadmContext, ident: DaemonIdentity) -> 'Ceph':
+ return cls(ctx, ident)
+
+ @property
+ def identity(self) -> DaemonIdentity:
+ return self._identity
+
+ def firewall_service_name(self) -> str:
+ if self.identity.daemon_type == 'mon':
+ return 'ceph-mon'
+ elif self.identity.daemon_type in ['mgr', 'mds']:
+ return 'ceph'
+ return ''
+
+ def container(self, ctx: CephadmContext) -> CephContainer:
+ # prior to becoming a ContainerDaemonForm, this call to create the
+ # var-run directory was hard-coded in the deploy path. Eventually, it
+ # would be good to move this somewhere cleaner and avoid needing to know
+ # the uid/gid here.
+ uid, gid = self.uid_gid(ctx)
+ make_run_dir(ctx.fsid, uid, gid)
+
+ # mon and osd need privileged in order for libudev to query devices
+ privileged = self.identity.daemon_type in ['mon', 'osd']
+ ctr = daemon_to_container(ctx, self, privileged=privileged)
+ ctr = to_deployment_container(ctx, ctr)
+ config_json = fetch_configs(ctx)
+ if self.identity.daemon_type == 'mon' and config_json is not None:
+ if 'crush_location' in config_json:
+ c_loc = config_json['crush_location']
+ # was originally "c.args.extend(['--set-crush-location', c_loc])"
+ # but that doesn't seem to persist in the object after it's passed
+ # into further function calls
+ ctr.args = ctr.args + ['--set-crush-location', c_loc]
+ return ctr
+
+ _uid_gid: Optional[Tuple[int, int]] = None
+
+ def uid_gid(self, ctx: CephadmContext) -> Tuple[int, int]:
+ if self._uid_gid is None:
+ self._uid_gid = extract_uid_gid(ctx)
+ return self._uid_gid
+
+ def config_and_keyring(
+ self, ctx: CephadmContext
+ ) -> Tuple[Optional[str], Optional[str]]:
+ return get_config_and_keyring(ctx)
+
+ def get_daemon_args(self) -> List[str]:
+ if self.identity.daemon_type == 'crash':
+ return []
+ r = [
+ '--setuser',
+ 'ceph',
+ '--setgroup',
+ 'ceph',
+ '--default-log-to-file=false',
+ ]
+ log_to_journald = should_log_to_journald(self.ctx)
+ if log_to_journald:
+ r += [
+ '--default-log-to-journald=true',
+ '--default-log-to-stderr=false',
+ ]
+ else:
+ r += [
+ '--default-log-to-stderr=true',
+ '--default-log-stderr-prefix=debug ',
+ ]
+ if self.identity.daemon_type == 'mon':
+ r += [
+ '--default-mon-cluster-log-to-file=false',
+ ]
+ if log_to_journald:
+ r += [
+ '--default-mon-cluster-log-to-journald=true',
+ '--default-mon-cluster-log-to-stderr=false',
+ ]
+ else:
+ r += ['--default-mon-cluster-log-to-stderr=true']
+ return r
+
+ @staticmethod
+ def get_ceph_mounts(
+ ctx: CephadmContext,
+ ident: DaemonIdentity,
+ no_config: bool = False,
+ ) -> Dict[str, str]:
+ # Warning: This is a hack done for more expedient refactoring
+ mounts = get_ceph_mounts_for_type(ctx, ident.fsid, ident.daemon_type)
+ data_dir = ident.data_dir(ctx.data_dir)
+ if ident.daemon_type == 'rgw':
+ cdata_dir = '/var/lib/ceph/radosgw/ceph-rgw.%s' % (
+ ident.daemon_id
+ )
+ else:
+ cdata_dir = '/var/lib/ceph/%s/ceph-%s' % (
+ ident.daemon_type,
+ ident.daemon_id,
+ )
+ if ident.daemon_type != 'crash':
+ mounts[data_dir] = cdata_dir + ':z'
+ if not no_config:
+ mounts[data_dir + '/config'] = '/etc/ceph/ceph.conf:z'
+ if ident.daemon_type in [
+ 'rbd-mirror',
+ 'cephfs-mirror',
+ 'crash',
+ 'ceph-exporter',
+ ]:
+ # these do not search for their keyrings in a data directory
+ mounts[
+ data_dir + '/keyring'
+ ] = '/etc/ceph/ceph.client.%s.%s.keyring' % (
+ ident.daemon_type,
+ ident.daemon_id,
+ )
+ return mounts
+
+ def customize_container_mounts(
+ self, ctx: CephadmContext, mounts: Dict[str, str]
+ ) -> None:
+ no_config = bool(
+ getattr(ctx, 'config', None) and self.user_supplied_config
+ )
+ cm = self.get_ceph_mounts(
+ ctx,
+ self.identity,
+ no_config=no_config,
+ )
+ mounts.update(cm)
+
+ def customize_container_args(
+ self, ctx: CephadmContext, args: List[str]
+ ) -> None:
+ args.append(ctx.container_engine.unlimited_pids_option)
+
+ def customize_process_args(
+ self, ctx: CephadmContext, args: List[str]
+ ) -> None:
+ ident = self.identity
+ if ident.daemon_type == 'rgw':
+ name = 'client.rgw.%s' % ident.daemon_id
+ elif ident.daemon_type == 'rbd-mirror':
+ name = 'client.rbd-mirror.%s' % ident.daemon_id
+ elif ident.daemon_type == 'cephfs-mirror':
+ name = 'client.cephfs-mirror.%s' % ident.daemon_id
+ elif ident.daemon_type == 'crash':
+ name = 'client.crash.%s' % ident.daemon_id
+ elif ident.daemon_type in ['mon', 'mgr', 'mds', 'osd']:
+ name = ident.daemon_name
+ else:
+ raise ValueError(ident)
+ args.extend(['-n', name])
+ if ident.daemon_type != 'crash':
+ args.append('-f')
+ args.extend(self.get_daemon_args())
+
+ def customize_container_envs(
+ self, ctx: CephadmContext, envs: List[str]
+ ) -> None:
+ envs.append('TCMALLOC_MAX_TOTAL_THREAD_CACHE_BYTES=134217728')
+
+ def default_entrypoint(self) -> str:
+ ep = {
+ 'rgw': '/usr/bin/radosgw',
+ 'rbd-mirror': '/usr/bin/rbd-mirror',
+ 'cephfs-mirror': '/usr/bin/cephfs-mirror',
+ }
+ daemon_type = self.identity.daemon_type
+ return ep.get(daemon_type) or f'/usr/bin/ceph-{daemon_type}'
+
+
+@register_daemon_form
+class OSD(Ceph):
+ @classmethod
+ def for_daemon_type(cls, daemon_type: str) -> bool:
+ # TODO: figure out a way to un-special-case osd
+ return daemon_type == 'osd'
+
+ def __init__(
+ self,
+ ctx: CephadmContext,
+ ident: DaemonIdentity,
+ osd_fsid: Optional[str] = None,
+ ) -> None:
+ super().__init__(ctx, ident)
+ self._osd_fsid = osd_fsid
+
+ @classmethod
+ def create(cls, ctx: CephadmContext, ident: DaemonIdentity) -> 'OSD':
+ osd_fsid = getattr(ctx, 'osd_fsid', None)
+ if osd_fsid is None:
+ logger.info(
+ 'Creating an OSD daemon form without an OSD FSID value'
+ )
+ return cls(ctx, ident, osd_fsid)
+
+ @staticmethod
+ def get_sysctl_settings() -> List[str]:
+ return [
+ '# allow a large number of OSDs',
+ 'fs.aio-max-nr = 1048576',
+ 'kernel.pid_max = 4194304',
+ ]
+
+ def firewall_service_name(self) -> str:
+ return 'ceph'
+
+ @property
+ def osd_fsid(self) -> Optional[str]:
+ return self._osd_fsid
+
+
+@register_daemon_form
+class CephExporter(ContainerDaemonForm):
+ """Defines a Ceph exporter container"""
+
+ daemon_type = 'ceph-exporter'
+ entrypoint = '/usr/bin/ceph-exporter'
+ DEFAULT_PORT = 9926
+ port_map = {
+ 'ceph-exporter': DEFAULT_PORT,
+ }
+
+ @classmethod
+ def for_daemon_type(cls, daemon_type: str) -> bool:
+ return cls.daemon_type == daemon_type
+
+ def __init__(
+ self,
+ ctx: CephadmContext,
+ fsid: str,
+ daemon_id: Union[int, str],
+ config_json: Dict[str, Any],
+ image: str = DEFAULT_IMAGE,
+ ) -> None:
+ self.ctx = ctx
+ self.fsid = fsid
+ self.daemon_id = daemon_id
+ self.image = image
+
+ self.sock_dir = config_json.get('sock-dir', '/var/run/ceph/')
+ ipv4_addrs, _ = get_ip_addresses(get_hostname())
+ addrs = '0.0.0.0' if ipv4_addrs else '::'
+ self.addrs = config_json.get('addrs', addrs)
+ self.port = config_json.get('port', self.DEFAULT_PORT)
+ self.prio_limit = config_json.get('prio-limit', 5)
+ self.stats_period = config_json.get('stats-period', 5)
+
+ self.validate()
+
+ @classmethod
+ def init(
+ cls, ctx: CephadmContext, fsid: str, daemon_id: Union[int, str]
+ ) -> 'CephExporter':
+ return cls(ctx, fsid, daemon_id, fetch_configs(ctx), ctx.image)
+
+ @classmethod
+ def create(
+ cls, ctx: CephadmContext, ident: DaemonIdentity
+ ) -> 'CephExporter':
+ return cls.init(ctx, ident.fsid, ident.daemon_id)
+
+ @property
+ def identity(self) -> DaemonIdentity:
+ return DaemonIdentity(self.fsid, self.daemon_type, self.daemon_id)
+
+ def get_daemon_args(self) -> List[str]:
+ args = [
+ f'--sock-dir={self.sock_dir}',
+ f'--addrs={self.addrs}',
+ f'--port={self.port}',
+ f'--prio-limit={self.prio_limit}',
+ f'--stats-period={self.stats_period}',
+ ]
+ return args
+
+ def validate(self) -> None:
+ if not os.path.isdir(self.sock_dir):
+ raise Error(f'Directory does not exist. Got: {self.sock_dir}')
+
+ def container(self, ctx: CephadmContext) -> CephContainer:
+ ctr = daemon_to_container(ctx, self)
+ return to_deployment_container(ctx, ctr)
+
+ def uid_gid(self, ctx: CephadmContext) -> Tuple[int, int]:
+ return extract_uid_gid(ctx)
+
+ def config_and_keyring(
+ self, ctx: CephadmContext
+ ) -> Tuple[Optional[str], Optional[str]]:
+ return get_config_and_keyring(ctx)
+
+ def customize_container_mounts(
+ self, ctx: CephadmContext, mounts: Dict[str, str]
+ ) -> None:
+ cm = Ceph.get_ceph_mounts(ctx, self.identity)
+ mounts.update(cm)
+
+ def customize_process_args(
+ self, ctx: CephadmContext, args: List[str]
+ ) -> None:
+ name = 'client.ceph-exporter.%s' % self.identity.daemon_id
+ args.extend(['-n', name, '-f'])
+ args.extend(self.get_daemon_args())
+
+ def customize_container_args(
+ self, ctx: CephadmContext, args: List[str]
+ ) -> None:
+ args.append(ctx.container_engine.unlimited_pids_option)
+
+ def customize_container_envs(
+ self, ctx: CephadmContext, envs: List[str]
+ ) -> None:
+ envs.append('TCMALLOC_MAX_TOTAL_THREAD_CACHE_BYTES=134217728')
+
+ def default_entrypoint(self) -> str:
+ return self.entrypoint
+
+
+def get_ceph_mounts_for_type(
+ ctx: CephadmContext, fsid: str, daemon_type: str
+) -> Dict[str, str]:
+ """The main implementation of get_container_mounts_for_type minus the call
+ to _update_podman_mounts so that this can be called from
+ get_container_mounts.
+ """
+ mounts = dict()
+
+ if daemon_type in ceph_daemons():
+ if fsid:
+ run_path = os.path.join('/var/run/ceph', fsid)
+ if os.path.exists(run_path):
+ mounts[run_path] = '/var/run/ceph:z'
+ log_dir = os.path.join(ctx.log_dir, fsid)
+ mounts[log_dir] = '/var/log/ceph:z'
+ crash_dir = '/var/lib/ceph/%s/crash' % fsid
+ if os.path.exists(crash_dir):
+ mounts[crash_dir] = '/var/lib/ceph/crash:z'
+ if daemon_type != 'crash' and should_log_to_journald(ctx):
+ journald_sock_dir = '/run/systemd/journal'
+ mounts[journald_sock_dir] = journald_sock_dir
+
+ if daemon_type in ['mon', 'osd', 'clusterless-ceph-volume']:
+ mounts['/dev'] = '/dev' # FIXME: narrow this down?
+ mounts['/run/udev'] = '/run/udev'
+ if daemon_type in ['osd', 'clusterless-ceph-volume']:
+ mounts['/sys'] = '/sys' # for numa.cc, pick_address, cgroups, ...
+ mounts['/run/lvm'] = '/run/lvm'
+ mounts['/run/lock/lvm'] = '/run/lock/lvm'
+ if daemon_type == 'osd':
+ # selinux-policy in the container may not match the host.
+ if HostFacts(ctx).selinux_enabled:
+ cluster_dir = f'{ctx.data_dir}/{fsid}'
+ selinux_folder = f'{cluster_dir}/selinux'
+ if os.path.exists(cluster_dir):
+ if not os.path.exists(selinux_folder):
+ os.makedirs(selinux_folder, mode=0o755)
+ mounts[selinux_folder] = '/sys/fs/selinux:ro'
+ else:
+ logger.error(
+ f'Cluster directory {cluster_dir} does not exist.'
+ )
+ mounts['/'] = '/rootfs'
+
+ try:
+ if (
+ ctx.shared_ceph_folder
+ ): # makes manager module / ceph-volume development easier
+ ceph_folder = pathify(ctx.shared_ceph_folder)
+ if os.path.exists(ceph_folder):
+ cephadm_binary = ceph_folder + '/src/cephadm/cephadm'
+ if not os.path.exists(pathify(cephadm_binary)):
+ raise Error(
+ "cephadm binary does not exist. Please run './build.sh cephadm' from ceph/src/cephadm/ directory."
+ )
+ mounts[cephadm_binary] = '/usr/sbin/cephadm'
+ mounts[
+ ceph_folder + '/src/ceph-volume/ceph_volume'
+ ] = '/usr/lib/python3.6/site-packages/ceph_volume'
+ mounts[
+ ceph_folder + '/src/pybind/mgr'
+ ] = '/usr/share/ceph/mgr'
+ mounts[
+ ceph_folder + '/src/python-common/ceph'
+ ] = '/usr/lib/python3.6/site-packages/ceph'
+ mounts[
+ ceph_folder + '/monitoring/ceph-mixin/dashboards_out'
+ ] = '/etc/grafana/dashboards/ceph-dashboard'
+ mounts[
+ ceph_folder
+ + '/monitoring/ceph-mixin/prometheus_alerts.yml'
+ ] = '/etc/prometheus/ceph/ceph_default_alerts.yml'
+ else:
+ logger.error(
+ 'Ceph shared source folder does not exist.',
+ extra=Highlight.FAILURE.extra(),
+ )
+ except AttributeError:
+ pass
+ return mounts
+
+
+def ceph_daemons() -> List[str]:
+ """A legacy method that returns a list of all daemon types considered ceph
+ daemons.
+ """
+ cds = list(Ceph._daemons)
+ cds.append(CephExporter.daemon_type)
+ return cds
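A hedged sketch of how the pieces in this module fit together; ctx and the fsid below are placeholders rather than values from this change:

    from typing import Dict

    ident = DaemonIdentity('11111111-2222-3333-4444-555555555555', 'mon', 'a')
    mon = Ceph.create(ctx, ident)      # registered ContainerDaemonForm for a mon
    assert 'mon' in ceph_daemons()     # mons are counted as ceph daemons

    mounts: Dict[str, str] = {}
    mon.customize_container_mounts(ctx, mounts)  # data dir, config and log mounts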
diff --git a/src/cephadm/cephadmlib/daemons/custom.py b/src/cephadm/cephadmlib/daemons/custom.py
new file mode 100644
index 00000000000..e833c80c9a5
--- /dev/null
+++ b/src/cephadm/cephadmlib/daemons/custom.py
@@ -0,0 +1,222 @@
+import logging
+import os
+import re
+
+from typing import Any, Dict, List, Optional, Tuple, Union
+
+from ..container_daemon_form import ContainerDaemonForm, daemon_to_container
+from ..container_types import CephContainer, InitContainer
+from ..context import CephadmContext
+from ..context_getters import fetch_configs
+from ..daemon_form import register as register_daemon_form
+from ..daemon_identity import DaemonIdentity
+from ..data_utils import dict_get, dict_get_join
+from ..deploy import DeploymentType
+from ..deployment_utils import to_deployment_container
+from ..file_utils import write_new, makedirs
+from ..net_utils import EndPoint
+
+
+logger = logging.getLogger()
+
+
+@register_daemon_form
+class CustomContainer(ContainerDaemonForm):
+ """Defines a custom container"""
+
+ daemon_type = 'container'
+
+ @classmethod
+ def for_daemon_type(cls, daemon_type: str) -> bool:
+ return cls.daemon_type == daemon_type
+
+ def __init__(
+ self,
+ fsid: str,
+ daemon_id: Union[int, str],
+ config_json: Dict,
+ image: str,
+ ) -> None:
+ self.fsid = fsid
+ self.daemon_id = daemon_id
+ self.image = image
+
+ # config-json options
+ self.entrypoint = dict_get(config_json, 'entrypoint')
+ self.uid = dict_get(config_json, 'uid', 65534) # nobody
+ self.gid = dict_get(config_json, 'gid', 65534) # nobody
+ self.volume_mounts = dict_get(config_json, 'volume_mounts', {})
+ self.args = dict_get(config_json, 'args', [])
+ self.envs = dict_get(config_json, 'envs', [])
+ self.privileged = dict_get(config_json, 'privileged', False)
+ self.bind_mounts = dict_get(config_json, 'bind_mounts', [])
+ self.ports = dict_get(config_json, 'ports', [])
+ self.dirs = dict_get(config_json, 'dirs', [])
+ self.files = dict_get(config_json, 'files', {})
+
+ @classmethod
+ def init(
+ cls, ctx: CephadmContext, fsid: str, daemon_id: Union[int, str]
+ ) -> 'CustomContainer':
+ return cls(fsid, daemon_id, fetch_configs(ctx), ctx.image)
+
+ @classmethod
+ def create(
+ cls, ctx: CephadmContext, ident: DaemonIdentity
+ ) -> 'CustomContainer':
+ return cls.init(ctx, ident.fsid, ident.daemon_id)
+
+ @property
+ def identity(self) -> DaemonIdentity:
+ return DaemonIdentity(self.fsid, self.daemon_type, self.daemon_id)
+
+ def create_daemon_dirs(self, data_dir: str, uid: int, gid: int) -> None:
+ """
+ Create dirs/files below the container data directory.
+ """
+ logger.info(
+ 'Creating custom container configuration '
+ 'dirs/files in {} ...'.format(data_dir)
+ )
+
+ if not os.path.isdir(data_dir):
+ raise OSError('data_dir is not a directory: %s' % data_dir)
+
+ for dir_path in self.dirs:
+ logger.info('Creating directory: {}'.format(dir_path))
+ dir_path = os.path.join(data_dir, dir_path.strip('/'))
+ makedirs(dir_path, uid, gid, 0o755)
+
+ for file_path in self.files:
+ logger.info('Creating file: {}'.format(file_path))
+ content = dict_get_join(self.files, file_path)
+ file_path = os.path.join(data_dir, file_path.strip('/'))
+ with write_new(
+ file_path, owner=(uid, gid), encoding='utf-8'
+ ) as f:
+ f.write(content)
+
+ def get_daemon_args(self) -> List[str]:
+ return []
+
+ def get_container_args(self) -> List[str]:
+ return self.args
+
+ def get_container_envs(self) -> List[str]:
+ return self.envs
+
+ def _get_container_mounts(self, data_dir: str) -> Dict[str, str]:
+ """
+ Get the volume mounts. Relative source paths will be located below
+ `/var/lib/ceph/<cluster-fsid>/<daemon-name>`.
+
+ Example:
+ {
+ /foo/conf: /conf
+ foo/conf: /conf
+ }
+ becomes
+ {
+ /foo/conf: /conf
+ /var/lib/ceph/<cluster-fsid>/<daemon-name>/foo/conf: /conf
+ }
+ """
+ mounts = {}
+ for source, destination in self.volume_mounts.items():
+ source = os.path.join(data_dir, source)
+ mounts[source] = destination
+ return mounts
+
+ def customize_container_mounts(
+ self, ctx: CephadmContext, mounts: Dict[str, str]
+ ) -> None:
+ data_dir = self.identity.data_dir(ctx.data_dir)
+ mounts.update(self._get_container_mounts(data_dir))
+
+ def _get_container_binds(self, data_dir: str) -> List[List[str]]:
+ """
+ Get the bind mounts. Relative `source=...` paths will be located below
+ `/var/lib/ceph/<cluster-fsid>/<daemon-name>`.
+
+ Example:
+ [
+ 'type=bind',
+ 'source=lib/modules',
+ 'destination=/lib/modules',
+ 'ro=true'
+ ]
+ becomes
+ [
+ ...
+ 'source=/var/lib/ceph/<cluster-fsid>/<daemon-name>/lib/modules',
+ ...
+ ]
+ """
+ binds = self.bind_mounts.copy()
+ for bind in binds:
+ for index, value in enumerate(bind):
+ match = re.match(r'^source=(.+)$', value)
+ if match:
+ bind[index] = 'source={}'.format(
+ os.path.join(data_dir, match.group(1))
+ )
+ return binds
+
+ def customize_container_binds(
+ self, ctx: CephadmContext, binds: List[List[str]]
+ ) -> None:
+ data_dir = self.identity.data_dir(ctx.data_dir)
+ binds.extend(self._get_container_binds(data_dir))
+
+ # Cache the container so we don't need to rebuild it again when calling
+ # into init_containers
+ _container: Optional[CephContainer] = None
+
+ def container(self, ctx: CephadmContext) -> CephContainer:
+ if self._container is None:
+ ctr = daemon_to_container(
+ ctx,
+ self,
+ host_network=False,
+ privileged=self.privileged,
+ ptrace=ctx.allow_ptrace,
+ )
+ self._container = to_deployment_container(ctx, ctr)
+ return self._container
+
+ def init_containers(self, ctx: CephadmContext) -> List[InitContainer]:
+ primary = self.container(ctx)
+ init_containers: List[Dict[str, Any]] = getattr(
+ ctx, 'init_containers', []
+ )
+ return [
+ InitContainer.from_primary_and_opts(ctx, primary, ic_opts)
+ for ic_opts in init_containers
+ ]
+
+ def customize_container_endpoints(
+ self, endpoints: List[EndPoint], deployment_type: DeploymentType
+ ) -> None:
+ if deployment_type == DeploymentType.DEFAULT:
+ endpoints.extend([EndPoint('0.0.0.0', p) for p in self.ports])
+
+ def customize_container_envs(
+ self, ctx: CephadmContext, envs: List[str]
+ ) -> None:
+ envs.extend(self.get_container_envs())
+
+ def customize_container_args(
+ self, ctx: CephadmContext, args: List[str]
+ ) -> None:
+ args.extend(self.get_container_args())
+
+ def customize_process_args(
+ self, ctx: CephadmContext, args: List[str]
+ ) -> None:
+ args.extend(self.get_daemon_args())
+
+ def default_entrypoint(self) -> str:
+ return self.entrypoint or ''
+
+ def uid_gid(self, ctx: CephadmContext) -> Tuple[int, int]:
+ return self.uid, self.gid
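Illustrating the relative-path handling documented in _get_container_mounts above; the fsid, name and image strings are placeholders:

    cfg = {'volume_mounts': {'/etc/foo.conf': '/conf', 'foo/conf': '/conf2'}}
    cc = CustomContainer('11111111-2222-3333-4444-555555555555', 'foo', cfg,
                         'quay.io/example/image:latest')
    cc._get_container_mounts('/var/lib/ceph/<fsid>/container.foo')
    # {'/etc/foo.conf': '/conf',
    #  '/var/lib/ceph/<fsid>/container.foo/foo/conf': '/conf2'}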
diff --git a/src/cephadm/cephadmlib/daemons/ingress.py b/src/cephadm/cephadmlib/daemons/ingress.py
new file mode 100644
index 00000000000..6064cf538fb
--- /dev/null
+++ b/src/cephadm/cephadmlib/daemons/ingress.py
@@ -0,0 +1,290 @@
+import os
+
+from typing import Dict, List, Optional, Tuple, Union
+
+from ..constants import (
+ DEFAULT_HAPROXY_IMAGE,
+ DEFAULT_KEEPALIVED_IMAGE,
+ DATA_DIR_MODE,
+)
+from ..container_daemon_form import ContainerDaemonForm, daemon_to_container
+from ..container_types import CephContainer, extract_uid_gid
+from ..context import CephadmContext
+from ..context_getters import fetch_configs
+from ..daemon_form import register as register_daemon_form
+from ..daemon_identity import DaemonIdentity
+from ..data_utils import dict_get, is_fsid
+from ..deployment_utils import to_deployment_container
+from ..exceptions import Error
+from ..file_utils import makedirs, populate_files
+
+
+@register_daemon_form
+class HAproxy(ContainerDaemonForm):
+ """Defines an HAproxy container"""
+
+ daemon_type = 'haproxy'
+ required_files = ['haproxy.cfg']
+ default_image = DEFAULT_HAPROXY_IMAGE
+
+ @classmethod
+ def for_daemon_type(cls, daemon_type: str) -> bool:
+ return cls.daemon_type == daemon_type
+
+ def __init__(
+ self,
+ ctx: CephadmContext,
+ fsid: str,
+ daemon_id: Union[int, str],
+ config_json: Dict,
+ image: str,
+ ) -> None:
+ self.ctx = ctx
+ self.fsid = fsid
+ self.daemon_id = daemon_id
+ self.image = image
+
+ # config-json options
+ self.files = dict_get(config_json, 'files', {})
+
+ self.validate()
+
+ @classmethod
+ def init(
+ cls, ctx: CephadmContext, fsid: str, daemon_id: Union[int, str]
+ ) -> 'HAproxy':
+ return cls(ctx, fsid, daemon_id, fetch_configs(ctx), ctx.image)
+
+ @classmethod
+ def create(cls, ctx: CephadmContext, ident: DaemonIdentity) -> 'HAproxy':
+ return cls.init(ctx, ident.fsid, ident.daemon_id)
+
+ @property
+ def identity(self) -> DaemonIdentity:
+ return DaemonIdentity(self.fsid, self.daemon_type, self.daemon_id)
+
+ def create_daemon_dirs(self, data_dir: str, uid: int, gid: int) -> None:
+ """Create files under the container data dir"""
+ if not os.path.isdir(data_dir):
+ raise OSError('data_dir is not a directory: %s' % (data_dir))
+
+ # create additional directories in data dir for HAproxy to use
+ if not os.path.isdir(os.path.join(data_dir, 'haproxy')):
+ makedirs(
+ os.path.join(data_dir, 'haproxy'), uid, gid, DATA_DIR_MODE
+ )
+
+ data_dir = os.path.join(data_dir, 'haproxy')
+ populate_files(data_dir, self.files, uid, gid)
+
+ def get_daemon_args(self) -> List[str]:
+ return ['haproxy', '-f', '/var/lib/haproxy/haproxy.cfg']
+
+ def validate(self):
+ # type: () -> None
+ if not is_fsid(self.fsid):
+ raise Error('not an fsid: %s' % self.fsid)
+ if not self.daemon_id:
+ raise Error('invalid daemon_id: %s' % self.daemon_id)
+ if not self.image:
+ raise Error('invalid image: %s' % self.image)
+
+ # check for the required files
+ if self.required_files:
+ for fname in self.required_files:
+ if fname not in self.files:
+ raise Error(
+ 'required file missing from config-json: %s' % fname
+ )
+
+ def get_daemon_name(self):
+ # type: () -> str
+ return '%s.%s' % (self.daemon_type, self.daemon_id)
+
+ def get_container_name(self, desc=None):
+ # type: (Optional[str]) -> str
+ cname = 'ceph-%s-%s' % (self.fsid, self.get_daemon_name())
+ if desc:
+ cname = '%s-%s' % (cname, desc)
+ return cname
+
+ def uid_gid(self, ctx: CephadmContext) -> Tuple[int, int]:
+ # better directory for this?
+ return extract_uid_gid(self.ctx, file_path='/var/lib')
+
+ @staticmethod
+ def _get_container_mounts(data_dir: str) -> Dict[str, str]:
+ mounts = dict()
+ mounts[os.path.join(data_dir, 'haproxy')] = '/var/lib/haproxy'
+ return mounts
+
+ def customize_container_mounts(
+ self, ctx: CephadmContext, mounts: Dict[str, str]
+ ) -> None:
+ data_dir = self.identity.data_dir(ctx.data_dir)
+ mounts.update(self._get_container_mounts(data_dir))
+
+ @staticmethod
+ def get_sysctl_settings() -> List[str]:
+ return [
+ '# IP forwarding and non-local bind',
+ 'net.ipv4.ip_forward = 1',
+ 'net.ipv4.ip_nonlocal_bind = 1',
+ ]
+
+ def container(self, ctx: CephadmContext) -> CephContainer:
+ ctr = daemon_to_container(ctx, self)
+ return to_deployment_container(ctx, ctr)
+
+ def customize_container_args(
+ self, ctx: CephadmContext, args: List[str]
+ ) -> None:
+ args.extend(
+ ['--user=root']
+ ) # haproxy 2.4 defaults to a different user
+
+ def customize_process_args(
+ self, ctx: CephadmContext, args: List[str]
+ ) -> None:
+ args.extend(self.get_daemon_args())
+
+
+@register_daemon_form
+class Keepalived(ContainerDaemonForm):
+ """Defines an Keepalived container"""
+
+ daemon_type = 'keepalived'
+ required_files = ['keepalived.conf']
+ default_image = DEFAULT_KEEPALIVED_IMAGE
+
+ @classmethod
+ def for_daemon_type(cls, daemon_type: str) -> bool:
+ return cls.daemon_type == daemon_type
+
+ def __init__(
+ self,
+ ctx: CephadmContext,
+ fsid: str,
+ daemon_id: Union[int, str],
+ config_json: Dict,
+ image: str,
+ ) -> None:
+ self.ctx = ctx
+ self.fsid = fsid
+ self.daemon_id = daemon_id
+ self.image = image
+
+ # config-json options
+ self.files = dict_get(config_json, 'files', {})
+
+ self.validate()
+
+ @classmethod
+ def init(
+ cls, ctx: CephadmContext, fsid: str, daemon_id: Union[int, str]
+ ) -> 'Keepalived':
+ return cls(ctx, fsid, daemon_id, fetch_configs(ctx), ctx.image)
+
+ @classmethod
+ def create(
+ cls, ctx: CephadmContext, ident: DaemonIdentity
+ ) -> 'Keepalived':
+ return cls.init(ctx, ident.fsid, ident.daemon_id)
+
+ @property
+ def identity(self) -> DaemonIdentity:
+ return DaemonIdentity(self.fsid, self.daemon_type, self.daemon_id)
+
+ def create_daemon_dirs(self, data_dir: str, uid: int, gid: int) -> None:
+ """Create files under the container data dir"""
+ if not os.path.isdir(data_dir):
+ raise OSError('data_dir is not a directory: %s' % (data_dir))
+
+ # create additional directories in data dir for keepalived to use
+ if not os.path.isdir(os.path.join(data_dir, 'keepalived')):
+ makedirs(
+ os.path.join(data_dir, 'keepalived'), uid, gid, DATA_DIR_MODE
+ )
+
+ # populate files from the config-json
+ populate_files(data_dir, self.files, uid, gid)
+
+ def validate(self):
+ # type: () -> None
+ if not is_fsid(self.fsid):
+ raise Error('not an fsid: %s' % self.fsid)
+ if not self.daemon_id:
+ raise Error('invalid daemon_id: %s' % self.daemon_id)
+ if not self.image:
+ raise Error('invalid image: %s' % self.image)
+
+ # check for the required files
+ if self.required_files:
+ for fname in self.required_files:
+ if fname not in self.files:
+ raise Error(
+ 'required file missing from config-json: %s' % fname
+ )
+
+ def get_daemon_name(self):
+ # type: () -> str
+ return '%s.%s' % (self.daemon_type, self.daemon_id)
+
+ def get_container_name(self, desc=None):
+ # type: (Optional[str]) -> str
+ cname = 'ceph-%s-%s' % (self.fsid, self.get_daemon_name())
+ if desc:
+ cname = '%s-%s' % (cname, desc)
+ return cname
+
+ @staticmethod
+ def get_container_envs():
+ # type: () -> List[str]
+ envs = [
+ 'KEEPALIVED_AUTOCONF=false',
+ 'KEEPALIVED_CONF=/etc/keepalived/keepalived.conf',
+ 'KEEPALIVED_CMD=/usr/sbin/keepalived -n -l -f /etc/keepalived/keepalived.conf',
+ 'KEEPALIVED_DEBUG=false',
+ ]
+ return envs
+
+ @staticmethod
+ def get_sysctl_settings() -> List[str]:
+ return [
+ '# IP forwarding and non-local bind',
+ 'net.ipv4.ip_forward = 1',
+ 'net.ipv4.ip_nonlocal_bind = 1',
+ ]
+
+ def uid_gid(self, ctx: CephadmContext) -> Tuple[int, int]:
+ # better directory for this?
+ return extract_uid_gid(self.ctx, file_path='/var/lib')
+
+ @staticmethod
+ def _get_container_mounts(data_dir: str) -> Dict[str, str]:
+ mounts = dict()
+ mounts[
+ os.path.join(data_dir, 'keepalived.conf')
+ ] = '/etc/keepalived/keepalived.conf'
+ return mounts
+
+ def customize_container_mounts(
+ self, ctx: CephadmContext, mounts: Dict[str, str]
+ ) -> None:
+ data_dir = self.identity.data_dir(ctx.data_dir)
+ mounts.update(self._get_container_mounts(data_dir))
+
+ def container(self, ctx: CephadmContext) -> CephContainer:
+ ctr = daemon_to_container(ctx, self)
+ return to_deployment_container(ctx, ctr)
+
+ def customize_container_envs(
+ self, ctx: CephadmContext, envs: List[str]
+ ) -> None:
+ envs.extend(self.get_container_envs())
+
+ def customize_container_args(
+ self, ctx: CephadmContext, args: List[str]
+ ) -> None:
+ args.extend(['--cap-add=NET_ADMIN', '--cap-add=NET_RAW'])
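A sketch of the config-json shape that validate() in both classes above expects; ctx, the fsid and the image are placeholders:

    config_json = {'files': {'haproxy.cfg': 'global\n  maxconn 8000\n...'}}
    ha = HAproxy(ctx, '11111111-2222-3333-4444-555555555555', 'ingress0',
                 config_json, 'quay.io/example/haproxy:2.4')
    ha.get_daemon_args()   # ['haproxy', '-f', '/var/lib/haproxy/haproxy.cfg']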
diff --git a/src/cephadm/cephadmlib/daemons/iscsi.py b/src/cephadm/cephadmlib/daemons/iscsi.py
new file mode 100644
index 00000000000..1845a37bf4e
--- /dev/null
+++ b/src/cephadm/cephadmlib/daemons/iscsi.py
@@ -0,0 +1,286 @@
+import logging
+import os
+import re
+
+from typing import Dict, List, Optional, Tuple
+
+from ..container_daemon_form import ContainerDaemonForm, daemon_to_container
+from ..container_types import CephContainer, extract_uid_gid
+from ..context_getters import fetch_configs, get_config_and_keyring
+from ..daemon_form import register as register_daemon_form
+from ..daemon_identity import DaemonIdentity, DaemonSubIdentity
+from ..constants import DEFAULT_IMAGE
+from ..context import CephadmContext
+from ..data_utils import dict_get, is_fsid
+from ..deployment_utils import to_deployment_container
+from ..exceptions import Error
+from ..file_utils import makedirs, populate_files
+from ..call_wrappers import call, CallVerbosity
+
+
+logger = logging.getLogger()
+
+
+@register_daemon_form
+class CephIscsi(ContainerDaemonForm):
+ """Defines a Ceph-Iscsi container"""
+
+ daemon_type = 'iscsi'
+ entrypoint = '/usr/bin/rbd-target-api'
+
+ required_files = ['iscsi-gateway.cfg']
+
+ @classmethod
+ def for_daemon_type(cls, daemon_type: str) -> bool:
+ return cls.daemon_type == daemon_type
+
+ def __init__(
+ self,
+ ctx: CephadmContext,
+ ident: DaemonIdentity,
+ config_json: Dict,
+ image: str = DEFAULT_IMAGE,
+ ):
+ self.ctx = ctx
+ self._identity = ident
+ self.image = image
+
+ # config-json options
+ self.files = dict_get(config_json, 'files', {})
+
+ # validate the supplied args
+ self.validate()
+
+ @classmethod
+ def init(
+ cls, ctx: CephadmContext, fsid: str, daemon_id: str
+ ) -> 'CephIscsi':
+ return cls.create(
+ ctx, DaemonIdentity(fsid, cls.daemon_type, daemon_id)
+ )
+
+ @classmethod
+ def create(
+ cls, ctx: CephadmContext, ident: DaemonIdentity
+ ) -> 'CephIscsi':
+ return cls(ctx, ident, fetch_configs(ctx), ctx.image)
+
+ @property
+ def identity(self) -> DaemonIdentity:
+ return self._identity
+
+ @property
+ def fsid(self) -> str:
+ return self._identity.fsid
+
+ @property
+ def daemon_id(self) -> str:
+ return self._identity.daemon_id
+
+ @staticmethod
+ def _get_container_mounts(data_dir, log_dir):
+ # type: (str, str) -> Dict[str, str]
+ mounts = dict()
+ mounts[os.path.join(data_dir, 'config')] = '/etc/ceph/ceph.conf:z'
+ mounts[os.path.join(data_dir, 'keyring')] = '/etc/ceph/keyring:z'
+ mounts[
+ os.path.join(data_dir, 'iscsi-gateway.cfg')
+ ] = '/etc/ceph/iscsi-gateway.cfg:z'
+ mounts[os.path.join(data_dir, 'configfs')] = '/sys/kernel/config'
+ mounts[
+ os.path.join(data_dir, 'tcmu-runner-entrypoint.sh')
+ ] = '/usr/local/scripts/tcmu-runner-entrypoint.sh'
+ mounts[log_dir] = '/var/log:z'
+ mounts['/dev'] = '/dev'
+ return mounts
+
+ def customize_container_mounts(
+ self, ctx: CephadmContext, mounts: Dict[str, str]
+ ) -> None:
+ data_dir = self.identity.data_dir(ctx.data_dir)
+ # Remove the trailing ".tcmu" from data_dir, as tcmu-runner uses the
+ # same data_dir as rbd-target-api
+ if data_dir.endswith('.tcmu'):
+ data_dir = re.sub(r'\.tcmu$', '', data_dir)
+ log_dir = os.path.join(ctx.log_dir, self.identity.fsid)
+ mounts.update(CephIscsi._get_container_mounts(data_dir, log_dir))
+
+ def customize_container_binds(
+ self, ctx: CephadmContext, binds: List[List[str]]
+ ) -> None:
+ lib_modules = [
+ 'type=bind',
+ 'source=/lib/modules',
+ 'destination=/lib/modules',
+ 'ro=true',
+ ]
+ binds.append(lib_modules)
+
+ @staticmethod
+ def get_version(ctx, container_id):
+ # type: (CephadmContext, str) -> Optional[str]
+ version = None
+ out, err, code = call(
+ ctx,
+ [
+ ctx.container_engine.path,
+ 'exec',
+ container_id,
+ '/usr/bin/python3',
+ '-c',
+ "import pkg_resources; print(pkg_resources.require('ceph_iscsi')[0].version)",
+ ],
+ verbosity=CallVerbosity.QUIET,
+ )
+ if code == 0:
+ version = out.strip()
+ return version
+
+ def validate(self):
+ # type: () -> None
+ if not is_fsid(self.fsid):
+ raise Error('not an fsid: %s' % self.fsid)
+ if not self.daemon_id:
+ raise Error('invalid daemon_id: %s' % self.daemon_id)
+ if not self.image:
+ raise Error('invalid image: %s' % self.image)
+
+ # check for the required files
+ if self.required_files:
+ for fname in self.required_files:
+ if fname not in self.files:
+ raise Error(
+ 'required file missing from config-json: %s' % fname
+ )
+
+ def get_daemon_name(self):
+ # type: () -> str
+ return '%s.%s' % (self.daemon_type, self.daemon_id)
+
+ def get_container_name(self, desc=None):
+ # type: (Optional[str]) -> str
+ cname = 'ceph-%s-%s' % (self.fsid, self.get_daemon_name())
+ if desc:
+ cname = '%s-%s' % (cname, desc)
+ return cname
+
+ def create_daemon_dirs(self, data_dir, uid, gid):
+ # type: (str, int, int) -> None
+ """Create files under the container data dir"""
+ if not os.path.isdir(data_dir):
+ raise OSError('data_dir is not a directory: %s' % (data_dir))
+
+ logger.info('Creating ceph-iscsi config...')
+ configfs_dir = os.path.join(data_dir, 'configfs')
+ makedirs(configfs_dir, uid, gid, 0o755)
+
+ # set up the tcmu-runner entrypoint script
+ # to be mounted into the container. For more info
+ # on why we need this script, see the
+ # tcmu_runner_entrypoint_script function
+ self.files[
+ 'tcmu-runner-entrypoint.sh'
+ ] = self.tcmu_runner_entrypoint_script()
+
+ # populate files from the config-json
+ populate_files(data_dir, self.files, uid, gid)
+
+ # we want the tcmu runner entrypoint script to be executable
+ # populate_files will give it 0o600 by default
+ os.chmod(os.path.join(data_dir, 'tcmu-runner-entrypoint.sh'), 0o700)
+
+ @staticmethod
+ def configfs_mount_umount(data_dir, mount=True):
+ # type: (str, bool) -> List[str]
+ mount_path = os.path.join(data_dir, 'configfs')
+ if mount:
+ cmd = (
+ 'if ! grep -qs {0} /proc/mounts; then '
+ 'mount -t configfs none {0}; fi'.format(mount_path)
+ )
+ else:
+ cmd = (
+ 'if grep -qs {0} /proc/mounts; then '
+ 'umount {0}; fi'.format(mount_path)
+ )
+ return cmd.split()
+
+ @staticmethod
+ def tcmu_runner_entrypoint_script() -> str:
+ # since we are having tcmu-runner be a background
+ # process in its systemd unit (rbd-target-api being
+ # the main process) systemd will not restart it when
+ # it fails. in order to try and get around that for now
+ # we can have a script mounted in the container that
+ # attempts to do the restarting for us. This script
+ # can then become the entrypoint for the tcmu-runner
+ # container
+
+ # This is intended to be replaced with a better solution
+ # from the squid release onward
+ return """#!/bin/bash
+RUN_DIR=/var/run/tcmu-runner
+
+if [ ! -d "${RUN_DIR}" ] ; then
+ mkdir -p "${RUN_DIR}"
+fi
+
+rm -rf "${RUN_DIR}"/*
+
+while true
+do
+ touch "${RUN_DIR}"/start-up-$(date -Ins)
+ /usr/bin/tcmu-runner
+
+ # If we got around 3 kills/segfaults in the last minute,
+ # don't start anymore
+ if [ $(find "${RUN_DIR}" -type f -cmin -1 | wc -l) -ge 3 ] ; then
+ exit 0
+ fi
+
+ sleep 1
+done
+"""
+
+ def get_tcmu_runner_container(self):
+ # type: () -> CephContainer
+ # daemon_id is used to generate the cid and pid files used by podman, but as both tcmu-runner
+ # and rbd-target-api have the same daemon_id, it conflicts and prevents the second container from
+ # starting. '.tcmu' is appended to the daemon_id to fix that.
+ subident = DaemonSubIdentity(
+ self.fsid, self.daemon_type, self.daemon_id, 'tcmu'
+ )
+ tcmu_dmn = self.create(self.ctx, subident)
+ tcmu_container = to_deployment_container(
+ self.ctx, daemon_to_container(self.ctx, tcmu_dmn, privileged=True)
+ )
+ # TODO: Eventually we don't want to run tcmu-runner through this script.
+ # This is intended to be a workaround backported to older releases
+ # and should eventually be removed from the squid release onward
+ tcmu_container.entrypoint = (
+ '/usr/local/scripts/tcmu-runner-entrypoint.sh'
+ )
+ tcmu_container.cname = self.get_container_name(desc='tcmu')
+ return tcmu_container
+
+ def container(self, ctx: CephadmContext) -> CephContainer:
+ # So the container can modprobe iscsi_target_mod and have write perms
+ # to configfs we need to make this a privileged container.
+ ctr = daemon_to_container(ctx, self, privileged=True)
+ return to_deployment_container(ctx, ctr)
+
+ def config_and_keyring(
+ self, ctx: CephadmContext
+ ) -> Tuple[Optional[str], Optional[str]]:
+ return get_config_and_keyring(ctx)
+
+ def uid_gid(self, ctx: CephadmContext) -> Tuple[int, int]:
+ return extract_uid_gid(ctx)
+
+ def default_entrypoint(self) -> str:
+ return self.entrypoint
+
+ def customize_container_args(
+ self, ctx: CephadmContext, args: List[str]
+ ) -> None:
+ args.append(ctx.container_engine.unlimited_pids_option)
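For illustration, the shell fragments configfs_mount_umount produces once the caller re-joins them with spaces; data_dir is a placeholder:

    data_dir = '/var/lib/ceph/<fsid>/iscsi.gw1'
    ' '.join(CephIscsi.configfs_mount_umount(data_dir, mount=True))
    # -> 'if ! grep -qs .../configfs /proc/mounts; then mount -t configfs none .../configfs; fi'
    ' '.join(CephIscsi.configfs_mount_umount(data_dir, mount=False))
    # -> 'if grep -qs .../configfs /proc/mounts; then umount .../configfs; fi'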
diff --git a/src/cephadm/cephadmlib/daemons/monitoring.py b/src/cephadm/cephadmlib/daemons/monitoring.py
new file mode 100644
index 00000000000..e0667853dd7
--- /dev/null
+++ b/src/cephadm/cephadmlib/daemons/monitoring.py
@@ -0,0 +1,377 @@
+import os
+
+from typing import Dict, List, Tuple
+
+from ..call_wrappers import call, CallVerbosity
+from ..constants import (
+ DEFAULT_ALERT_MANAGER_IMAGE,
+ DEFAULT_GRAFANA_IMAGE,
+ DEFAULT_LOKI_IMAGE,
+ DEFAULT_NODE_EXPORTER_IMAGE,
+ DEFAULT_PROMETHEUS_IMAGE,
+ DEFAULT_PROMTAIL_IMAGE,
+)
+from ..container_daemon_form import ContainerDaemonForm, daemon_to_container
+from ..container_types import CephContainer, extract_uid_gid
+from ..context import CephadmContext
+from ..context_getters import fetch_configs, fetch_meta
+from ..daemon_form import register as register_daemon_form
+from ..daemon_identity import DaemonIdentity
+from ..deployment_utils import to_deployment_container
+from ..exceptions import Error
+from ..net_utils import get_fqdn, get_hostname, get_ip_addresses, wrap_ipv6
+
+
+@register_daemon_form
+class Monitoring(ContainerDaemonForm):
+ """Define the configs for the monitoring containers"""
+
+ port_map = {
+ 'prometheus': [
+ 9095
+ ], # Avoid default 9090, due to conflict with cockpit UI
+ 'node-exporter': [9100],
+ 'grafana': [3000],
+ 'alertmanager': [9093, 9094],
+ 'loki': [3100],
+ 'promtail': [9080],
+ }
+
+ components = {
+ 'prometheus': {
+ 'image': DEFAULT_PROMETHEUS_IMAGE,
+ 'cpus': '2',
+ 'memory': '4GB',
+ 'args': [
+ '--config.file=/etc/prometheus/prometheus.yml',
+ '--storage.tsdb.path=/prometheus',
+ ],
+ 'config-json-files': [
+ 'prometheus.yml',
+ ],
+ },
+ 'loki': {
+ 'image': DEFAULT_LOKI_IMAGE,
+ 'cpus': '1',
+ 'memory': '1GB',
+ 'args': [
+ '--config.file=/etc/loki/loki.yml',
+ ],
+ 'config-json-files': ['loki.yml'],
+ },
+ 'promtail': {
+ 'image': DEFAULT_PROMTAIL_IMAGE,
+ 'cpus': '1',
+ 'memory': '1GB',
+ 'args': [
+ '--config.file=/etc/promtail/promtail.yml',
+ ],
+ 'config-json-files': [
+ 'promtail.yml',
+ ],
+ },
+ 'node-exporter': {
+ 'image': DEFAULT_NODE_EXPORTER_IMAGE,
+ 'cpus': '1',
+ 'memory': '1GB',
+ 'args': ['--no-collector.timex'],
+ },
+ 'grafana': {
+ 'image': DEFAULT_GRAFANA_IMAGE,
+ 'cpus': '2',
+ 'memory': '4GB',
+ 'args': [],
+ 'config-json-files': [
+ 'grafana.ini',
+ 'provisioning/datasources/ceph-dashboard.yml',
+ 'certs/cert_file',
+ 'certs/cert_key',
+ ],
+ },
+ 'alertmanager': {
+ 'image': DEFAULT_ALERT_MANAGER_IMAGE,
+ 'cpus': '2',
+ 'memory': '2GB',
+ 'args': [
+ '--cluster.listen-address=:{}'.format(
+ port_map['alertmanager'][1]
+ ),
+ ],
+ 'config-json-files': [
+ 'alertmanager.yml',
+ ],
+ 'config-json-args': [
+ 'peers',
+ ],
+ },
+ } # type: ignore
+
+ @classmethod
+ def for_daemon_type(cls, daemon_type: str) -> bool:
+ return daemon_type in cls.components
+
+ @staticmethod
+ def get_version(ctx, container_id, daemon_type):
+ # type: (CephadmContext, str, str) -> str
+ """
+ :param daemon_type: Either "prometheus", "alertmanager", "loki", "promtail" or "node-exporter"
+ """
+ assert daemon_type in (
+ 'prometheus',
+ 'alertmanager',
+ 'node-exporter',
+ 'loki',
+ 'promtail',
+ )
+ cmd = daemon_type.replace('-', '_')
+ code = -1
+ err = ''
+ out = ''
+ version = ''
+ if daemon_type == 'alertmanager':
+ for cmd in ['alertmanager', 'prometheus-alertmanager']:
+ out, err, code = call(
+ ctx,
+ [
+ ctx.container_engine.path,
+ 'exec',
+ container_id,
+ cmd,
+ '--version',
+ ],
+ verbosity=CallVerbosity.QUIET,
+ )
+ if code == 0:
+ break
+ cmd = 'alertmanager' # reset cmd for version extraction
+ else:
+ out, err, code = call(
+ ctx,
+ [
+ ctx.container_engine.path,
+ 'exec',
+ container_id,
+ cmd,
+ '--version',
+ ],
+ verbosity=CallVerbosity.QUIET,
+ )
+ if code == 0:
+ if err.startswith('%s, version ' % cmd):
+ version = err.split(' ')[2]
+ elif out.startswith('%s, version ' % cmd):
+ version = out.split(' ')[2]
+ return version
+
+ @staticmethod
+ def extract_uid_gid(
+ ctx: CephadmContext, daemon_type: str
+ ) -> Tuple[int, int]:
+ if daemon_type == 'prometheus':
+ uid, gid = extract_uid_gid(ctx, file_path='/etc/prometheus')
+ elif daemon_type == 'node-exporter':
+ uid, gid = 65534, 65534
+ elif daemon_type == 'grafana':
+ uid, gid = extract_uid_gid(ctx, file_path='/var/lib/grafana')
+ elif daemon_type == 'loki':
+ uid, gid = extract_uid_gid(ctx, file_path='/etc/loki')
+ elif daemon_type == 'promtail':
+ uid, gid = extract_uid_gid(ctx, file_path='/etc/promtail')
+ elif daemon_type == 'alertmanager':
+ uid, gid = extract_uid_gid(
+ ctx, file_path=['/etc/alertmanager', '/etc/prometheus']
+ )
+ else:
+ raise Error('{} not implemented yet'.format(daemon_type))
+ return uid, gid
+
+ def __init__(self, ctx: CephadmContext, ident: DaemonIdentity) -> None:
+ self.ctx = ctx
+ self._identity = ident
+
+ @classmethod
+ def create(
+ cls, ctx: CephadmContext, ident: DaemonIdentity
+ ) -> 'Monitoring':
+ return cls(ctx, ident)
+
+ @property
+ def identity(self) -> DaemonIdentity:
+ return self._identity
+
+ def container(self, ctx: CephadmContext) -> CephContainer:
+ self._prevalidate(ctx)
+ ctr = daemon_to_container(ctx, self)
+ return to_deployment_container(ctx, ctr)
+
+ def uid_gid(self, ctx: CephadmContext) -> Tuple[int, int]:
+ return self.extract_uid_gid(ctx, self.identity.daemon_type)
+
+ def _prevalidate(self, ctx: CephadmContext) -> None:
+ # Before being refactored into a ContainerDaemonForm, these checks were
+ # done inside the deploy function. This was the only "family" of daemons
+ # that performed these checks in that location.
+ daemon_type = self.identity.daemon_type
+ config = fetch_configs(ctx) # type: ignore
+ required_files = self.components[daemon_type].get(
+ 'config-json-files', list()
+ )
+ required_args = self.components[daemon_type].get(
+ 'config-json-args', list()
+ )
+ if required_files:
+ if not config or not all(c in config.get('files', {}).keys() for c in required_files): # type: ignore
+ raise Error(
+ '{} deployment requires config-json which must '
+ 'contain file content for {}'.format(
+ daemon_type.capitalize(), ', '.join(required_files)
+ )
+ )
+ if required_args:
+ if not config or not all(c in config.keys() for c in required_args): # type: ignore
+ raise Error(
+ '{} deployment requires config-json which must '
+ 'contain arg for {}'.format(
+ daemon_type.capitalize(), ', '.join(required_args)
+ )
+ )
+
+ def get_daemon_args(self) -> List[str]:
+ ctx = self.ctx
+ daemon_type = self.identity.daemon_type
+ metadata = self.components[daemon_type]
+ r = list(metadata.get('args', []))
+ # set the ip and port to bind to for node-exporter, alertmanager and prometheus
+ if daemon_type not in ['grafana', 'loki', 'promtail']:
+ ip = ''
+ port = self.port_map[daemon_type][0]
+ meta = fetch_meta(ctx)
+ if meta:
+ if 'ip' in meta and meta['ip']:
+ ip = meta['ip']
+ if 'ports' in meta and meta['ports']:
+ port = meta['ports'][0]
+ r += [f'--web.listen-address={ip}:{port}']
+ if daemon_type == 'prometheus':
+ config = fetch_configs(ctx)
+ retention_time = config.get('retention_time', '15d')
+ retention_size = config.get(
+ 'retention_size', '0'
+ ) # default to disabled
+ r += [f'--storage.tsdb.retention.time={retention_time}']
+ r += [f'--storage.tsdb.retention.size={retention_size}']
+ scheme = 'http'
+ host = get_fqdn()
+ # if the host is not an fqdn, fall back to the IP to
+ # avoid producing a broken web.external-url link
+ if '.' not in host:
+ ipv4_addrs, ipv6_addrs = get_ip_addresses(get_hostname())
+ # use the first ipv4 (if any) otherwise use the first ipv6
+ addr = next(iter(ipv4_addrs or ipv6_addrs), None)
+ host = wrap_ipv6(addr) if addr else host
+ r += [f'--web.external-url={scheme}://{host}:{port}']
+ if daemon_type == 'alertmanager':
+ config = fetch_configs(ctx)
+ peers = config.get('peers', list()) # type: ignore
+ for peer in peers:
+ r += ['--cluster.peer={}'.format(peer)]
+ try:
+ r += [f'--web.config.file={config["web_config"]}']
+ except KeyError:
+ pass
+ # some alertmanager builds, by default, look elsewhere for their config
+ r += ['--config.file=/etc/alertmanager/alertmanager.yml']
+ if daemon_type == 'promtail':
+ r += ['--config.expand-env']
+ if daemon_type == 'prometheus':
+ config = fetch_configs(ctx)
+ try:
+ r += [f'--web.config.file={config["web_config"]}']
+ except KeyError:
+ pass
+ if daemon_type == 'node-exporter':
+ config = fetch_configs(ctx)
+ try:
+ r += [f'--web.config.file={config["web_config"]}']
+ except KeyError:
+ pass
+ r += [
+ '--path.procfs=/host/proc',
+ '--path.sysfs=/host/sys',
+ '--path.rootfs=/rootfs',
+ ]
+ return r
+
+ def _get_container_mounts(self, data_dir: str) -> Dict[str, str]:
+ ctx = self.ctx
+ daemon_type = self.identity.daemon_type
+ mounts: Dict[str, str] = {}
+ log_dir = os.path.join(ctx.log_dir, self.identity.fsid)
+ if daemon_type == 'prometheus':
+ mounts[
+ os.path.join(data_dir, 'etc/prometheus')
+ ] = '/etc/prometheus:Z'
+ mounts[os.path.join(data_dir, 'data')] = '/prometheus:Z'
+ elif daemon_type == 'loki':
+ mounts[os.path.join(data_dir, 'etc/loki')] = '/etc/loki:Z'
+ mounts[os.path.join(data_dir, 'data')] = '/loki:Z'
+ elif daemon_type == 'promtail':
+ mounts[os.path.join(data_dir, 'etc/promtail')] = '/etc/promtail:Z'
+ mounts[log_dir] = '/var/log/ceph:z'
+ mounts[os.path.join(data_dir, 'data')] = '/promtail:Z'
+ elif daemon_type == 'node-exporter':
+ mounts[
+ os.path.join(data_dir, 'etc/node-exporter')
+ ] = '/etc/node-exporter:Z'
+ mounts['/proc'] = '/host/proc:ro'
+ mounts['/sys'] = '/host/sys:ro'
+ mounts['/'] = '/rootfs:ro'
+ elif daemon_type == 'grafana':
+ mounts[
+ os.path.join(data_dir, 'etc/grafana/grafana.ini')
+ ] = '/etc/grafana/grafana.ini:Z'
+ mounts[
+ os.path.join(data_dir, 'etc/grafana/provisioning/datasources')
+ ] = '/etc/grafana/provisioning/datasources:Z'
+ mounts[
+ os.path.join(data_dir, 'etc/grafana/certs')
+ ] = '/etc/grafana/certs:Z'
+ mounts[
+ os.path.join(data_dir, 'data/grafana.db')
+ ] = '/var/lib/grafana/grafana.db:Z'
+ elif daemon_type == 'alertmanager':
+ mounts[
+ os.path.join(data_dir, 'etc/alertmanager')
+ ] = '/etc/alertmanager:Z'
+ return mounts
+
+ def customize_container_mounts(
+ self, ctx: CephadmContext, mounts: Dict[str, str]
+ ) -> None:
+ data_dir = self.identity.data_dir(ctx.data_dir)
+ mounts.update(self._get_container_mounts(data_dir))
+
+ def customize_container_args(
+ self, ctx: CephadmContext, args: List[str]
+ ) -> None:
+ uid, _ = self.uid_gid(ctx)
+ monitoring_args = [
+ '--user',
+ str(uid),
+ # FIXME: disable cpu/memory limits for the time being (not supported
+ # by the Ubuntu 18.04 kernel!)
+ ]
+ args.extend(monitoring_args)
+ if self.identity.daemon_type == 'node-exporter':
+ # in order to support setting '--path.procfs=/host/proc', '--path.sysfs=/host/sys',
+ # and '--path.rootfs=/rootfs' for node-exporter, we need to disable SELinux separation
+ # between the node-exporter container and the host to avoid SELinux denials
+ args.extend(['--security-opt', 'label=disable'])
+
+ def customize_process_args(
+ self, ctx: CephadmContext, args: List[str]
+ ) -> None:
+ args.extend(self.get_daemon_args())
+
+ def default_entrypoint(self) -> str:
+ return ''
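
The validation in Monitoring._prevalidate reduces to a membership check against the components table: every entry in 'config-json-files' must appear under config['files'] and every entry in 'config-json-args' must be a top-level config key. A minimal standalone sketch of that pattern, with illustrative names rather than the real cephadm structures:

    from typing import Any, Dict, List

    # illustrative component table; not the real cephadm data
    COMPONENTS: Dict[str, Dict[str, List[str]]] = {
        'alertmanager': {
            'config-json-files': ['alertmanager.yml'],
            'config-json-args': ['peers'],
        },
    }

    def check_config(daemon_type: str, config: Dict[str, Any]) -> None:
        meta = COMPONENTS[daemon_type]
        missing_files = [
            f for f in meta.get('config-json-files', [])
            if f not in config.get('files', {})
        ]
        missing_args = [
            a for a in meta.get('config-json-args', []) if a not in config
        ]
        if missing_files or missing_args:
            raise ValueError(
                f'config-json missing files={missing_files} args={missing_args}'
            )

    check_config('alertmanager', {'files': {'alertmanager.yml': '...'}, 'peers': []})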
diff --git a/src/cephadm/cephadmlib/daemons/nfs.py b/src/cephadm/cephadmlib/daemons/nfs.py
new file mode 100644
index 00000000000..6e2f2a945ca
--- /dev/null
+++ b/src/cephadm/cephadmlib/daemons/nfs.py
@@ -0,0 +1,225 @@
+import logging
+import os
+import re
+
+from typing import Dict, List, Optional, Tuple, Union
+
+from ..call_wrappers import call, CallVerbosity
+from ..constants import DEFAULT_IMAGE, CEPH_DEFAULT_CONF
+from ..container_daemon_form import ContainerDaemonForm, daemon_to_container
+from ..container_types import CephContainer, extract_uid_gid
+from ..context import CephadmContext
+from ..context_getters import fetch_configs, get_config_and_keyring
+from ..daemon_form import register as register_daemon_form
+from ..daemon_identity import DaemonIdentity
+from ..data_utils import dict_get, is_fsid
+from ..deploy import DeploymentType
+from ..deployment_utils import to_deployment_container
+from ..exceptions import Error
+from ..file_utils import makedirs, populate_files, write_new
+from ..net_utils import EndPoint
+
+
+logger = logging.getLogger()
+
+
+@register_daemon_form
+class NFSGanesha(ContainerDaemonForm):
+ """Defines a NFS-Ganesha container"""
+
+ daemon_type = 'nfs'
+ entrypoint = '/usr/bin/ganesha.nfsd'
+ daemon_args = ['-F', '-L', 'STDERR']
+
+ required_files = ['ganesha.conf']
+
+ port_map = {
+ 'nfs': 2049,
+ }
+
+ @classmethod
+ def for_daemon_type(cls, daemon_type: str) -> bool:
+ return cls.daemon_type == daemon_type
+
+ def __init__(
+ self, ctx, fsid, daemon_id, config_json, image=DEFAULT_IMAGE
+ ):
+ # type: (CephadmContext, str, Union[int, str], Dict, str) -> None
+ self.ctx = ctx
+ self.fsid = fsid
+ self.daemon_id = daemon_id
+ self.image = image
+
+ # config-json options
+ self.pool = dict_get(config_json, 'pool', require=True)
+ self.namespace = dict_get(config_json, 'namespace')
+ self.userid = dict_get(config_json, 'userid')
+ self.extra_args = dict_get(config_json, 'extra_args', [])
+ self.files = dict_get(config_json, 'files', {})
+ self.rgw = dict_get(config_json, 'rgw', {})
+
+ # validate the supplied args
+ self.validate()
+
+ @classmethod
+ def init(cls, ctx, fsid, daemon_id):
+ # type: (CephadmContext, str, Union[int, str]) -> NFSGanesha
+ return cls(ctx, fsid, daemon_id, fetch_configs(ctx), ctx.image)
+
+ @classmethod
+ def create(
+ cls, ctx: CephadmContext, ident: DaemonIdentity
+ ) -> 'NFSGanesha':
+ return cls.init(ctx, ident.fsid, ident.daemon_id)
+
+ @property
+ def identity(self) -> DaemonIdentity:
+ return DaemonIdentity(self.fsid, self.daemon_type, self.daemon_id)
+
+ def _get_container_mounts(self, data_dir):
+ # type: (str) -> Dict[str, str]
+ mounts = dict()
+ mounts[os.path.join(data_dir, 'config')] = '/etc/ceph/ceph.conf:z'
+ mounts[os.path.join(data_dir, 'keyring')] = '/etc/ceph/keyring:z'
+ mounts[os.path.join(data_dir, 'etc/ganesha')] = '/etc/ganesha:z'
+ if self.rgw:
+ cluster = self.rgw.get('cluster', 'ceph')
+ rgw_user = self.rgw.get('user', 'admin')
+ mounts[
+ os.path.join(data_dir, 'keyring.rgw')
+ ] = '/var/lib/ceph/radosgw/%s-%s/keyring:z' % (cluster, rgw_user)
+ return mounts
+
+ def customize_container_mounts(
+ self, ctx: CephadmContext, mounts: Dict[str, str]
+ ) -> None:
+ data_dir = self.identity.data_dir(ctx.data_dir)
+ mounts.update(self._get_container_mounts(data_dir))
+
+ @staticmethod
+ def get_container_envs():
+ # type: () -> List[str]
+ envs = ['CEPH_CONF=%s' % (CEPH_DEFAULT_CONF)]
+ return envs
+
+ @staticmethod
+ def get_version(ctx, container_id):
+ # type: (CephadmContext, str) -> Optional[str]
+ version = None
+ out, err, code = call(
+ ctx,
+ [
+ ctx.container_engine.path,
+ 'exec',
+ container_id,
+ NFSGanesha.entrypoint,
+ '-v',
+ ],
+ verbosity=CallVerbosity.QUIET,
+ )
+ if code == 0:
+ match = re.search(r'NFS-Ganesha Release\s*=\s*[V]*([\d.]+)', out)
+ if match:
+ version = match.group(1)
+ return version
+
+ def validate(self):
+ # type: () -> None
+ if not is_fsid(self.fsid):
+ raise Error('not an fsid: %s' % self.fsid)
+ if not self.daemon_id:
+ raise Error('invalid daemon_id: %s' % self.daemon_id)
+ if not self.image:
+ raise Error('invalid image: %s' % self.image)
+
+ # check for the required files
+ if self.required_files:
+ for fname in self.required_files:
+ if fname not in self.files:
+ raise Error(
+ 'required file missing from config-json: %s' % fname
+ )
+
+ # check for an RGW config
+ if self.rgw:
+ if not self.rgw.get('keyring'):
+ raise Error('RGW keyring is missing')
+ if not self.rgw.get('user'):
+ raise Error('RGW user is missing')
+
+ def get_daemon_name(self):
+ # type: () -> str
+ return '%s.%s' % (self.daemon_type, self.daemon_id)
+
+ def get_container_name(self, desc=None):
+ # type: (Optional[str]) -> str
+ cname = 'ceph-%s-%s' % (self.fsid, self.get_daemon_name())
+ if desc:
+ cname = '%s-%s' % (cname, desc)
+ return cname
+
+ def get_daemon_args(self):
+ # type: () -> List[str]
+ return self.daemon_args + self.extra_args
+
+ def create_daemon_dirs(self, data_dir, uid, gid):
+ # type: (str, int, int) -> None
+ """Create files under the container data dir"""
+ if not os.path.isdir(data_dir):
+ raise OSError('data_dir is not a directory: %s' % (data_dir))
+
+ logger.info('Creating ganesha config...')
+
+ # create the ganesha conf dir
+ config_dir = os.path.join(data_dir, 'etc/ganesha')
+ makedirs(config_dir, uid, gid, 0o755)
+
+ # populate files from the config-json
+ populate_files(config_dir, self.files, uid, gid)
+
+ # write the RGW keyring
+ if self.rgw:
+ keyring_path = os.path.join(data_dir, 'keyring.rgw')
+ with write_new(keyring_path, owner=(uid, gid)) as f:
+ f.write(self.rgw.get('keyring', ''))
+
+ def firewall_service_name(self) -> str:
+ return 'nfs'
+
+ def container(self, ctx: CephadmContext) -> CephContainer:
+ ctr = daemon_to_container(ctx, self)
+ return to_deployment_container(ctx, ctr)
+
+ def customize_container_endpoints(
+ self, endpoints: List[EndPoint], deployment_type: DeploymentType
+ ) -> None:
+ if deployment_type == DeploymentType.DEFAULT and not endpoints:
+ nfs_ports = list(NFSGanesha.port_map.values())
+ endpoints.extend([EndPoint('0.0.0.0', p) for p in nfs_ports])
+
+ def uid_gid(self, ctx: CephadmContext) -> Tuple[int, int]:
+ # TODO: extract ganesha uid/gid (997, 994) ?
+ return extract_uid_gid(ctx)
+
+ def config_and_keyring(
+ self, ctx: CephadmContext
+ ) -> Tuple[Optional[str], Optional[str]]:
+ return get_config_and_keyring(ctx)
+
+ def customize_container_envs(
+ self, ctx: CephadmContext, envs: List[str]
+ ) -> None:
+ envs.extend(self.get_container_envs())
+
+ def customize_process_args(
+ self, ctx: CephadmContext, args: List[str]
+ ) -> None:
+ args.extend(self.get_daemon_args())
+
+ def customize_container_args(
+ self, ctx: CephadmContext, args: List[str]
+ ) -> None:
+ args.append(ctx.container_engine.unlimited_pids_option)
+
+ def default_entrypoint(self) -> str:
+ return self.entrypoint
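
For reference, a config-json payload of the shape NFSGanesha.validate() accepts could look like the following; all values are placeholders, only 'pool' and the 'ganesha.conf' file entry are strictly required, and 'keyring'/'user' are only checked when an 'rgw' block is present:

    # Illustrative config-json for an nfs daemon; values are placeholders.
    nfs_config = {
        'pool': 'nfs-ganesha',        # required (dict_get(..., require=True))
        'namespace': 'ns1',           # optional RADOS namespace
        'userid': 'someuser',         # optional cephx user id
        'files': {
            'ganesha.conf': '# ganesha configuration goes here\n',  # required file
        },
        'rgw': {                      # optional; keyring and user are validated if present
            'cluster': 'ceph',
            'user': 'admin',
            'keyring': '[client.admin]\n\tkey = <redacted>\n',
        },
    }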
diff --git a/src/cephadm/cephadmlib/daemons/nvmeof.py b/src/cephadm/cephadmlib/daemons/nvmeof.py
new file mode 100644
index 00000000000..39488406bc8
--- /dev/null
+++ b/src/cephadm/cephadmlib/daemons/nvmeof.py
@@ -0,0 +1,193 @@
+import logging
+import os
+
+from typing import Dict, List, Optional, Tuple, Union
+
+from ..container_daemon_form import ContainerDaemonForm, daemon_to_container
+from ..container_types import CephContainer
+from ..context_getters import fetch_configs, get_config_and_keyring
+from ..daemon_form import register as register_daemon_form
+from ..daemon_identity import DaemonIdentity
+from ..constants import DEFAULT_NVMEOF_IMAGE
+from ..context import CephadmContext
+from ..data_utils import dict_get, is_fsid
+from ..deployment_utils import to_deployment_container
+from ..exceptions import Error
+from ..file_utils import makedirs, populate_files
+from ..call_wrappers import call
+
+
+logger = logging.getLogger()
+
+
+@register_daemon_form
+class CephNvmeof(ContainerDaemonForm):
+ """Defines a Ceph-Nvmeof container"""
+
+ daemon_type = 'nvmeof'
+ required_files = ['ceph-nvmeof.conf']
+ default_image = DEFAULT_NVMEOF_IMAGE
+
+ @classmethod
+ def for_daemon_type(cls, daemon_type: str) -> bool:
+ return cls.daemon_type == daemon_type
+
+ def __init__(
+ self, ctx, fsid, daemon_id, config_json, image=DEFAULT_NVMEOF_IMAGE
+ ):
+ # type: (CephadmContext, str, Union[int, str], Dict, str) -> None
+ self.ctx = ctx
+ self.fsid = fsid
+ self.daemon_id = daemon_id
+ self.image = image
+
+ # config-json options
+ self.files = dict_get(config_json, 'files', {})
+
+ # validate the supplied args
+ self.validate()
+
+ @classmethod
+ def init(cls, ctx, fsid, daemon_id):
+ # type: (CephadmContext, str, Union[int, str]) -> CephNvmeof
+ return cls(ctx, fsid, daemon_id, fetch_configs(ctx), ctx.image)
+
+ @classmethod
+ def create(
+ cls, ctx: CephadmContext, ident: DaemonIdentity
+ ) -> 'CephNvmeof':
+ return cls.init(ctx, ident.fsid, ident.daemon_id)
+
+ @property
+ def identity(self) -> DaemonIdentity:
+ return DaemonIdentity(self.fsid, self.daemon_type, self.daemon_id)
+
+ @staticmethod
+ def _get_container_mounts(data_dir: str) -> Dict[str, str]:
+ mounts = dict()
+ mounts[os.path.join(data_dir, 'config')] = '/etc/ceph/ceph.conf:z'
+ mounts[os.path.join(data_dir, 'keyring')] = '/etc/ceph/keyring:z'
+ mounts[
+ os.path.join(data_dir, 'ceph-nvmeof.conf')
+ ] = '/src/ceph-nvmeof.conf:z'
+ mounts[os.path.join(data_dir, 'configfs')] = '/sys/kernel/config'
+ mounts['/dev/hugepages'] = '/dev/hugepages'
+ mounts['/dev/vfio/vfio'] = '/dev/vfio/vfio'
+ return mounts
+
+ def customize_container_mounts(
+ self, ctx: CephadmContext, mounts: Dict[str, str]
+ ) -> None:
+ data_dir = self.identity.data_dir(ctx.data_dir)
+ mounts.update(self._get_container_mounts(data_dir))
+
+ def customize_container_binds(
+ self, ctx: CephadmContext, binds: List[List[str]]
+ ) -> None:
+ lib_modules = [
+ 'type=bind',
+ 'source=/lib/modules',
+ 'destination=/lib/modules',
+ 'ro=true',
+ ]
+ binds.append(lib_modules)
+
+ @staticmethod
+ def get_version(ctx: CephadmContext, container_id: str) -> Optional[str]:
+ out, err, ret = call(
+ ctx,
+ [
+ ctx.container_engine.path,
+ 'inspect',
+ '--format',
+ '{{index .Config.Labels "io.ceph.version"}}',
+ ctx.image,
+ ],
+ )
+ version = None
+ if ret == 0:
+ version = out.strip()
+ return version
+
+ def validate(self):
+ # type: () -> None
+ if not is_fsid(self.fsid):
+ raise Error('not an fsid: %s' % self.fsid)
+ if not self.daemon_id:
+ raise Error('invalid daemon_id: %s' % self.daemon_id)
+ if not self.image:
+ raise Error('invalid image: %s' % self.image)
+
+ # check for the required files
+ if self.required_files:
+ for fname in self.required_files:
+ if fname not in self.files:
+ raise Error(
+ 'required file missing from config-json: %s' % fname
+ )
+
+ def get_daemon_name(self):
+ # type: () -> str
+ return '%s.%s' % (self.daemon_type, self.daemon_id)
+
+ def get_container_name(self, desc=None):
+ # type: (Optional[str]) -> str
+ cname = '%s-%s' % (self.fsid, self.get_daemon_name())
+ if desc:
+ cname = '%s-%s' % (cname, desc)
+ return cname
+
+ def create_daemon_dirs(self, data_dir, uid, gid):
+ # type: (str, int, int) -> None
+ """Create files under the container data dir"""
+ if not os.path.isdir(data_dir):
+ raise OSError('data_dir is not a directory: %s' % (data_dir))
+
+ logger.info('Creating ceph-nvmeof config...')
+ configfs_dir = os.path.join(data_dir, 'configfs')
+ makedirs(configfs_dir, uid, gid, 0o755)
+
+ # populate files from the config-json
+ populate_files(data_dir, self.files, uid, gid)
+
+ @staticmethod
+ def configfs_mount_umount(data_dir, mount=True):
+ # type: (str, bool) -> List[str]
+ mount_path = os.path.join(data_dir, 'configfs')
+ if mount:
+ cmd = (
+ 'if ! grep -qs {0} /proc/mounts; then '
+ 'mount -t configfs none {0}; fi'.format(mount_path)
+ )
+ else:
+ cmd = (
+ 'if grep -qs {0} /proc/mounts; then '
+ 'umount {0}; fi'.format(mount_path)
+ )
+ return cmd.split()
+
+ @staticmethod
+ def get_sysctl_settings() -> List[str]:
+ return [
+ 'vm.nr_hugepages = 4096',
+ ]
+
+ def container(self, ctx: CephadmContext) -> CephContainer:
+ ctr = daemon_to_container(ctx, self)
+ return to_deployment_container(ctx, ctr)
+
+ def uid_gid(self, ctx: CephadmContext) -> Tuple[int, int]:
+ return 167, 167 # TODO: need to get the uid/gid properly
+
+ def config_and_keyring(
+ self, ctx: CephadmContext
+ ) -> Tuple[Optional[str], Optional[str]]:
+ return get_config_and_keyring(ctx)
+
+ def customize_container_args(
+ self, ctx: CephadmContext, args: List[str]
+ ) -> None:
+ args.append(ctx.container_engine.unlimited_pids_option)
+ args.extend(['--ulimit', 'memlock=-1:-1'])
+ args.extend(['--ulimit', 'nofile=10240'])
+ args.extend(['--cap-add=SYS_ADMIN', '--cap-add=CAP_SYS_NICE'])
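
The configfs_mount_umount() helper above formats a guarded shell one-liner so configfs is only mounted or unmounted when needed. The strings it produces, shown standalone (the data dir is a placeholder):

    # Sketch of the guarded mount/umount one-liners built by configfs_mount_umount().
    mount_path = '/var/lib/ceph/<fsid>/nvmeof.foo/configfs'  # placeholder path

    mount_cmd = (
        'if ! grep -qs {0} /proc/mounts; then '
        'mount -t configfs none {0}; fi'.format(mount_path)
    )
    umount_cmd = (
        'if grep -qs {0} /proc/mounts; then '
        'umount {0}; fi'.format(mount_path)
    )
    print(mount_cmd)   # mounts configfs only when it is not already mounted
    print(umount_cmd)  # unmounts it only when it is currently mounted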
diff --git a/src/cephadm/cephadmlib/daemons/snmp.py b/src/cephadm/cephadmlib/daemons/snmp.py
new file mode 100644
index 00000000000..f334e5f7652
--- /dev/null
+++ b/src/cephadm/cephadmlib/daemons/snmp.py
@@ -0,0 +1,226 @@
+import json
+import os
+
+from typing import Any, Dict, List, Optional, Tuple, Union
+from urllib.error import HTTPError, URLError
+from urllib.request import urlopen
+
+from ..constants import DEFAULT_SNMP_GATEWAY_IMAGE
+from ..container_daemon_form import ContainerDaemonForm, daemon_to_container
+from ..container_types import CephContainer
+from ..context import CephadmContext
+from ..context_getters import fetch_configs, fetch_endpoints
+from ..daemon_form import register as register_daemon_form
+from ..daemon_identity import DaemonIdentity
+from ..data_utils import is_fsid
+from ..deployment_utils import to_deployment_container
+from ..exceptions import Error
+from ..file_utils import write_new
+
+
+@register_daemon_form
+class SNMPGateway(ContainerDaemonForm):
+ """Defines an SNMP gateway between Prometheus and SNMP monitoring Frameworks"""
+
+ daemon_type = 'snmp-gateway'
+ SUPPORTED_VERSIONS = ['V2c', 'V3']
+ default_image = DEFAULT_SNMP_GATEWAY_IMAGE
+ DEFAULT_PORT = 9464
+ env_filename = 'snmp-gateway.conf'
+
+ @classmethod
+ def for_daemon_type(cls, daemon_type: str) -> bool:
+ return cls.daemon_type == daemon_type
+
+ def __init__(
+ self,
+ ctx: CephadmContext,
+ fsid: str,
+ daemon_id: Union[int, str],
+ config_json: Dict[str, Any],
+ image: Optional[str] = None,
+ ) -> None:
+ self.ctx = ctx
+ self.fsid = fsid
+ self.daemon_id = daemon_id
+ self.image = image or SNMPGateway.default_image
+
+ self.uid = config_json.get('uid', 0)
+ self.gid = config_json.get('gid', 0)
+
+ self.destination = config_json.get('destination', '')
+ self.snmp_version = config_json.get('snmp_version', 'V2c')
+ self.snmp_community = config_json.get('snmp_community', 'public')
+ self.log_level = config_json.get('log_level', 'info')
+ self.snmp_v3_auth_username = config_json.get(
+ 'snmp_v3_auth_username', ''
+ )
+ self.snmp_v3_auth_password = config_json.get(
+ 'snmp_v3_auth_password', ''
+ )
+ self.snmp_v3_auth_protocol = config_json.get(
+ 'snmp_v3_auth_protocol', ''
+ )
+ self.snmp_v3_priv_protocol = config_json.get(
+ 'snmp_v3_priv_protocol', ''
+ )
+ self.snmp_v3_priv_password = config_json.get(
+ 'snmp_v3_priv_password', ''
+ )
+ self.snmp_v3_engine_id = config_json.get('snmp_v3_engine_id', '')
+
+ self.validate()
+
+ @classmethod
+ def init(
+ cls, ctx: CephadmContext, fsid: str, daemon_id: Union[int, str]
+ ) -> 'SNMPGateway':
+ cfgs = fetch_configs(ctx)
+ assert cfgs # assert some config data was found
+ return cls(ctx, fsid, daemon_id, cfgs, ctx.image)
+
+ @classmethod
+ def create(
+ cls, ctx: CephadmContext, ident: DaemonIdentity
+ ) -> 'SNMPGateway':
+ return cls.init(ctx, ident.fsid, ident.daemon_id)
+
+ @property
+ def identity(self) -> DaemonIdentity:
+ return DaemonIdentity(self.fsid, self.daemon_type, self.daemon_id)
+
+ @staticmethod
+ def get_version(
+ ctx: CephadmContext, fsid: str, daemon_id: str
+ ) -> Optional[str]:
+ """Return the version of the notifier from it's http endpoint"""
+ path = os.path.join(
+ ctx.data_dir, fsid, f'snmp-gateway.{daemon_id}', 'unit.meta'
+ )
+ try:
+ with open(path, 'r') as env:
+ metadata = json.loads(env.read())
+ except (OSError, json.JSONDecodeError):
+ return None
+
+ ports = metadata.get('ports', [])
+ if not ports:
+ return None
+
+ try:
+ with urlopen(f'http://127.0.0.1:{ports[0]}/') as r:
+ html = r.read().decode('utf-8').split('\n')
+ except (HTTPError, URLError):
+ return None
+
+ for h in html:
+ stripped = h.strip()
+ if stripped.startswith(('<pre>', '<PRE>')) and stripped.endswith(
+ ('</pre>', '</PRE>')
+ ):
+ # <pre>(version=1.2.1, branch=HEAD, revision=7...
+ return stripped.split(',')[0].split('version=')[1]
+
+ return None
+
+ @property
+ def port(self) -> int:
+ endpoints = fetch_endpoints(self.ctx)
+ if not endpoints:
+ return self.DEFAULT_PORT
+ return endpoints[0].port
+
+ def get_daemon_args(self) -> List[str]:
+ v3_args = []
+ base_args = [
+ f'--web.listen-address=:{self.port}',
+ f'--snmp.destination={self.destination}',
+ f'--snmp.version={self.snmp_version}',
+ f'--log.level={self.log_level}',
+ '--snmp.trap-description-template=/etc/snmp_notifier/description-template.tpl',
+ ]
+
+ if self.snmp_version == 'V3':
+ # common auth settings
+ v3_args.extend(
+ [
+ '--snmp.authentication-enabled',
+ f'--snmp.authentication-protocol={self.snmp_v3_auth_protocol}',
+ f'--snmp.security-engine-id={self.snmp_v3_engine_id}',
+ ]
+ )
+ # authPriv setting is applied if we have a privacy protocol setting
+ if self.snmp_v3_priv_protocol:
+ v3_args.extend(
+ [
+ '--snmp.private-enabled',
+ f'--snmp.private-protocol={self.snmp_v3_priv_protocol}',
+ ]
+ )
+
+ return base_args + v3_args
+
+ @property
+ def data_dir(self) -> str:
+ return os.path.join(
+ self.ctx.data_dir,
+ self.ctx.fsid,
+ f'{self.daemon_type}.{self.daemon_id}',
+ )
+
+ @property
+ def conf_file_path(self) -> str:
+ return os.path.join(self.data_dir, self.env_filename)
+
+ def create_daemon_conf(self) -> None:
+ """Creates the environment file holding 'secrets' passed to the snmp-notifier daemon"""
+ with write_new(self.conf_file_path) as f:
+ if self.snmp_version == 'V2c':
+ f.write(f'SNMP_NOTIFIER_COMMUNITY={self.snmp_community}\n')
+ else:
+ f.write(
+ f'SNMP_NOTIFIER_AUTH_USERNAME={self.snmp_v3_auth_username}\n'
+ )
+ f.write(
+ f'SNMP_NOTIFIER_AUTH_PASSWORD={self.snmp_v3_auth_password}\n'
+ )
+ if self.snmp_v3_priv_password:
+ f.write(
+ f'SNMP_NOTIFIER_PRIV_PASSWORD={self.snmp_v3_priv_password}\n'
+ )
+
+ def validate(self) -> None:
+ """Validate the settings
+
+ Raises:
+ Error: if the fsid doesn't look like an fsid
+ Error: if the snmp version is not supported
+ Error: if the destination (<ip>:<port>) is missing
+ """
+ if not is_fsid(self.fsid):
+ raise Error(f'not a valid fsid: {self.fsid}')
+
+ if self.snmp_version not in SNMPGateway.SUPPORTED_VERSIONS:
+ raise Error(f'not a valid snmp version: {self.snmp_version}')
+
+ if not self.destination:
+ raise Error(
+ 'config is missing destination attribute(<ip>:<port>) of the target SNMP listener'
+ )
+
+ def container(self, ctx: CephadmContext) -> CephContainer:
+ ctr = daemon_to_container(ctx, self)
+ return to_deployment_container(ctx, ctr)
+
+ def uid_gid(self, ctx: CephadmContext) -> Tuple[int, int]:
+ return self.uid, self.gid
+
+ def customize_container_args(
+ self, ctx: CephadmContext, args: List[str]
+ ) -> None:
+ args.append(f'--env-file={self.conf_file_path}')
+
+ def customize_process_args(
+ self, ctx: CephadmContext, args: List[str]
+ ) -> None:
+ args.extend(self.get_daemon_args())
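
SNMPGateway.get_version() scrapes the snmp_notifier landing page for a `<pre>(version=...` line; the parsing step in isolation, with an illustrative sample line:

    # Standalone sketch of the version parsing done in SNMPGateway.get_version().
    sample = '<pre>(version=1.2.1, branch=HEAD, revision=7c7b0a8)</pre>'

    version = None
    stripped = sample.strip()
    if stripped.startswith(('<pre>', '<PRE>')) and stripped.endswith(('</pre>', '</PRE>')):
        version = stripped.split(',')[0].split('version=')[1]
    print(version)  # -> 1.2.1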
diff --git a/src/cephadm/cephadmlib/daemons/tracing.py b/src/cephadm/cephadmlib/daemons/tracing.py
new file mode 100644
index 00000000000..4d4fecacbb0
--- /dev/null
+++ b/src/cephadm/cephadmlib/daemons/tracing.py
@@ -0,0 +1,116 @@
+import logging
+
+from typing import Any, Dict, List, Tuple
+
+from ..constants import (
+ DEFAULT_ELASTICSEARCH_IMAGE,
+ DEFAULT_JAEGER_AGENT_IMAGE,
+ DEFAULT_JAEGER_COLLECTOR_IMAGE,
+ DEFAULT_JAEGER_QUERY_IMAGE,
+)
+from ..container_daemon_form import ContainerDaemonForm, daemon_to_container
+from ..container_types import CephContainer
+from ..context import CephadmContext
+from ..context_getters import fetch_configs
+from ..daemon_form import register as register_daemon_form
+from ..daemon_identity import DaemonIdentity
+from ..deployment_utils import to_deployment_container
+
+
+logger = logging.getLogger()
+
+
+@register_daemon_form
+class Tracing(ContainerDaemonForm):
+ """Define the configs for the jaeger tracing containers"""
+
+ components: Dict[str, Dict[str, Any]] = {
+ 'elasticsearch': {
+ 'image': DEFAULT_ELASTICSEARCH_IMAGE,
+ 'envs': ['discovery.type=single-node'],
+ },
+ 'jaeger-agent': {
+ 'image': DEFAULT_JAEGER_AGENT_IMAGE,
+ },
+ 'jaeger-collector': {
+ 'image': DEFAULT_JAEGER_COLLECTOR_IMAGE,
+ },
+ 'jaeger-query': {
+ 'image': DEFAULT_JAEGER_QUERY_IMAGE,
+ },
+ } # type: ignore
+
+ @classmethod
+ def for_daemon_type(cls, daemon_type: str) -> bool:
+ return daemon_type in cls.components
+
+ @staticmethod
+ def set_configuration(config: Dict[str, str], daemon_type: str) -> None:
+ if daemon_type in ['jaeger-collector', 'jaeger-query']:
+ assert 'elasticsearch_nodes' in config
+ Tracing.components[daemon_type]['envs'] = [
+ 'SPAN_STORAGE_TYPE=elasticsearch',
+ f'ES_SERVER_URLS={config["elasticsearch_nodes"]}',
+ ]
+ if daemon_type == 'jaeger-agent':
+ assert 'collector_nodes' in config
+ Tracing.components[daemon_type]['daemon_args'] = [
+ f'--reporter.grpc.host-port={config["collector_nodes"]}',
+ '--processor.jaeger-compact.server-host-port=6799',
+ ]
+
+ def __init__(self, ident: DaemonIdentity) -> None:
+ self._identity = ident
+ self._configured = False
+
+ def _configure(self, ctx: CephadmContext) -> None:
+ if self._configured:
+ return
+ config = fetch_configs(ctx)
+ # Currently, this method side-effects the class attribute, and that
+ # is unpleasant. In the future it would be nice to move all of
+ # set_configuration into _configure and only modify each class's data
+ # independently.
+ self.set_configuration(config, self.identity.daemon_type)
+ self._configured = True
+
+ @classmethod
+ def create(cls, ctx: CephadmContext, ident: DaemonIdentity) -> 'Tracing':
+ return cls(ident)
+
+ @property
+ def identity(self) -> DaemonIdentity:
+ return self._identity
+
+ def container(self, ctx: CephadmContext) -> CephContainer:
+ ctr = daemon_to_container(ctx, self)
+ return to_deployment_container(ctx, ctr)
+
+ def uid_gid(self, ctx: CephadmContext) -> Tuple[int, int]:
+ return 65534, 65534
+
+ def get_daemon_args(self) -> List[str]:
+ return self.components[self.identity.daemon_type].get(
+ 'daemon_args', []
+ )
+
+ def customize_process_args(
+ self, ctx: CephadmContext, args: List[str]
+ ) -> None:
+ self._configure(ctx)
+ # earlier code explicitly checked whether the daemon type was jaeger-agent
+ # and only called get_daemon_args in that case. However, since
+ # the function only returns a non-empty list for jaeger-agent,
+ # that check is unnecessary and is not carried over.
+ args.extend(self.get_daemon_args())
+
+ def customize_container_envs(
+ self, ctx: CephadmContext, envs: List[str]
+ ) -> None:
+ self._configure(ctx)
+ envs.extend(
+ self.components[self.identity.daemon_type].get('envs', [])
+ )
+
+ def default_entrypoint(self) -> str:
+ return ''
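
The caveat noted in _configure() about side-effecting the class attribute is worth spelling out: mutating a class-level dict is visible from every instance. A small generic demonstration (the class here is illustrative, not the cephadm one):

    # Why mutating a class attribute (as set_configuration does) is shared state.
    class Form:
        components = {'jaeger-agent': {}}

    a, b = Form(), Form()
    a.components['jaeger-agent']['daemon_args'] = ['--reporter.grpc.host-port=host:14250']
    print(b.components['jaeger-agent'])  # the mutation is visible from the other instance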
diff --git a/src/cephadm/cephadmlib/deployment_utils.py b/src/cephadm/cephadmlib/deployment_utils.py
new file mode 100644
index 00000000000..908fa979f1a
--- /dev/null
+++ b/src/cephadm/cephadmlib/deployment_utils.py
@@ -0,0 +1,35 @@
+import os
+
+from .container_types import CephContainer
+from .context import CephadmContext
+from cephadmlib.context_getters import fetch_custom_config_files
+
+
+def to_deployment_container(
+ ctx: CephadmContext, ctr: CephContainer
+) -> CephContainer:
+ """Given a standard ceph container instance return a CephContainer
+ prepared for a deployment as a daemon, having the extra args and
+ custom configurations added.
+ NOTE: The `ctr` object is mutated before being returned.
+ """
+ if 'extra_container_args' in ctx and ctx.extra_container_args:
+ ctr.container_args.extend(ctx.extra_container_args)
+ if 'extra_entrypoint_args' in ctx and ctx.extra_entrypoint_args:
+ ctr.args.extend(ctx.extra_entrypoint_args)
+ ccfiles = fetch_custom_config_files(ctx)
+ if ccfiles:
+ mandatory_keys = ['mount_path', 'content']
+ for conf in ccfiles:
+ if all(k in conf for k in mandatory_keys):
+ mount_path = conf['mount_path']
+ assert ctr.identity
+ file_path = os.path.join(
+ ctx.data_dir,
+ ctr.identity.fsid,
+ 'custom_config_files',
+ ctr.identity.daemon_name,
+ os.path.basename(mount_path),
+ )
+ ctr.volume_mounts[file_path] = mount_path
+ return ctr
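
The custom-config handling in to_deployment_container() builds a host-side path under the cluster's custom_config_files directory and bind-mounts it at the requested mount_path; the path construction, shown standalone with values mirroring the test_to_deployment_container test further down in this patch:

    # Sketch of the host path to_deployment_container() maps to a custom config's
    # mount_path; values mirror the unit test below.
    import os

    data_dir = 'data'
    fsid = '9b9d7609-f4d5-4aba-94c8-effa764d96c9'
    daemon_name = 'grafana.host1'
    mount_path = '/etc/testing.str'

    file_path = os.path.join(
        data_dir, fsid, 'custom_config_files', daemon_name,
        os.path.basename(mount_path),
    )
    print(file_path)  # data/9b9d.../custom_config_files/grafana.host1/testing.str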
diff --git a/src/cephadm/cephadmlib/file_utils.py b/src/cephadm/cephadmlib/file_utils.py
index 7c9e6f69e43..1b9f11499a4 100644
--- a/src/cephadm/cephadmlib/file_utils.py
+++ b/src/cephadm/cephadmlib/file_utils.py
@@ -139,3 +139,7 @@ def get_file_timestamp(fn):
).strftime(DATEFMT)
except Exception:
return None
+
+
+def make_run_dir(fsid: str, uid: int, gid: int) -> None:
+ makedirs(f'/var/run/ceph/{fsid}', uid, gid, 0o770)
diff --git a/src/cephadm/cephadmlib/logging.py b/src/cephadm/cephadmlib/logging.py
index 5e306484b98..f5893d3a51d 100644
--- a/src/cephadm/cephadmlib/logging.py
+++ b/src/cephadm/cephadmlib/logging.py
@@ -12,6 +12,10 @@ from typing import List, Any, Dict, Optional, cast
from .context import CephadmContext
from .constants import QUIET_LOG_LEVEL, LOG_DIR
+from cephadmlib.file_utils import write_new
+
+from cephadmlib import templating
+
class _ExcludeErrorsFilter(logging.Filter):
def filter(self, record: logging.LogRecord) -> bool:
@@ -145,18 +149,6 @@ _interactive_logging_config = {
}
-_logrotate_data = """# created by cephadm
-/var/log/ceph/cephadm.log {
- rotate 7
- daily
- compress
- missingok
- notifempty
- su root root
-}
-"""
-
-
_VERBOSE_HANDLERS = [
'console',
'console_stdout',
@@ -222,9 +214,7 @@ def cephadm_init_logging(
logger.setLevel(QUIET_LOG_LEVEL)
- if not os.path.exists(ctx.logrotate_dir + '/cephadm'):
- with open(ctx.logrotate_dir + '/cephadm', 'w') as f:
- f.write(_logrotate_data)
+ write_cephadm_logrotate_config(ctx)
for handler in logger.handlers:
# the following little hack ensures that no matter how cephadm is named
@@ -239,3 +229,48 @@ def cephadm_init_logging(
if ctx.verbose and handler.name in _VERBOSE_HANDLERS:
handler.setLevel(QUIET_LOG_LEVEL)
logger.debug('%s\ncephadm %s' % ('-' * 80, args))
+
+
+def write_cephadm_logrotate_config(ctx: CephadmContext) -> None:
+ if not os.path.exists(ctx.logrotate_dir + '/cephadm'):
+ with open(ctx.logrotate_dir + '/cephadm', 'w') as f:
+ cephadm_logrotate_config = templating.render(
+ ctx, templating.Templates.cephadm_logrotate_config
+ )
+ f.write(cephadm_logrotate_config)
+
+
+def write_cluster_logrotate_config(ctx: CephadmContext, fsid: str) -> None:
+ # logrotate for the cluster
+ with write_new(ctx.logrotate_dir + f'/ceph-{fsid}', perms=None) as f:
+ """
+ See cephadm/cephadmlib/templates/cluster.logrotate.config.j2 to
+ get a better idea what this comment is referring to
+
+ This is a bit sloppy in that the killall/pkill will touch all ceph daemons
+ in all containers, but I don't see an elegant way to send SIGHUP *just* to
+ the daemons for this cluster. (1) systemd kill -s will get the signal to
+ podman, but podman will exit. (2) podman kill will get the signal to the
+ first child (bash), but that isn't the ceph daemon. This is simpler and
+ should be harmless.
+ """
+ targets: List[str] = [
+ 'ceph-mon',
+ 'ceph-mgr',
+ 'ceph-mds',
+ 'ceph-osd',
+ 'ceph-fuse',
+ 'radosgw',
+ 'rbd-mirror',
+ 'cephfs-mirror',
+ 'tcmu-runner',
+ ]
+
+ logrotate_config = templating.render(
+ ctx,
+ templating.Templates.cluster_logrotate_config,
+ fsid=fsid,
+ targets=targets,
+ )
+
+ f.write(logrotate_config)
diff --git a/src/cephadm/cephadmlib/runscripts.py b/src/cephadm/cephadmlib/runscripts.py
new file mode 100644
index 00000000000..7772248f362
--- /dev/null
+++ b/src/cephadm/cephadmlib/runscripts.py
@@ -0,0 +1,244 @@
+import contextlib
+import json
+import pathlib
+import shlex
+
+from typing import Any, Dict, Union, List, IO, TextIO, Optional, cast
+
+from .container_engines import Podman
+from .container_types import CephContainer, InitContainer
+from .context import CephadmContext
+from .context_getters import fetch_meta
+from .daemon_identity import DaemonIdentity
+from .file_utils import write_new
+from .net_utils import EndPoint
+
+
+# Ideally, all ContainerCommands would be converted to init containers. Until
+# that is done, one can wrap a CephContainer in a ContainerCommand object and
+# pass that as a pre- or post-command to run arbitrary container-based
+# commands in the script.
+class ContainerCommand:
+ def __init__(
+ self,
+ container: CephContainer,
+ comment: str = '',
+ background: bool = False,
+ ):
+ self.container = container
+ self.comment = comment
+ self.background = background
+
+
+Command = Union[List[str], str, ContainerCommand]
+
+
+def write_service_scripts(
+ ctx: CephadmContext,
+ ident: DaemonIdentity,
+ *,
+ container: CephContainer,
+ init_containers: Optional[List[InitContainer]] = None,
+ endpoints: Optional[List[EndPoint]] = None,
+ pre_start_commands: Optional[List[Command]] = None,
+ post_stop_commands: Optional[List[Command]] = None,
+ timeout: Optional[int] = None,
+) -> None:
+ """Write the scripts that systemd services will call in order to
+ start/stop/etc components of a cephadm managed daemon. Also writes some
+ metadata about the service getting deployed.
+ """
+ data_dir = pathlib.Path(ident.data_dir(ctx.data_dir))
+ run_file_path = data_dir / 'unit.run'
+ meta_file_path = data_dir / 'unit.meta'
+ post_stop_file_path = data_dir / 'unit.poststop'
+ stop_file_path = data_dir / 'unit.stop'
+ image_file_path = data_dir / 'unit.image'
+ # use an ExitStack to make writing the files an all-or-nothing affair. If
+ # any file fails to write then the write_new'd file will not get renamed
+ # into place
+ with contextlib.ExitStack() as estack:
+ # write out the main file to run (start) a service
+ runf = estack.enter_context(write_new(run_file_path))
+ runf.write('set -e\n')
+ for command in pre_start_commands or []:
+ _write_command(ctx, runf, command)
+ init_containers = init_containers or []
+ if init_containers:
+ _write_init_container_cmds_clean(ctx, runf, init_containers[0])
+ for idx, ic in enumerate(init_containers):
+ _write_init_container_cmds(ctx, runf, idx, ic)
+ _write_container_cmd_to_bash(ctx, runf, container, ident.daemon_name)
+
+ # some metadata about the deploy
+ metaf = estack.enter_context(write_new(meta_file_path))
+ meta: Dict[str, Any] = fetch_meta(ctx)
+ meta.update(
+ {
+ 'memory_request': int(ctx.memory_request)
+ if ctx.memory_request
+ else None,
+ 'memory_limit': int(ctx.memory_limit)
+ if ctx.memory_limit
+ else None,
+ }
+ )
+ if not meta.get('ports'):
+ if endpoints:
+ meta['ports'] = [e.port for e in endpoints]
+ else:
+ meta['ports'] = []
+ metaf.write(json.dumps(meta, indent=4) + '\n')
+
+ # post-stop command(s)
+ pstopf = estack.enter_context(write_new(post_stop_file_path))
+ # this is a fallback to eventually stop any underlying container that
+ # was not stopped properly by unit.stop, this could happen in very slow
+ # setups as described in the issue
+ # https://tracker.ceph.com/issues/58242.
+ _write_stop_actions(ctx, cast(TextIO, pstopf), container, timeout)
+ for command in post_stop_commands or []:
+ _write_command(ctx, pstopf, command)
+
+ # stop command(s)
+ stopf = estack.enter_context(write_new(stop_file_path))
+ _write_stop_actions(ctx, cast(TextIO, stopf), container, timeout)
+
+ if container:
+ imgf = estack.enter_context(write_new(image_file_path))
+ imgf.write(container.image + '\n')
+
+
+def _write_container_cmd_to_bash(
+ ctx: CephadmContext,
+ file_obj: IO[str],
+ container: 'CephContainer',
+ comment: Optional[str] = None,
+ background: Optional[bool] = False,
+) -> None:
+ if comment:
+ # Sometimes adding a comment, especially if there are multiple containers in one
+ # unit file, makes it easier to read and grok.
+ assert '\n' not in comment
+ file_obj.write(f'# {comment}\n')
+ # Sometimes, adding `--rm` to a run_cmd doesn't work. Let's remove the container manually
+ _bash_cmd(
+ file_obj, container.rm_cmd(old_cname=True), check=False, stderr=False
+ )
+ _bash_cmd(file_obj, container.rm_cmd(), check=False, stderr=False)
+
+ # Sometimes, `podman rm` doesn't find the container. Then you'll have to add `--storage`
+ if isinstance(ctx.container_engine, Podman):
+ _bash_cmd(
+ file_obj,
+ container.rm_cmd(storage=True),
+ check=False,
+ stderr=False,
+ )
+ _bash_cmd(
+ file_obj,
+ container.rm_cmd(old_cname=True, storage=True),
+ check=False,
+ stderr=False,
+ )
+
+ # container run command
+ _bash_cmd(file_obj, container.run_cmd(), background=bool(background))
+
+
+def _write_init_container_cmds(
+ ctx: CephadmContext,
+ file_obj: IO[str],
+ index: int,
+ init_container: 'InitContainer',
+) -> None:
+ file_obj.write(f'# init container {index}: {init_container.cname}\n')
+ _bash_cmd(file_obj, init_container.run_cmd())
+ _write_init_container_cmds_clean(
+ ctx, file_obj, init_container, comment=''
+ )
+
+
+def _write_init_container_cmds_clean(
+ ctx: CephadmContext,
+ file_obj: IO[str],
+ init_container: 'InitContainer',
+ comment: str = 'init container cleanup',
+) -> None:
+ if comment:
+ assert '\n' not in comment
+ file_obj.write(f'# {comment}\n')
+ _bash_cmd(
+ file_obj,
+ init_container.rm_cmd(),
+ check=False,
+ stderr=False,
+ )
+ # Sometimes, `podman rm` doesn't find the container. Then you'll have to add `--storage`
+ if isinstance(ctx.container_engine, Podman):
+ _bash_cmd(
+ file_obj,
+ init_container.rm_cmd(storage=True),
+ check=False,
+ stderr=False,
+ )
+
+
+def _write_stop_actions(
+ ctx: CephadmContext,
+ f: TextIO,
+ container: 'CephContainer',
+ timeout: Optional[int],
+) -> None:
+ # the following generated script checks whether the container exists
+ # before stopping it. The exit code is success either if it doesn't
+ # exist or if it exists and is stopped successfully.
+ container_exists = f'{ctx.container_engine.path} inspect %s &>/dev/null'
+ f.write(
+ f'! {container_exists % container.old_cname} || {" ".join(container.stop_cmd(old_cname=True, timeout=timeout))} \n'
+ )
+ f.write(
+ f'! {container_exists % container.cname} || {" ".join(container.stop_cmd(timeout=timeout))} \n'
+ )
+
+
+def _bash_cmd(
+ fh: IO[str],
+ cmd: List[str],
+ check: bool = True,
+ background: bool = False,
+ stderr: bool = True,
+) -> None:
+ line = ' '.join(shlex.quote(arg) for arg in cmd)
+ if not check:
+ line = f'! {line}'
+ if not stderr:
+ line = f'{line} 2> /dev/null'
+ if background:
+ line = f'{line} &'
+ fh.write(line)
+ fh.write('\n')
+
+
+def _write_command(
+ ctx: CephadmContext,
+ fh: IO[str],
+ cmd: Command,
+) -> None:
+ """Wrapper func for turning a command list or string into something suitable
+ for appending to a run script.
+ """
+ if isinstance(cmd, list):
+ _bash_cmd(fh, cmd)
+ elif isinstance(cmd, ContainerCommand):
+ _write_container_cmd_to_bash(
+ ctx,
+ fh,
+ cmd.container,
+ comment=cmd.comment,
+ background=cmd.background,
+ )
+ else:
+ fh.write(cmd)
+ if not cmd.endswith('\n'):
+ fh.write('\n')
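
As a quick reference for the unit.run/unit.poststop content generated above, the line-formatting rules of _bash_cmd() reduce to the following; this is a standalone restatement, not an import of the module:

    # check=False prefixes '!', stderr=False appends '2> /dev/null',
    # background=True appends '&'.
    import shlex
    from typing import List

    def bash_line(cmd: List[str], check: bool = True,
                  background: bool = False, stderr: bool = True) -> str:
        line = ' '.join(shlex.quote(arg) for arg in cmd)
        if not check:
            line = f'! {line}'
        if not stderr:
            line = f'{line} 2> /dev/null'
        if background:
            line = f'{line} &'
        return line

    print(bash_line(['podman', 'rm', 'ceph-foo'], check=False, stderr=False))
    # -> ! podman rm ceph-foo 2> /dev/null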
diff --git a/src/cephadm/cephadmlib/systemd_unit.py b/src/cephadm/cephadmlib/systemd_unit.py
new file mode 100644
index 00000000000..0c392c77bfe
--- /dev/null
+++ b/src/cephadm/cephadmlib/systemd_unit.py
@@ -0,0 +1,81 @@
+# systemd_unit.py - creating/managing systemd unit files
+
+import os
+
+from . import templating
+from .call_wrappers import call_throws
+from .container_engines import Docker, Podman
+from .context import CephadmContext
+from .daemon_identity import DaemonIdentity
+from .file_utils import write_new
+from .logging import write_cluster_logrotate_config
+
+
+def _get_unit_file(ctx: CephadmContext, fsid: str) -> str:
+ has_docker_engine = isinstance(ctx.container_engine, Docker)
+ has_podman_engine = isinstance(ctx.container_engine, Podman)
+ has_podman_split_version = (
+ has_podman_engine and ctx.container_engine.supports_split_cgroups
+ )
+ return templating.render(
+ ctx,
+ templating.Templates.ceph_service,
+ fsid=fsid,
+ has_docker_engine=has_docker_engine,
+ has_podman_engine=has_podman_engine,
+ has_podman_split_version=has_podman_split_version,
+ )
+
+
+def _install_base_units(ctx: CephadmContext, fsid: str) -> None:
+ """
+ Set up ceph.target and ceph-$fsid.target units.
+ """
+ # global unit
+ existed = os.path.exists(ctx.unit_dir + '/ceph.target')
+ with write_new(ctx.unit_dir + '/ceph.target', perms=None) as f:
+ f.write('[Unit]\n'
+ 'Description=All Ceph clusters and services\n'
+ '\n'
+ '[Install]\n'
+ 'WantedBy=multi-user.target\n')
+ if not existed:
+ # we disable before enable in case a different ceph.target
+ # (from the traditional package) is present; while newer
+ # systemd is smart enough to disable the old
+ # (/lib/systemd/...) and enable the new (/etc/systemd/...),
+ # some older versions of systemd error out with EEXIST.
+ call_throws(ctx, ['systemctl', 'disable', 'ceph.target'])
+ call_throws(ctx, ['systemctl', 'enable', 'ceph.target'])
+ call_throws(ctx, ['systemctl', 'start', 'ceph.target'])
+
+ # cluster unit
+ existed = os.path.exists(ctx.unit_dir + '/ceph-%s.target' % fsid)
+ with write_new(ctx.unit_dir + f'/ceph-{fsid}.target', perms=None) as f:
+ f.write(
+ '[Unit]\n'
+ 'Description=Ceph cluster {fsid}\n'
+ 'PartOf=ceph.target\n'
+ 'Before=ceph.target\n'
+ '\n'
+ '[Install]\n'
+ 'WantedBy=multi-user.target ceph.target\n'.format(
+ fsid=fsid)
+ )
+ if not existed:
+ call_throws(ctx, ['systemctl', 'enable', 'ceph-%s.target' % fsid])
+ call_throws(ctx, ['systemctl', 'start', 'ceph-%s.target' % fsid])
+
+ # don't overwrite file in order to allow users to manipulate it
+ if os.path.exists(ctx.logrotate_dir + f'/ceph-{fsid}'):
+ return
+
+ write_cluster_logrotate_config(ctx, fsid)
+
+
+def update_files(ctx: CephadmContext, ident: DaemonIdentity) -> None:
+ _install_base_units(ctx, ident.fsid)
+ unit = _get_unit_file(ctx, ident.fsid)
+ unit_file = 'ceph-%s@.service' % (ident.fsid)
+ with write_new(ctx.unit_dir + '/' + unit_file, perms=None) as f:
+ f.write(unit)
diff --git a/src/cephadm/cephadmlib/templates/cephadm.logrotate.config.j2 b/src/cephadm/cephadmlib/templates/cephadm.logrotate.config.j2
new file mode 100644
index 00000000000..b18aaff2196
--- /dev/null
+++ b/src/cephadm/cephadmlib/templates/cephadm.logrotate.config.j2
@@ -0,0 +1,9 @@
+# created by cephadm
+/var/log/ceph/cephadm.log {
+ rotate 7
+ daily
+ compress
+ missingok
+ notifempty
+ su root root
+}
diff --git a/src/cephadm/cephadmlib/templates/cluster.logrotate.config.j2 b/src/cephadm/cephadmlib/templates/cluster.logrotate.config.j2
new file mode 100644
index 00000000000..9af2f955d90
--- /dev/null
+++ b/src/cephadm/cephadmlib/templates/cluster.logrotate.config.j2
@@ -0,0 +1,13 @@
+# created by cephadm
+/var/log/ceph/{{ fsid }}/*.log {
+ rotate 7
+ daily
+ compress
+ sharedscripts
+ postrotate
+ killall -q -1 {{ targets|join(' ') }} || pkill -1 -x '{{ targets|join('|') }}' || true
+ endscript
+ missingok
+ notifempty
+ su root root
+}
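
To see what the postrotate line of this template expands to, the join filters can be exercised directly with jinja2; the targets below are a subset, for illustration only:

    # Rendering just the postrotate expression of the cluster logrotate template.
    import jinja2

    line = jinja2.Template(
        "killall -q -1 {{ targets|join(' ') }} || "
        "pkill -1 -x '{{ targets|join('|') }}' || true"
    ).render(targets=['ceph-mon', 'ceph-mgr', 'ceph-osd'])
    print(line)
    # killall -q -1 ceph-mon ceph-mgr ceph-osd || pkill -1 -x 'ceph-mon|ceph-mgr|ceph-osd' || true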
diff --git a/src/cephadm/cephadmlib/templating.py b/src/cephadm/cephadmlib/templating.py
index e6e8d5e0ea2..5f4dda05608 100644
--- a/src/cephadm/cephadmlib/templating.py
+++ b/src/cephadm/cephadmlib/templating.py
@@ -3,6 +3,7 @@
import enum
import os
import posixpath
+import shlex
import zipimport
from typing import Any, Optional, IO, Tuple, Callable, cast
@@ -21,6 +22,8 @@ class Templates(str, enum.Enum):
ceph_service = 'ceph.service.j2'
agent_service = 'agent.service.j2'
+ cluster_logrotate_config = 'cluster.logrotate.config.j2'
+ cephadm_logrotate_config = 'cephadm.logrotate.config.j2'
def __str__(self) -> str:
return self.value
@@ -29,6 +32,25 @@ class Templates(str, enum.Enum):
return repr(self.value)
+class TemplateNotFoundInZipApp(jinja2.TemplateNotFound):
+ def __init__(
+ self,
+ template: str,
+ *,
+ relative_path: str = '',
+ archive_path: str = '',
+ ) -> None:
+ super().__init__(template)
+ self.relative_path = relative_path
+ self.archive_path = archive_path
+
+ def __str__(self) -> str:
+ return (
+ f'{self.message}: path {self.relative_path!r}'
+ f' not found in {self.archive_path!r}'
+ )
+
+
class _PackageLoader(jinja2.PackageLoader):
"""Workaround for PackageLoader when using cephadm with relative paths.
@@ -58,20 +80,23 @@ class _PackageLoader(jinja2.PackageLoader):
def _get_archive_source(self, template: str) -> Tuple[str, str, None]:
assert isinstance(self._loader, zipimport.zipimporter)
- path = arelpath = os.path.normpath(
- posixpath.join(
- self._template_root,
- *jinja2.loaders.split_template_path(template)
- )
+ arelpath = posixpath.join(
+ self.package_name, self.package_path, template
)
- archive_path = os.path.normpath(self._loader.archive)
- if arelpath.startswith(archive_path + '/'):
- plen = len(archive_path) + 1
- arelpath = arelpath[plen:]
+ if any(p == '.' or p == '..' for p in arelpath.split(posixpath.sep)):
+ raise ValueError('template path contains invalid components')
try:
source = cast(bytes, self._loader.get_data(arelpath))
except OSError as e:
- raise jinja2.TemplateNotFound(template) from e
+ not_found = TemplateNotFoundInZipApp(
+ template,
+ relative_path=arelpath,
+ archive_path=self._loader.archive,
+ )
+ raise not_found from e
+ path = os.path.normpath(
+ posixpath.join(self._loader.archive, arelpath)
+ )
return source.decode(self.encoding), path, None
@@ -89,6 +114,7 @@ class Templater:
def _env(self) -> jinja2.Environment:
if self._jinja2_env is None:
self._jinja2_env = jinja2.Environment(loader=self._loader)
+ self._jinja2_env.filters['shellquote'] = shlex.quote
return self._jinja2_env
@property
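
The shellquote filter registered on the jinja2 environment above is simply shlex.quote exposed to templates; a tiny standalone check (the template text is illustrative):

    # shlex.quote exposed as a jinja2 filter, as Templater._env now does.
    import shlex
    import jinja2

    env = jinja2.Environment()
    env.filters['shellquote'] = shlex.quote
    print(env.from_string('exec {{ cmd | shellquote }}').render(cmd='ceph osd df'))
    # -> exec 'ceph osd df'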
diff --git a/src/cephadm/tests/fixtures.py b/src/cephadm/tests/fixtures.py
index d25dffa9e3b..572c1f9969d 100644
--- a/src/cephadm/tests/fixtures.py
+++ b/src/cephadm/tests/fixtures.py
@@ -6,7 +6,7 @@ import time
from contextlib import contextmanager
from pyfakefs import fake_filesystem
-from typing import Dict, List, Optional
+from typing import Dict, List, Optional, Any, Tuple
def import_cephadm():
@@ -183,3 +183,83 @@ def with_cephadm_ctx(
else:
yield ctx
+
+@pytest.fixture()
+def funkypatch(monkeypatch):
+ """Defines the funkypatch fixtures that acts like a mixture between
+ mock.patch and pytest's monkeypatch fixture.
+ """
+ fp = FunkyPatcher(monkeypatch)
+ yield fp
+
+
+class FunkyPatcher:
+ """FunkyPatcher monkeypatches all imported instances of an object.
+
+ Use `patch` to patch the canonical location of an object and FunkyPatcher
+ will automatically replace other imports of that object.
+ """
+
+ def __init__(self, monkeypatcher):
+ self._mp = monkeypatcher
+ # keep track of objects we've already patched. this dictionary
+ # maps a (module-name, object-name) tuple to the original object
+ # before patching. This could be used to determine if a name has
+ # already been patched or compare a patched object to the original.
+ self._originals: Dict[Tuple[str, str], Any] = {}
+
+ def patch(
+ self,
+ mod: str,
+ name: str = '',
+ *,
+ dest: Any = None,
+ force: bool = False,
+ ) -> Any:
+ """Patch an object and all existing imports of that object.
+ Specify mod as `my.mod.name.obj` where obj is name of the object to be
+ patched or as `my.mod.name` and specify `name` as the name of the
+ object to be patched.
+ If the object to be patched is not imported as the same name in `mod`
+ it will *not* be automatically patched. In other words, `from
+ my.mod.name import foo` will work, but `from my.mod.name import foo as
+ _foo` will not.
+ Use the keyword-only argument `dest` to specify the new object to be
+ used. A MagicMock will be created and used if dest is None.
+ Use the keyword-only argument `force` to override checks that mocked
+ objects are the same across modules. This can be used in the case that
+ some other code already patched an object and you want funkypatch to
+ override that patch (use with caution).
+ Returns the patched object (the MagicMock or supplied dest).
+ """
+ import sys
+ import importlib
+
+ if not name:
+ mod, name = mod.rsplit('.', 1)
+ modname = (mod, name)
+ # We don't strictly need the check but patching already patched objs is
+ # confusing to think about. It's better to block it for now and perhaps
+ # later we can relax these restrictions or be clever in some way.
+ if modname in self._originals:
+ raise KeyError(f'{modname} already patched')
+
+ if dest is None:
+ dest = mock.MagicMock()
+
+ imod = importlib.import_module(mod)
+ self._originals[modname] = getattr(imod, name)
+
+ for mname, imod in sys.modules.items():
+ try:
+ obj = getattr(imod, name)
+ except AttributeError:
+ # no matching name in module
+ continue
+ # make sure that the module imported the same object as the
+ # one we want to patch out, and not just some naming collision.
+ # ensure the original object and the one in the module are the
+ # same object
+ if obj is self._originals[modname] or force:
+ self._mp.setattr(imod, name, dest)
+ return dest
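
A hypothetical usage of the fixture in a test, to show the intent; the patched target is illustrative:

    # Hypothetical test using funkypatch: patching the canonical definition also
    # rewires modules that imported the same object under the same name.
    def test_call_is_mocked(funkypatch):
        fake_call = funkypatch.patch('cephadmlib.call_wrappers.call')
        fake_call.return_value = ('', '', 0)
        # any module that did `from ..call_wrappers import call` now sees fake_call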
diff --git a/src/cephadm/tests/test_cephadm.py b/src/cephadm/tests/test_cephadm.py
index 8db8edd0c1b..82850ab597d 100644
--- a/src/cephadm/tests/test_cephadm.py
+++ b/src/cephadm/tests/test_cephadm.py
@@ -16,6 +16,7 @@ from .fixtures import (
with_cephadm_ctx,
mock_bad_firewalld,
import_cephadm,
+ funkypatch,
)
from pyfakefs import fake_filesystem
@@ -317,13 +318,17 @@ class TestCephAdm(object):
with pytest.raises(Exception):
_cephadm.prepare_dashboard(ctx, 0, 0, lambda _, extra_mounts=None, ___=None : '5', lambda : None)
- @mock.patch('cephadm.logger')
- @mock.patch('cephadm.fetch_custom_config_files')
- @mock.patch('cephadm.get_container')
- def test_to_deployment_container(self, _get_container, _get_config, _logger):
+ def test_to_deployment_container(self, funkypatch):
"""
test to_deployment_container properly makes use of extra container args and custom conf files
"""
+ from cephadmlib.deployment_utils import to_deployment_container
+
+ funkypatch.patch('cephadm.logger')
+ _get_config = funkypatch.patch(
+ 'cephadmlib.deployment_utils.fetch_custom_config_files'
+ )
+ _get_container = funkypatch.patch('cephadm.get_container')
ctx = _cephadm.CephadmContext()
ctx.config_json = '-'
@@ -357,32 +362,45 @@ class TestCephAdm(object):
host_network=True,
)
c = _cephadm.get_container(ctx, ident)
- c = _cephadm.to_deployment_container(ctx, c)
+ c = to_deployment_container(ctx, c)
assert '--pids-limit=12345' in c.container_args
assert '--something' in c.container_args
assert os.path.join('data', '9b9d7609-f4d5-4aba-94c8-effa764d96c9', 'custom_config_files', 'grafana.host1', 'testing.str') in c.volume_mounts
assert c.volume_mounts[os.path.join('data', '9b9d7609-f4d5-4aba-94c8-effa764d96c9', 'custom_config_files', 'grafana.host1', 'testing.str')] == '/etc/testing.str'
- @mock.patch('cephadm.logger')
- @mock.patch('cephadm.FileLock')
- @mock.patch('cephadm.deploy_daemon')
- @mock.patch('cephadm.make_var_run')
- @mock.patch('cephadm.migrate_sysctl_dir')
- @mock.patch('cephadm.check_unit', lambda *args, **kwargs: (None, 'running', None))
- @mock.patch('cephadm.get_unit_name', lambda *args, **kwargs: 'mon-unit-name')
- @mock.patch('cephadm.extract_uid_gid', lambda *args, **kwargs: (0, 0))
- @mock.patch('cephadm.get_container')
- @mock.patch('cephadm.apply_deploy_config_to_ctx', lambda d, c: None)
- def test_mon_crush_location(self, _get_container, _migrate_sysctl, _make_var_run, _deploy_daemon, _file_lock, _logger, monkeypatch):
+ def test_mon_crush_location(self, funkypatch):
"""
test that crush location for mon is set if it is included in config_json
"""
- _fetch_configs = mock.MagicMock()
- monkeypatch.setattr('cephadmlib.context_getters.fetch_configs', _fetch_configs)
- monkeypatch.setattr('cephadm.fetch_configs', _fetch_configs)
- monkeypatch.setattr('cephadm.read_configuration_source', lambda c: {})
- monkeypatch.setattr('cephadm.fetch_custom_config_files', mock.MagicMock())
+ funkypatch.patch('cephadm.logger')
+ funkypatch.patch('cephadm.FileLock')
+ _deploy_daemon = funkypatch.patch('cephadm.deploy_daemon')
+ funkypatch.patch('cephadm.make_var_run')
+ funkypatch.patch('cephadmlib.file_utils.make_run_dir')
+ _migrate_sysctl = funkypatch.patch('cephadm.migrate_sysctl_dir')
+ funkypatch.patch(
+ 'cephadm.check_unit',
+ dest=lambda *args, **kwargs: (None, 'running', None),
+ )
+ funkypatch.patch(
+ 'cephadm.get_unit_name',
+ dest=lambda *args, **kwargs: 'mon-unit-name',
+ )
+ funkypatch.patch(
+ 'cephadm.extract_uid_gid', dest=lambda *args, **kwargs: (0, 0)
+ )
+ _get_container = funkypatch.patch('cephadm.get_container')
+ funkypatch.patch(
+ 'cephadm.apply_deploy_config_to_ctx', dest=lambda d, c: None
+ )
+ _fetch_configs = funkypatch.patch(
+ 'cephadmlib.context_getters.fetch_configs'
+ )
+ funkypatch.patch(
+ 'cephadm.read_configuration_source', dest=lambda c: {}
+ )
+ funkypatch.patch('cephadm.fetch_custom_config_files')
ctx = _cephadm.CephadmContext()
ctx.name = 'mon.test'
@@ -541,25 +559,31 @@ class TestCephAdm(object):
def test_dict_get(self):
- result = _cephadm.dict_get({'a': 1}, 'a', require=True)
+ from cephadmlib.data_utils import dict_get
+
+ result = dict_get({'a': 1}, 'a', require=True)
assert result == 1
- result = _cephadm.dict_get({'a': 1}, 'b')
+ result = dict_get({'a': 1}, 'b')
assert result is None
- result = _cephadm.dict_get({'a': 1}, 'b', default=2)
+ result = dict_get({'a': 1}, 'b', default=2)
assert result == 2
def test_dict_get_error(self):
+ from cephadmlib.data_utils import dict_get
+
with pytest.raises(_cephadm.Error):
- _cephadm.dict_get({'a': 1}, 'b', require=True)
+ dict_get({'a': 1}, 'b', require=True)
def test_dict_get_join(self):
- result = _cephadm.dict_get_join({'foo': ['a', 'b']}, 'foo')
+ from cephadmlib.data_utils import dict_get_join
+
+ result = dict_get_join({'foo': ['a', 'b']}, 'foo')
assert result == 'a\nb'
- result = _cephadm.dict_get_join({'foo': [1, 2]}, 'foo')
+ result = dict_get_join({'foo': [1, 2]}, 'foo')
assert result == '1\n2'
- result = _cephadm.dict_get_join({'bar': 'a'}, 'bar')
+ result = dict_get_join({'bar': 'a'}, 'bar')
assert result == 'a'
- result = _cephadm.dict_get_join({'a': 1}, 'a')
+ result = dict_get_join({'a': 1}, 'a')
assert result == 1
@mock.patch('os.listdir', return_value=[])
@@ -761,24 +785,26 @@ class TestCephAdm(object):
assert _cephadm.get_container_info(ctx, daemon_filter, by_name) == output
def test_should_log_to_journald(self):
+ from cephadmlib import context_getters
+
ctx = _cephadm.CephadmContext()
# explicit
ctx.log_to_journald = True
- assert _cephadm.should_log_to_journald(ctx)
+ assert context_getters.should_log_to_journald(ctx)
ctx.log_to_journald = None
# enable if podman supports --cgroup=split
ctx.container_engine = mock_podman()
ctx.container_engine.version = (2, 1, 0)
- assert _cephadm.should_log_to_journald(ctx)
+ assert context_getters.should_log_to_journald(ctx)
# disable on old podman
ctx.container_engine.version = (2, 0, 0)
- assert not _cephadm.should_log_to_journald(ctx)
+ assert not context_getters.should_log_to_journald(ctx)
# disable on docker
ctx.container_engine = mock_docker()
- assert not _cephadm.should_log_to_journald(ctx)
+ assert not context_getters.should_log_to_journald(ctx)
def test_normalize_image_digest(self):
s = 'myhostname:5000/ceph/ceph@sha256:753886ad9049004395ae990fbb9b096923b5a518b819283141ee8716ddf55ad1'
@@ -1186,15 +1212,17 @@ class TestMaintenance:
class TestMonitoring(object):
- @mock.patch('cephadm.call')
+ @mock.patch('cephadmlib.daemons.monitoring.call')
def test_get_version_alertmanager(self, _call):
+ from cephadmlib.daemons import monitoring
+
ctx = _cephadm.CephadmContext()
ctx.container_engine = mock_podman()
daemon_type = 'alertmanager'
# binary `prometheus`
_call.return_value = '', '{}, version 0.16.1'.format(daemon_type), 0
- version = _cephadm.Monitoring.get_version(ctx, 'container_id', daemon_type)
+ version = monitoring.Monitoring.get_version(ctx, 'container_id', daemon_type)
assert version == '0.16.1'
# binary `prometheus-alertmanager`
@@ -1205,13 +1233,15 @@ class TestMonitoring(object):
version = _cephadm.Monitoring.get_version(ctx, 'container_id', daemon_type)
assert version == '0.16.1'
- @mock.patch('cephadm.call')
+ @mock.patch('cephadmlib.daemons.monitoring.call')
def test_get_version_prometheus(self, _call):
+ from cephadmlib.daemons import monitoring
+
ctx = _cephadm.CephadmContext()
ctx.container_engine = mock_podman()
daemon_type = 'prometheus'
_call.return_value = '', '{}, version 0.16.1'.format(daemon_type), 0
- version = _cephadm.Monitoring.get_version(ctx, 'container_id', daemon_type)
+ version = monitoring.Monitoring.get_version(ctx, 'container_id', daemon_type)
assert version == '0.16.1'
def test_prometheus_external_url(self):
@@ -1225,13 +1255,15 @@ class TestMonitoring(object):
).get_daemon_args()
assert any([x.startswith('--web.external-url=http://') for x in args])
- @mock.patch('cephadm.call')
+ @mock.patch('cephadmlib.daemons.monitoring.call')
def test_get_version_node_exporter(self, _call):
+ from cephadmlib.daemons import monitoring
+
ctx = _cephadm.CephadmContext()
ctx.container_engine = mock_podman()
daemon_type = 'node-exporter'
_call.return_value = '', '{}, version 0.16.1'.format(daemon_type.replace('-', '_')), 0
- version = _cephadm.Monitoring.get_version(ctx, 'container_id', daemon_type)
+ version = monitoring.Monitoring.get_version(ctx, 'container_id', daemon_type)
assert version == '0.16.1'
def test_create_daemon_dirs_prometheus(self, cephadm_fs):
@@ -2113,16 +2145,12 @@ class TestValidateRepo:
class TestPull:
-
- @mock.patch('time.sleep')
- @mock.patch('cephadm.get_image_info_from_inspect', return_value={})
- @mock.patch('cephadm.logger')
- def test_error(self, _logger, _get_image_info_from_inspect, _sleep, monkeypatch):
- # manually create a mock and use pytest's monkeypatch fixture to set
- # multiple targets to the *same* mock
- _call = mock.MagicMock()
- monkeypatch.setattr('cephadm.call', _call)
- monkeypatch.setattr('cephadmlib.call_wrappers.call', _call)
+ def test_error(self, funkypatch):
+ funkypatch.patch('time.sleep')
+ funkypatch.patch('cephadm.logger')
+ _giifi = funkypatch.patch('cephadm.get_image_info_from_inspect')
+ _giifi.return_value = {}
+ _call = funkypatch.patch('cephadmlib.call_wrappers.call')
ctx = _cephadm.CephadmContext()
ctx.container_engine = mock_podman()
ctx.insecure = False
diff --git a/src/cephadm/tests/test_container_engine.py b/src/cephadm/tests/test_container_engine.py
index 7c5ef513127..49f9f9a2e50 100644
--- a/src/cephadm/tests/test_container_engine.py
+++ b/src/cephadm/tests/test_container_engine.py
@@ -4,7 +4,7 @@ import pytest
from tests.fixtures import with_cephadm_ctx, import_cephadm
-_cephadm = import_cephadm()
+from cephadmlib import container_engines
_find_program_loc = 'cephadmlib.container_engine_base.find_program'
@@ -29,7 +29,7 @@ def test_container_engine():
def test_podman():
with mock.patch(_find_program_loc) as find_program:
find_program.return_value = "/usr/bin/podman"
- pm = _cephadm.Podman()
+ pm = container_engines.Podman()
find_program.assert_called()
with pytest.raises(RuntimeError):
pm.version
@@ -44,7 +44,7 @@ def test_podman():
def test_podman_badversion():
with mock.patch(_find_program_loc) as find_program:
find_program.return_value = "/usr/bin/podman"
- pm = _cephadm.Podman()
+ pm = container_engines.Podman()
find_program.assert_called()
with mock.patch(_call_throws_loc) as call_throws:
call_throws.return_value = ("4.10.beta2", None, None)
@@ -56,5 +56,5 @@ def test_podman_badversion():
def test_docker():
with mock.patch(_find_program_loc) as find_program:
find_program.return_value = "/usr/bin/docker"
- docker = _cephadm.Docker()
+ docker = container_engines.Docker()
assert str(docker) == "docker (/usr/bin/docker)"
diff --git a/src/cephadm/tests/test_daemon_form.py b/src/cephadm/tests/test_daemon_form.py
index 07896cc5855..a2d1773f1c8 100644
--- a/src/cephadm/tests/test_daemon_form.py
+++ b/src/cephadm/tests/test_daemon_form.py
@@ -6,6 +6,7 @@ from .fixtures import import_cephadm
from cephadmlib import daemon_form
from cephadmlib import daemon_identity
+from cephadmlib import daemons
_cephadm = import_cephadm()
@@ -22,7 +23,7 @@ _cephadm = import_cephadm()
('mon', _cephadm.Ceph),
('nfs', _cephadm.NFSGanesha),
('nvmeof', _cephadm.CephNvmeof),
- ('osd', _cephadm.OSD),
+ ('osd', daemons.OSD),
('prometheus', _cephadm.Monitoring),
('snmp-gateway', _cephadm.SNMPGateway),
],
diff --git a/src/cephadm/tests/test_deploy.py b/src/cephadm/tests/test_deploy.py
index c77b243dfa7..dadf3456fd5 100644
--- a/src/cephadm/tests/test_deploy.py
+++ b/src/cephadm/tests/test_deploy.py
@@ -8,38 +8,35 @@ from .fixtures import (
import_cephadm,
mock_podman,
with_cephadm_ctx,
+ FunkyPatcher,
+ funkypatch,
)
_cephadm = import_cephadm()
-def _common_mp(monkeypatch):
+def _common_patches(funkypatch):
mocks = {}
- _call = mock.MagicMock(return_value=('', '', 0))
- monkeypatch.setattr('cephadmlib.container_types.call', _call)
+ _call = funkypatch.patch('cephadmlib.container_types.call')
+ _call.return_value = ('', '', 0)
mocks['call'] = _call
- _call_throws = mock.MagicMock(return_value=0)
- monkeypatch.setattr(
- 'cephadmlib.container_types.call_throws', _call_throws
- )
+ _call_throws = funkypatch.patch('cephadmlib.container_types.call_throws')
+ _call_throws.return_value = ('', '', 0)
mocks['call_throws'] = _call_throws
- _firewalld = mock.MagicMock()
+ _firewalld = funkypatch.patch('cephadm.Firewalld')
_firewalld().external_ports.get.return_value = []
- monkeypatch.setattr('cephadm.Firewalld', _firewalld)
mocks['Firewalld'] = _firewalld
- _extract_uid_gid = mock.MagicMock()
+ _extract_uid_gid = funkypatch.patch('cephadm.extract_uid_gid', force=True)
_extract_uid_gid.return_value = (8765, 8765)
- monkeypatch.setattr('cephadm.extract_uid_gid', _extract_uid_gid)
mocks['extract_uid_gid'] = _extract_uid_gid
- _install_sysctl = mock.MagicMock()
- monkeypatch.setattr('cephadm.install_sysctl', _install_sysctl)
+ _install_sysctl = funkypatch.patch('cephadm.install_sysctl')
mocks['install_sysctl'] = _install_sysctl
return mocks
-def test_deploy_nfs_container(cephadm_fs, monkeypatch):
- mocks = _common_mp(monkeypatch)
+def test_deploy_nfs_container(cephadm_fs, funkypatch):
+ mocks = _common_patches(funkypatch)
_firewalld = mocks['Firewalld']
fsid = 'b01dbeef-701d-9abe-0000-e1e5a47004a7'
with with_cephadm_ctx([]) as ctx:
@@ -75,8 +72,8 @@ def test_deploy_nfs_container(cephadm_fs, monkeypatch):
assert f.read() == 'FAKE'
-def test_deploy_snmp_container(cephadm_fs, monkeypatch):
- mocks = _common_mp(monkeypatch)
+def test_deploy_snmp_container(cephadm_fs, funkypatch):
+ mocks = _common_patches(funkypatch)
_firewalld = mocks['Firewalld']
fsid = 'b01dbeef-701d-9abe-0000-e1e5a47004a7'
with with_cephadm_ctx([]) as ctx:
@@ -107,8 +104,8 @@ def test_deploy_snmp_container(cephadm_fs, monkeypatch):
assert not (basedir / 'keyring').exists()
-def test_deploy_keepalived_container(cephadm_fs, monkeypatch):
- mocks = _common_mp(monkeypatch)
+def test_deploy_keepalived_container(cephadm_fs, funkypatch):
+ mocks = _common_patches(funkypatch)
_firewalld = mocks['Firewalld']
_install_sysctl = mocks['install_sysctl']
fsid = 'b01dbeef-701d-9abe-0000-e1e5a47004a7'
@@ -155,8 +152,8 @@ def test_deploy_keepalived_container(cephadm_fs, monkeypatch):
assert len(_install_sysctl.call_args[0][-1].get_sysctl_settings()) > 1
-def test_deploy_haproxy_container(cephadm_fs, monkeypatch):
- mocks = _common_mp(monkeypatch)
+def test_deploy_haproxy_container(cephadm_fs, funkypatch):
+ mocks = _common_patches(funkypatch)
_firewalld = mocks['Firewalld']
_install_sysctl = mocks['install_sysctl']
fsid = 'b01dbeef-701d-9abe-0000-e1e5a47004a7'
@@ -200,8 +197,8 @@ def test_deploy_haproxy_container(cephadm_fs, monkeypatch):
assert len(_install_sysctl.call_args[0][-1].get_sysctl_settings()) > 1
-def test_deploy_iscsi_container(cephadm_fs, monkeypatch):
- mocks = _common_mp(monkeypatch)
+def test_deploy_iscsi_container(cephadm_fs, funkypatch):
+ mocks = _common_patches(funkypatch)
_firewalld = mocks['Firewalld']
fsid = 'b01dbeef-701d-9abe-0000-e1e5a47004a7'
with with_cephadm_ctx([]) as ctx:
@@ -244,8 +241,8 @@ def test_deploy_iscsi_container(cephadm_fs, monkeypatch):
assert (si.st_uid, si.st_gid) == (8765, 8765)
-def test_deploy_nvmeof_container(cephadm_fs, monkeypatch):
- mocks = _common_mp(monkeypatch)
+def test_deploy_nvmeof_container(cephadm_fs, funkypatch):
+ mocks = _common_patches(funkypatch)
_firewalld = mocks['Firewalld']
fsid = 'b01dbeef-701d-9abe-0000-e1e5a47004a7'
with with_cephadm_ctx([]) as ctx:
@@ -290,11 +287,11 @@ def test_deploy_nvmeof_container(cephadm_fs, monkeypatch):
assert (si.st_uid, si.st_gid) == (167, 167)
-def test_deploy_a_monitoring_container(cephadm_fs, monkeypatch):
- mocks = _common_mp(monkeypatch)
+def test_deploy_a_monitoring_container(cephadm_fs, funkypatch):
+ mocks = _common_patches(funkypatch)
_firewalld = mocks['Firewalld']
- _get_ip_addresses = mock.MagicMock(return_value=(['10.10.10.10'], []))
- monkeypatch.setattr('cephadm.get_ip_addresses', _get_ip_addresses)
+ _get_ip_addresses = funkypatch.patch('cephadmlib.net_utils.get_ip_addresses')
+ _get_ip_addresses.return_value = (['10.10.10.10'], [])
fsid = 'b01dbeef-701d-9abe-0000-e1e5a47004a7'
with with_cephadm_ctx([]) as ctx:
ctx.container_engine = mock_podman()
@@ -330,8 +327,8 @@ def test_deploy_a_monitoring_container(cephadm_fs, monkeypatch):
assert (si.st_uid, si.st_gid) == (8765, 8765)
-def test_deploy_a_tracing_container(cephadm_fs, monkeypatch):
- mocks = _common_mp(monkeypatch)
+def test_deploy_a_tracing_container(cephadm_fs, funkypatch):
+ mocks = _common_patches(funkypatch)
_firewalld = mocks['Firewalld']
fsid = 'b01dbeef-701d-9abe-0000-e1e5a47004a7'
with with_cephadm_ctx([]) as ctx:
@@ -361,11 +358,10 @@ def test_deploy_a_tracing_container(cephadm_fs, monkeypatch):
assert not (basedir / 'keyring').exists()
-def test_deploy_ceph_mgr_container(cephadm_fs, monkeypatch):
- mocks = _common_mp(monkeypatch)
+def test_deploy_ceph_mgr_container(cephadm_fs, funkypatch):
+ mocks = _common_patches(funkypatch)
_firewalld = mocks['Firewalld']
- _make_var_run = mock.MagicMock()
- monkeypatch.setattr('cephadm.make_var_run', _make_var_run)
+ _make_run_dir = funkypatch.patch('cephadmlib.file_utils.make_run_dir')
fsid = 'b01dbeef-701d-9abe-0000-e1e5a47004a7'
with with_cephadm_ctx([]) as ctx:
ctx.container_engine = mock_podman()
@@ -399,16 +395,15 @@ def test_deploy_ceph_mgr_container(cephadm_fs, monkeypatch):
assert f.read() == 'XXXXXXX'
with open(basedir / 'keyring') as f:
assert f.read() == 'YYYYYY'
- assert _make_var_run.call_count == 1
- assert _make_var_run.call_args[0][2] == 8765
- assert _make_var_run.call_args[0][3] == 8765
+ assert _make_run_dir.call_count == 1
+ assert _make_run_dir.call_args[0][1] == 8765
+ assert _make_run_dir.call_args[0][2] == 8765
-def test_deploy_ceph_osd_container(cephadm_fs, monkeypatch):
- mocks = _common_mp(monkeypatch)
+def test_deploy_ceph_osd_container(cephadm_fs, funkypatch):
+ mocks = _common_patches(funkypatch)
_firewalld = mocks['Firewalld']
- _make_var_run = mock.MagicMock()
- monkeypatch.setattr('cephadm.make_var_run', _make_var_run)
+ _make_run_dir = funkypatch.patch('cephadmlib.file_utils.make_run_dir')
fsid = 'b01dbeef-701d-9abe-0000-e1e5a47004a7'
with with_cephadm_ctx([]) as ctx:
ctx.container_engine = mock_podman()
@@ -444,18 +439,17 @@ def test_deploy_ceph_osd_container(cephadm_fs, monkeypatch):
assert f.read() == 'XXXXXXX'
with open(basedir / 'keyring') as f:
assert f.read() == 'YYYYYY'
- assert _make_var_run.call_count == 1
- assert _make_var_run.call_args[0][2] == 8765
- assert _make_var_run.call_args[0][3] == 8765
+ assert _make_run_dir.call_count == 1
+ assert _make_run_dir.call_args[0][1] == 8765
+ assert _make_run_dir.call_args[0][2] == 8765
-def test_deploy_ceph_exporter_container(cephadm_fs, monkeypatch):
- mocks = _common_mp(monkeypatch)
+def test_deploy_ceph_exporter_container(cephadm_fs, funkypatch):
+ mocks = _common_patches(funkypatch)
_firewalld = mocks['Firewalld']
- _get_ip_addresses = mock.MagicMock(return_value=(['10.10.10.10'], []))
- monkeypatch.setattr('cephadm.get_ip_addresses', _get_ip_addresses)
- _make_var_run = mock.MagicMock()
- monkeypatch.setattr('cephadm.make_var_run', _make_var_run)
+ _get_ip_addresses = funkypatch.patch('cephadmlib.net_utils.get_ip_addresses')
+ _get_ip_addresses.return_value = (['10.10.10.10'], [])
+ _make_run_dir = funkypatch.patch('cephadmlib.file_utils.make_run_dir')
fsid = 'b01dbeef-701d-9abe-0000-e1e5a47004a7'
with with_cephadm_ctx([]) as ctx:
ctx.container_engine = mock_podman()
diff --git a/src/cephadm/tests/test_logrotate_config.py b/src/cephadm/tests/test_logrotate_config.py
new file mode 100644
index 00000000000..c97f21019d8
--- /dev/null
+++ b/src/cephadm/tests/test_logrotate_config.py
@@ -0,0 +1,57 @@
+from unittest import mock
+
+import pytest
+
+from tests.fixtures import import_cephadm, cephadm_fs
+
+from cephadmlib import logging
+
+
+_cephadm = import_cephadm()
+
+def test_cluster_logrotate_config(cephadm_fs):
+ ctx = _cephadm.CephadmContext()
+ ctx.logrotate_dir = '/my/log/dir'
+ fsid = '5dcc9af0-7cd3-11ee-9e84-525400babd0a'
+
+ cephadm_fs.create_dir(ctx.logrotate_dir)
+
+ expected_cluster_logrotate_file = """# created by cephadm
+/var/log/ceph/5dcc9af0-7cd3-11ee-9e84-525400babd0a/*.log {
+ rotate 7
+ daily
+ compress
+ sharedscripts
+ postrotate
+ killall -q -1 ceph-mon ceph-mgr ceph-mds ceph-osd ceph-fuse radosgw rbd-mirror cephfs-mirror tcmu-runner || pkill -1 -x 'ceph-mon|ceph-mgr|ceph-mds|ceph-osd|ceph-fuse|radosgw|rbd-mirror|cephfs-mirror|tcmu-runner' || true
+ endscript
+ missingok
+ notifempty
+ su root root
+}"""
+
+ logging.write_cluster_logrotate_config(ctx, fsid)
+
+ with open(ctx.logrotate_dir + f'/ceph-{fsid}', 'r') as f:
+ assert f.read() == expected_cluster_logrotate_file
+
+def test_cephadm_logrotate_config(cephadm_fs):
+ ctx = _cephadm.CephadmContext()
+ ctx.logrotate_dir = '/my/log/dir'
+
+ cephadm_fs.create_dir(ctx.logrotate_dir)
+
+ expected_cephadm_logrotate_file = """# created by cephadm
+/var/log/ceph/cephadm.log {
+ rotate 7
+ daily
+ compress
+ missingok
+ notifempty
+ su root root
+}"""
+
+ logging.write_cephadm_logrotate_config(ctx)
+
+ with open(ctx.logrotate_dir + f'/cephadm', 'r') as f:
+ assert f.read() == expected_cephadm_logrotate_file
diff --git a/src/cephadm/tests/test_nfs.py b/src/cephadm/tests/test_nfs.py
index 94ab6afcfdf..aae8113382d 100644
--- a/src/cephadm/tests/test_nfs.py
+++ b/src/cephadm/tests/test_nfs.py
@@ -155,15 +155,17 @@ def test_nfsganesha_container_envs():
def test_nfsganesha_get_version():
+ from cephadmlib.daemons import nfs
+
with with_cephadm_ctx([]) as ctx:
- nfsg = _cephadm.NFSGanesha(
+ nfsg = nfs.NFSGanesha(
ctx,
SAMPLE_UUID,
"fred",
good_nfs_json(),
)
- with mock.patch("cephadm.call") as _call:
+ with mock.patch("cephadmlib.daemons.nfs.call") as _call:
_call.return_value = ("NFS-Ganesha Release = V100", "", 0)
ver = nfsg.get_version(ctx, "fake_version")
_call.assert_called()
diff --git a/src/cephadm/tests/test_unit_file.py b/src/cephadm/tests/test_unit_file.py
index 0ebd84be3e8..a1d49c93c20 100644
--- a/src/cephadm/tests/test_unit_file.py
+++ b/src/cephadm/tests/test_unit_file.py
@@ -16,24 +16,26 @@ from tests.fixtures import (
with_cephadm_ctx,
)
+from cephadmlib import context
+from cephadmlib import systemd_unit
from cephadmlib.constants import CGROUPS_SPLIT_PODMAN_VERSION
_cephadm = import_cephadm()
def _get_unit_file(ctx, fsid):
- return str(_cephadm.get_unit_file(ctx, fsid))
+ return str(systemd_unit._get_unit_file(ctx, fsid))
def test_docker_engine_requires_docker():
- ctx = _cephadm.CephadmContext()
+ ctx = context.CephadmContext()
ctx.container_engine = mock_docker()
r = _get_unit_file(ctx, '9b9d7609-f4d5-4aba-94c8-effa764d96c9')
assert 'Requires=docker.service' in r
def test_podman_engine_does_not_req_docker():
- ctx = _cephadm.CephadmContext()
+ ctx = context.CephadmContext()
ctx.container_engine = mock_podman()
r = _get_unit_file(ctx, '9b9d7609-f4d5-4aba-94c8-effa764d96c9')
assert 'Requires=docker.service' not in r
@@ -42,7 +44,7 @@ def test_podman_engine_does_not_req_docker():
def test_podman_engine_forking_service():
# verify that the podman service uses the forking service type
# and related parameters
- ctx = _cephadm.CephadmContext()
+ ctx = context.CephadmContext()
ctx.container_engine = mock_podman()
r = _get_unit_file(ctx, '9b9d7609-f4d5-4aba-94c8-effa764d96c9')
assert 'Type=forking' in r
@@ -52,7 +54,7 @@ def test_podman_engine_forking_service():
def test_podman_with_split_cgroups_sets_delegate():
- ctx = _cephadm.CephadmContext()
+ ctx = context.CephadmContext()
ctx.container_engine = mock_podman()
ctx.container_engine.version = CGROUPS_SPLIT_PODMAN_VERSION
r = _get_unit_file(ctx, '9b9d7609-f4d5-4aba-94c8-effa764d96c9')
@@ -65,7 +67,7 @@ def _ignore_blank_lines(value):
def test_new_docker():
- ctx = _cephadm.CephadmContext()
+ ctx = context.CephadmContext()
ctx.container_engine = mock_docker()
ru = _get_unit_file(ctx, '9b9d7609-f4d5-4aba-94c8-effa764d96c9')
assert _ignore_blank_lines(ru) == [
@@ -104,7 +106,7 @@ def test_new_docker():
def test_new_podman():
- ctx = _cephadm.CephadmContext()
+ ctx = context.CephadmContext()
ctx.container_engine = mock_podman()
ctx.container_engine.version = CGROUPS_SPLIT_PODMAN_VERSION
ru = _get_unit_file(ctx, '9b9d7609-f4d5-4aba-94c8-effa764d96c9')
diff --git a/src/cls/2pc_queue/cls_2pc_queue_ops.h b/src/cls/2pc_queue/cls_2pc_queue_ops.h
index 194fdf6da9d..fa4f8765ffe 100644
--- a/src/cls/2pc_queue/cls_2pc_queue_ops.h
+++ b/src/cls/2pc_queue/cls_2pc_queue_ops.h
@@ -3,12 +3,13 @@
#pragma once
+#include "common/ceph_json.h"
#include "include/types.h"
#include "cls_2pc_queue_types.h"
struct cls_2pc_queue_reserve_op {
uint64_t size;
- uint32_t entries;
+ uint32_t entries{0};
void encode(ceph::buffer::list& bl) const {
ENCODE_START(1, 1, bl);
@@ -23,6 +24,19 @@ struct cls_2pc_queue_reserve_op {
decode(entries, bl);
DECODE_FINISH(bl);
}
+
+ void dump(ceph::Formatter *f) const {
+ f->dump_unsigned("size", size);
+ f->dump_unsigned("entries", entries);
+ }
+
+ static void generate_test_instances(std::list<cls_2pc_queue_reserve_op*>& ls) {
+ ls.push_back(new cls_2pc_queue_reserve_op);
+ ls.back()->size = 0;
+ ls.push_back(new cls_2pc_queue_reserve_op);
+ ls.back()->size = 123;
+ ls.back()->entries = 456;
+ }
};
WRITE_CLASS_ENCODER(cls_2pc_queue_reserve_op)
@@ -40,6 +54,15 @@ struct cls_2pc_queue_reserve_ret {
decode(id, bl);
DECODE_FINISH(bl);
}
+
+ void dump(ceph::Formatter *f) const {
+ f->dump_unsigned("id", id);
+ }
+
+ static void generate_test_instances(std::list<cls_2pc_queue_reserve_ret*>& ls) {
+ ls.push_back(new cls_2pc_queue_reserve_ret);
+ ls.back()->id = 123;
+ }
};
WRITE_CLASS_ENCODER(cls_2pc_queue_reserve_ret)
@@ -61,6 +84,19 @@ struct cls_2pc_queue_commit_op {
DECODE_FINISH(bl);
}
+ void dump(ceph::Formatter *f) const {
+ f->dump_unsigned("id", id);
+ encode_json("bl_data_vec", bl_data_vec, f);
+ }
+
+ static void generate_test_instances(std::list<cls_2pc_queue_commit_op*>& ls) {
+ ls.push_back(new cls_2pc_queue_commit_op);
+ ls.back()->id = 123;
+ ls.back()->bl_data_vec.push_back(ceph::buffer::list());
+ ls.back()->bl_data_vec.back().append("foo");
+ ls.back()->bl_data_vec.push_back(ceph::buffer::list());
+ ls.back()->bl_data_vec.back().append("bar");
+ }
};
WRITE_CLASS_ENCODER(cls_2pc_queue_commit_op)
@@ -78,6 +114,13 @@ struct cls_2pc_queue_abort_op {
decode(id, bl);
DECODE_FINISH(bl);
}
+ void dump(ceph::Formatter *f) const {
+ f->dump_unsigned("id", id);
+ }
+ static void generate_test_instances(std::list<cls_2pc_queue_abort_op*>& ls) {
+ ls.push_back(new cls_2pc_queue_abort_op);
+ ls.back()->id = 1;
+ }
};
WRITE_CLASS_ENCODER(cls_2pc_queue_abort_op)
@@ -96,6 +139,14 @@ struct cls_2pc_queue_expire_op {
decode(stale_time, bl);
DECODE_FINISH(bl);
}
+ void dump(ceph::Formatter *f) const {
+ f->dump_stream("stale_time") << stale_time;
+ }
+ static void generate_test_instances(std::list<cls_2pc_queue_expire_op*>& ls) {
+ ls.push_back(new cls_2pc_queue_expire_op);
+ ls.push_back(new cls_2pc_queue_expire_op);
+ ls.back()->stale_time = ceph::coarse_real_time::min();
+ }
};
WRITE_CLASS_ENCODER(cls_2pc_queue_expire_op)
@@ -113,6 +164,23 @@ struct cls_2pc_queue_reservations_ret {
decode(reservations, bl);
DECODE_FINISH(bl);
}
+ void dump(ceph::Formatter *f) const {
+ f->open_array_section("reservations");
+ for (const auto& i : reservations) {
+ f->open_object_section("reservation");
+ f->dump_unsigned("id", i.first);
+ i.second.dump(f);
+ f->close_section();
+ }
+ f->close_section();
+ }
+
+ static void generate_test_instances(std::list<cls_2pc_queue_reservations_ret*>& ls) {
+ ls.push_back(new cls_2pc_queue_reservations_ret);
+ ls.push_back(new cls_2pc_queue_reservations_ret);
+ ls.back()->reservations[1] = cls_2pc_reservation();
+ ls.back()->reservations[2] = cls_2pc_reservation();
+ }
};
WRITE_CLASS_ENCODER(cls_2pc_queue_reservations_ret)
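
The hunks above (and the similar ones in the cls headers that follow) add the same pair of members to each request/reply struct: a dump(ceph::Formatter*) that renders the fields through the Formatter interface, and a static generate_test_instances() that returns a default-constructed object plus one with every field populated. This is the contract Ceph's encode/decode round-trip tooling (ceph-dencoder) expects. A minimal sketch of the shape, using a hypothetical struct and field names:

#include <cstdint>
#include <list>
#include <string>
#include "common/Formatter.h"

// Hypothetical example_op; real types also keep their ENCODE_START/DECODE_START
// encoders next to these two members.
struct example_op {
  uint64_t size{0};
  std::string marker;

  void dump(ceph::Formatter *f) const {
    // one dump_* call per encoded field
    f->dump_unsigned("size", size);
    f->dump_string("marker", marker);
  }

  static void generate_test_instances(std::list<example_op*>& o) {
    // a default-constructed instance first, then a fully populated one,
    // so round-trip tests exercise both the empty and the non-empty encoding
    o.push_back(new example_op);
    o.push_back(new example_op);
    o.back()->size = 123;
    o.back()->marker = "marker";
  }
};
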
diff --git a/src/cls/2pc_queue/cls_2pc_queue_types.h b/src/cls/2pc_queue/cls_2pc_queue_types.h
index 2413fd7043d..b270c9d6e79 100644
--- a/src/cls/2pc_queue/cls_2pc_queue_types.h
+++ b/src/cls/2pc_queue/cls_2pc_queue_types.h
@@ -34,6 +34,19 @@ struct cls_2pc_reservation
}
DECODE_FINISH(bl);
}
+
+ void dump(ceph::Formatter *f) const {
+ f->dump_unsigned("size", size);
+ f->dump_stream("timestamp") << timestamp;
+ }
+
+ static void generate_test_instances(std::list<cls_2pc_reservation*>& ls) {
+ ls.push_back(new cls_2pc_reservation);
+ ls.back()->size = 0;
+ ls.push_back(new cls_2pc_reservation);
+ ls.back()->size = 123;
+ ls.back()->timestamp = ceph::coarse_real_clock::zero();
+ }
};
WRITE_CLASS_ENCODER(cls_2pc_reservation)
@@ -68,5 +81,28 @@ struct cls_2pc_urgent_data
}
DECODE_FINISH(bl);
}
+
+ void dump(ceph::Formatter *f) const {
+ f->dump_unsigned("reserved_size", reserved_size);
+ f->dump_unsigned("last_id", last_id);
+ f->open_array_section("reservations");
+ for (const auto& [id, res] : reservations) {
+ f->open_object_section("reservation");
+ f->dump_unsigned("id", id);
+ res.dump(f);
+ f->close_section();
+ }
+ f->close_section();
+ f->dump_bool("has_xattrs", has_xattrs);
+ }
+
+ static void generate_test_instances(std::list<cls_2pc_urgent_data*>& ls) {
+ ls.push_back(new cls_2pc_urgent_data);
+ ls.push_back(new cls_2pc_urgent_data);
+ ls.back()->reserved_size = 123;
+ ls.back()->last_id = 456;
+ ls.back()->reservations.emplace(789, cls_2pc_reservation(1, ceph::coarse_real_clock::zero(), 2));
+ ls.back()->has_xattrs = true;
+ }
};
WRITE_CLASS_ENCODER(cls_2pc_urgent_data)
diff --git a/src/cls/cas/cls_cas_internal.h b/src/cls/cas/cls_cas_internal.h
index 09e7f9f1f69..037e60fffd9 100644
--- a/src/cls/cas/cls_cas_internal.h
+++ b/src/cls/cas/cls_cas_internal.h
@@ -145,6 +145,12 @@ struct chunk_refs_by_object_t : public chunk_refs_t::refs_t {
}
f->close_section();
}
+ static void generate_test_instances(std::list<chunk_refs_by_object_t*>& ls) {
+ ls.push_back(new chunk_refs_by_object_t());
+ ls.push_back(new chunk_refs_by_object_t());
+ ls.back()->by_object.insert(hobject_t(sobject_t("foo", CEPH_NOSNAP)));
+ ls.back()->by_object.insert(hobject_t(sobject_t("bar", CEPH_NOSNAP)));
+ }
};
WRITE_CLASS_ENCODER(chunk_refs_by_object_t)
@@ -386,6 +392,11 @@ struct chunk_refs_count_t : public chunk_refs_t::refs_t {
f->dump_string("type", "count");
f->dump_unsigned("count", total);
}
+ static void generate_test_instances(std::list<chunk_refs_count_t*>& o) {
+ o.push_back(new chunk_refs_count_t);
+ o.push_back(new chunk_refs_count_t);
+ o.back()->total = 123;
+ }
};
WRITE_CLASS_ENCODER(chunk_refs_count_t)
diff --git a/src/cls/fifo/cls_fifo_ops.h b/src/cls/fifo/cls_fifo_ops.h
index e850c635c0b..d466122a952 100644
--- a/src/cls/fifo/cls_fifo_ops.h
+++ b/src/cls/fifo/cls_fifo_ops.h
@@ -67,6 +67,31 @@ struct create_meta
decode(exclusive, bl);
DECODE_FINISH(bl);
}
+ void dump(ceph::Formatter *f) const {
+ f->dump_string("id", id);
+ f->dump_object("version", version.value_or(objv()));
+ f->dump_string("pool_name", pool.name);
+ f->dump_string("pool_ns", pool.ns);
+ f->dump_string("oid_prefix", oid_prefix.value_or(""));
+ f->dump_unsigned("max_part_size", max_part_size);
+ f->dump_unsigned("max_entry_size", max_entry_size);
+ f->dump_bool("exclusive", exclusive);
+ }
+ static void generate_test_instances(std::list<create_meta*>& o) {
+ o.push_back(new create_meta);
+ o.push_back(new create_meta);
+ o.back()->id = "id";
+ objv v1;
+ v1.instance = "inst1";
+ v1.ver = 1;
+ o.back()->version = v1;
+ o.back()->pool.name = "pool";
+ o.back()->pool.ns = "ns";
+ o.back()->oid_prefix = "prefix";
+ o.back()->max_part_size = 1024;
+ o.back()->max_entry_size = 1024;
+ o.back()->exclusive = true;
+ }
};
WRITE_CLASS_ENCODER(create_meta)
@@ -84,6 +109,17 @@ struct get_meta
decode(version, bl);
DECODE_FINISH(bl);
}
+ void dump(ceph::Formatter *f) const {
+ f->dump_object("version", version.value_or(objv()));
+ }
+ static void generate_test_instances(std::list<get_meta*>& o) {
+ o.push_back(new get_meta);
+ o.push_back(new get_meta);
+ objv v1;
+ v1.instance = "inst1";
+ v1.ver = 1;
+ o.back()->version = v1;
+ }
};
WRITE_CLASS_ENCODER(get_meta)
@@ -108,6 +144,18 @@ struct get_meta_reply
decode(part_entry_overhead, bl);
DECODE_FINISH(bl);
}
+ void dump(ceph::Formatter *f) const {
+ f->dump_object("info", info);
+ f->dump_unsigned("part_header_size", part_header_size);
+ f->dump_unsigned("part_entry_overhead", part_entry_overhead);
+ }
+ static void generate_test_instances(std::list<get_meta_reply*>& o) {
+ o.push_back(new get_meta_reply);
+ o.push_back(new get_meta_reply);
+ o.back()->info = fifo::info();
+ o.back()->part_header_size = 1024;
+ o.back()->part_entry_overhead = 1024;
+ }
};
WRITE_CLASS_ENCODER(get_meta_reply)
diff --git a/src/cls/fifo/cls_fifo_types.h b/src/cls/fifo/cls_fifo_types.h
index 1c69c1f0871..2ae601e4aaf 100644
--- a/src/cls/fifo/cls_fifo_types.h
+++ b/src/cls/fifo/cls_fifo_types.h
@@ -54,7 +54,16 @@ struct objv {
decode(ver, bl);
DECODE_FINISH(bl);
}
- void dump(ceph::Formatter* f) const;
+ void dump(ceph::Formatter* f) const {
+ f->dump_string("instance", instance);
+ f->dump_unsigned("ver", ver);
+ }
+ static void generate_test_instances(std::list<objv*>& o) {
+ o.push_back(new objv);
+ o.push_back(new objv);
+ o.back()->instance = "instance";
+ o.back()->ver = 1;
+ }
void decode_json(JSONObj* obj);
bool operator ==(const objv& rhs) const {
@@ -103,7 +112,18 @@ struct data_params {
decode(full_size_threshold, bl);
DECODE_FINISH(bl);
}
- void dump(ceph::Formatter* f) const;
+ void dump(ceph::Formatter* f) const {
+ f->dump_unsigned("max_part_size", max_part_size);
+ f->dump_unsigned("max_entry_size", max_entry_size);
+ f->dump_unsigned("full_size_threshold", full_size_threshold);
+ }
+ static void generate_test_instances(std::list<data_params*>& o) {
+ o.push_back(new data_params);
+ o.push_back(new data_params);
+ o.back()->max_part_size = 1;
+ o.back()->max_entry_size = 2;
+ o.back()->full_size_threshold = 3;
+ }
void decode_json(JSONObj* obj);
auto operator <=>(const data_params&) const = default;
@@ -161,7 +181,10 @@ struct journal_entry {
decode(part_tag, bl);
DECODE_FINISH(bl);
}
- void dump(ceph::Formatter* f) const;
+ void dump(ceph::Formatter* f) const {
+ f->dump_int("op", (int)op);
+ f->dump_int("part_num", part_num);
+ }
auto operator <=>(const journal_entry&) const = default;
};
@@ -397,7 +420,38 @@ struct info {
decode_journal(bl);
DECODE_FINISH(bl);
}
- void dump(ceph::Formatter* f) const;
+ void dump(ceph::Formatter* f) const {
+ f->dump_string("id", id);
+ f->dump_object("version", version);
+ f->dump_string("oid_prefix", oid_prefix);
+ f->dump_object("params", params);
+ f->dump_int("tail_part_num", tail_part_num);
+ f->dump_int("head_part_num", head_part_num);
+ f->dump_int("min_push_part_num", min_push_part_num);
+ f->dump_int("max_push_part_num", max_push_part_num);
+ f->open_array_section("journal");
+ for (const auto& entry : journal) {
+ f->open_object_section("entry");
+ f->dump_object("entry", entry);
+ f->close_section();
+ }
+ f->close_section();
+ }
+ static void generate_test_instances(std::list<info*>& o) {
+ o.push_back(new info);
+ o.push_back(new info);
+ o.back()->id = "myid";
+ o.back()->version = objv();
+ o.back()->oid_prefix = "myprefix";
+ o.back()->params = data_params();
+ o.back()->tail_part_num = 123;
+ o.back()->head_part_num = 456;
+ o.back()->min_push_part_num = 789;
+ o.back()->max_push_part_num = 101112;
+ o.back()->journal.insert(journal_entry(journal_entry::Op::create, 1));
+ o.back()->journal.insert(journal_entry(journal_entry::Op::create, 2));
+ o.back()->journal.insert(journal_entry(journal_entry::Op::create, 3));
+ }
void decode_json(JSONObj* obj);
std::string part_oid(std::int64_t part_num) const {
diff --git a/src/cls/log/cls_log_ops.h b/src/cls/log/cls_log_ops.h
index 5a65892598b..4d3b2f5d309 100644
--- a/src/cls/log/cls_log_ops.h
+++ b/src/cls/log/cls_log_ops.h
@@ -4,6 +4,7 @@
#ifndef CEPH_CLS_LOG_OPS_H
#define CEPH_CLS_LOG_OPS_H
+#include "common/ceph_json.h"
#include "cls_log_types.h"
struct cls_log_add_op {
@@ -73,6 +74,21 @@ struct cls_log_list_op {
decode(max_entries, bl);
DECODE_FINISH(bl);
}
+
+ void dump(ceph::Formatter* f) const {
+ f->dump_stream("from_time") << from_time;
+ f->dump_string("marker", marker);
+ f->dump_stream("to_time") << to_time;
+ f->dump_int("max_entries", max_entries);
+ }
+ static void generate_test_instances(std::list<cls_log_list_op*>& ls) {
+ ls.push_back(new cls_log_list_op);
+ ls.push_back(new cls_log_list_op);
+ ls.back()->from_time = utime_t(1, 2);
+ ls.back()->marker = "marker";
+ ls.back()->to_time = utime_t(3, 4);
+ ls.back()->max_entries = 5;
+ }
};
WRITE_CLASS_ENCODER(cls_log_list_op)
@@ -98,6 +114,25 @@ struct cls_log_list_ret {
decode(truncated, bl);
DECODE_FINISH(bl);
}
+
+ void dump(ceph::Formatter* f) const {
+ encode_json("entries", entries, f);
+ f->dump_string("marker", marker);
+ f->dump_bool("truncated", truncated);
+ }
+ static void generate_test_instances(std::list<cls_log_list_ret*>& ls) {
+ ls.push_back(new cls_log_list_ret);
+ ls.push_back(new cls_log_list_ret);
+ ls.back()->entries.push_back(cls_log_entry());
+ ls.back()->entries.push_back(cls_log_entry());
+ ls.back()->entries.back().section = "section";
+ ls.back()->entries.back().name = "name";
+ ls.back()->entries.back().timestamp = utime_t(1, 2);
+ ls.back()->entries.back().data.append("data");
+ ls.back()->entries.back().id = "id";
+ ls.back()->marker = "marker";
+ ls.back()->truncated = true;
+ }
};
WRITE_CLASS_ENCODER(cls_log_list_ret)
@@ -133,6 +168,20 @@ struct cls_log_trim_op {
}
DECODE_FINISH(bl);
}
+ void dump(ceph::Formatter* f) const {
+ f->dump_stream("from_time") << from_time;
+ f->dump_stream("to_time") << to_time;
+ f->dump_string("from_marker", from_marker);
+ f->dump_string("to_marker", to_marker);
+ }
+ static void generate_test_instances(std::list<cls_log_trim_op*>& ls) {
+ ls.push_back(new cls_log_trim_op);
+ ls.push_back(new cls_log_trim_op);
+ ls.back()->from_time = utime_t(1, 2);
+ ls.back()->to_time = utime_t(3, 4);
+ ls.back()->from_marker = "from_marker";
+ ls.back()->to_marker = "to_marker";
+ }
};
WRITE_CLASS_ENCODER(cls_log_trim_op)
@@ -150,6 +199,13 @@ struct cls_log_info_op {
// currently empty request
DECODE_FINISH(bl);
}
+
+ void dump(ceph::Formatter* f) const {
+ }
+
+ static void generate_test_instances(std::list<cls_log_info_op*>& ls) {
+ ls.push_back(new cls_log_info_op);
+ }
};
WRITE_CLASS_ENCODER(cls_log_info_op)
diff --git a/src/cls/log/cls_log_types.h b/src/cls/log/cls_log_types.h
index 33b8cce51e5..29aa2bae8fe 100644
--- a/src/cls/log/cls_log_types.h
+++ b/src/cls/log/cls_log_types.h
@@ -92,6 +92,16 @@ struct cls_log_header {
decode(max_time, bl);
DECODE_FINISH(bl);
}
+ void dump(ceph::Formatter* f) const {
+ f->dump_string("max_marker", max_marker);
+ f->dump_stream("max_time") << max_time;
+ }
+ static void generate_test_instances(std::list<cls_log_header*>& o) {
+ o.push_back(new cls_log_header);
+ o.push_back(new cls_log_header);
+ o.back()->max_marker = "test_marker";
+ o.back()->max_time = utime_t();
+ }
};
inline bool operator ==(const cls_log_header& lhs, const cls_log_header& rhs) {
return (lhs.max_marker == rhs.max_marker &&
diff --git a/src/cls/queue/cls_queue_ops.h b/src/cls/queue/cls_queue_ops.h
index f41572036c5..25bf1200c54 100644
--- a/src/cls/queue/cls_queue_ops.h
+++ b/src/cls/queue/cls_queue_ops.h
@@ -4,6 +4,7 @@
#ifndef CEPH_CLS_QUEUE_OPS_H
#define CEPH_CLS_QUEUE_OPS_H
+#include "common/ceph_json.h"
#include "cls/queue/cls_queue_types.h"
struct cls_queue_init_op {
@@ -29,6 +30,19 @@ struct cls_queue_init_op {
DECODE_FINISH(bl);
}
+ void dump(ceph::Formatter *f) const {
+ f->dump_unsigned("queue_size", queue_size);
+ f->dump_unsigned("max_urgent_data_size", max_urgent_data_size);
+ f->dump_unsigned("urgent_data_len", bl_urgent_data.length());
+ }
+
+ static void generate_test_instances(std::list<cls_queue_init_op*>& o) {
+ o.push_back(new cls_queue_init_op);
+ o.push_back(new cls_queue_init_op);
+ o.back()->queue_size = 1024;
+ o.back()->max_urgent_data_size = 1024;
+ o.back()->bl_urgent_data.append(std::string_view("data"));
+ }
};
WRITE_CLASS_ENCODER(cls_queue_init_op)
@@ -47,12 +61,23 @@ struct cls_queue_enqueue_op {
DECODE_START(1, bl);
decode(bl_data_vec, bl);
DECODE_FINISH(bl);
- }
+ }
+
+ void dump(ceph::Formatter *f) const {
+ f->dump_unsigned("data_vec_len", bl_data_vec.size());
+ }
+
+ static void generate_test_instances(std::list<cls_queue_enqueue_op*>& o) {
+ o.push_back(new cls_queue_enqueue_op);
+ o.push_back(new cls_queue_enqueue_op);
+ o.back()->bl_data_vec.push_back(ceph::buffer::list());
+ o.back()->bl_data_vec.back().append(std::string_view("data"));
+ }
};
WRITE_CLASS_ENCODER(cls_queue_enqueue_op)
struct cls_queue_list_op {
- uint64_t max;
+ uint64_t max{0};
std::string start_marker;
std::string end_marker;
@@ -75,6 +100,18 @@ struct cls_queue_list_op {
}
DECODE_FINISH(bl);
}
+
+ void dump(ceph::Formatter *f) const {
+ f->dump_unsigned("max", max);
+ f->dump_string("start_marker", start_marker);
+ }
+
+ static void generate_test_instances(std::list<cls_queue_list_op*>& o) {
+ o.push_back(new cls_queue_list_op);
+ o.push_back(new cls_queue_list_op);
+ o.back()->max = 123;
+ o.back()->start_marker = "foo";
+ }
};
WRITE_CLASS_ENCODER(cls_queue_list_op)
@@ -100,6 +137,22 @@ struct cls_queue_list_ret {
decode(entries, bl);
DECODE_FINISH(bl);
}
+
+ void dump(ceph::Formatter *f) const {
+ f->dump_bool("is_truncated", is_truncated);
+ f->dump_string("next_marker", next_marker);
+ encode_json("entries", entries, f);
+ }
+
+ static void generate_test_instances(std::list<cls_queue_list_ret*>& o) {
+ o.push_back(new cls_queue_list_ret);
+ o.back()->is_truncated = true;
+ o.back()->next_marker = "foo";
+ o.back()->entries.push_back(cls_queue_entry());
+ o.back()->entries.push_back(cls_queue_entry());
+ o.back()->entries.back().marker = "id";
+ o.back()->entries.back().data.append(std::string_view("data"));
+ }
};
WRITE_CLASS_ENCODER(cls_queue_list_ret)
@@ -119,6 +172,15 @@ struct cls_queue_remove_op {
decode(end_marker, bl);
DECODE_FINISH(bl);
}
+
+ void dump(ceph::Formatter *f) const {
+ f->dump_string("end_marker", end_marker);
+ }
+ static void generate_test_instances(std::list<cls_queue_remove_op*>& o) {
+ o.push_back(new cls_queue_remove_op);
+ o.push_back(new cls_queue_remove_op);
+ o.back()->end_marker = "foo";
+ }
};
WRITE_CLASS_ENCODER(cls_queue_remove_op)
@@ -138,6 +200,14 @@ struct cls_queue_get_capacity_ret {
decode(queue_capacity, bl);
DECODE_FINISH(bl);
}
+
+ void dump(ceph::Formatter *f) const {
+ f->dump_unsigned("queue_capacity", queue_capacity);
+ }
+ static void generate_test_instances(std::list<cls_queue_get_capacity_ret*>& o) {
+ o.push_back(new cls_queue_get_capacity_ret);
+ o.back()->queue_capacity = 123;
+ }
};
WRITE_CLASS_ENCODER(cls_queue_get_capacity_ret)
diff --git a/src/cls/queue/cls_queue_types.h b/src/cls/queue/cls_queue_types.h
index cc46df40505..3c3e828edf0 100644
--- a/src/cls/queue/cls_queue_types.h
+++ b/src/cls/queue/cls_queue_types.h
@@ -34,6 +34,17 @@ struct cls_queue_entry
decode(marker, bl);
DECODE_FINISH(bl);
}
+
+ void dump(ceph::Formatter *f) const {
+ f->dump_string("marker", marker);
+ f->dump_unsigned("data_len", data.length());
+ }
+ static void generate_test_instances(std::list<cls_queue_entry*>& o) {
+ o.push_back(new cls_queue_entry);
+ o.push_back(new cls_queue_entry);
+ o.back()->data.append(std::string_view("data"));
+ o.back()->marker = "marker";
+ }
};
WRITE_CLASS_ENCODER(cls_queue_entry)
@@ -80,7 +91,16 @@ struct cls_queue_marker
}
return 0;
}
-
+ void dump(ceph::Formatter *f) const {
+ f->dump_unsigned("offset", offset);
+ f->dump_unsigned("gen", gen);
+ }
+ static void generate_test_instances(std::list<cls_queue_marker*>& o) {
+ o.push_back(new cls_queue_marker);
+ o.push_back(new cls_queue_marker);
+ o.back()->offset = 1024;
+ o.back()->gen = 0;
+ }
};
WRITE_CLASS_ENCODER(cls_queue_marker)
@@ -114,6 +134,27 @@ struct cls_queue_head
decode(bl_urgent_data, bl);
DECODE_FINISH(bl);
}
+
+ void dump(ceph::Formatter *f) const {
+ f->dump_unsigned("max_head_size", max_head_size);
+ f->dump_unsigned("queue_size", queue_size);
+ f->dump_unsigned("max_urgent_data_size", max_urgent_data_size);
+ f->dump_unsigned("front_offset", front.offset);
+ f->dump_unsigned("front_gen", front.gen);
+ f->dump_unsigned("tail_offset", tail.offset);
+ f->dump_unsigned("tail_gen", tail.gen);
+ }
+ static void generate_test_instances(std::list<cls_queue_head*>& o) {
+ o.push_back(new cls_queue_head);
+ o.push_back(new cls_queue_head);
+ o.back()->max_head_size = 1024;
+ o.back()->front.offset = 1024;
+ o.back()->front.gen = 0;
+ o.back()->tail.offset = 1024;
+ o.back()->tail.gen = 0;
+ o.back()->queue_size = 1024;
+ o.back()->max_urgent_data_size = 0;
+ }
};
WRITE_CLASS_ENCODER(cls_queue_head)
diff --git a/src/cls/rgw/cls_rgw_client.cc b/src/cls/rgw/cls_rgw_client.cc
index ed2be322da5..966823729b7 100644
--- a/src/cls/rgw/cls_rgw_client.cc
+++ b/src/cls/rgw/cls_rgw_client.cc
@@ -751,12 +751,11 @@ int CLSRGWIssueBucketBILogStop::issue_op(const int shard_id, const string& oid)
}
class GetDirHeaderCompletion : public ObjectOperationCompletion {
- RGWGetDirHeader_CB *ret_ctx;
+ boost::intrusive_ptr<RGWGetDirHeader_CB> cb;
public:
- explicit GetDirHeaderCompletion(RGWGetDirHeader_CB *_ctx) : ret_ctx(_ctx) {}
- ~GetDirHeaderCompletion() override {
- ret_ctx->put();
- }
+ explicit GetDirHeaderCompletion(boost::intrusive_ptr<RGWGetDirHeader_CB> cb)
+ : cb(std::move(cb)) {}
+
void handle_completion(int r, bufferlist& outbl) override {
rgw_cls_list_ret ret;
try {
@@ -765,20 +764,20 @@ public:
} catch (ceph::buffer::error& err) {
r = -EIO;
}
-
- ret_ctx->handle_response(r, ret.dir.header);
+ cb->handle_response(r, ret.dir.header);
}
};
-int cls_rgw_get_dir_header_async(IoCtx& io_ctx, string& oid, RGWGetDirHeader_CB *ctx)
+int cls_rgw_get_dir_header_async(IoCtx& io_ctx, const string& oid,
+ boost::intrusive_ptr<RGWGetDirHeader_CB> cb)
{
bufferlist in, out;
rgw_cls_list_op call;
call.num_entries = 0;
encode(call, in);
ObjectReadOperation op;
- GetDirHeaderCompletion *cb = new GetDirHeaderCompletion(ctx);
- op.exec(RGW_CLASS, RGW_BUCKET_LIST, in, cb);
+ op.exec(RGW_CLASS, RGW_BUCKET_LIST, in,
+ new GetDirHeaderCompletion(std::move(cb)));
AioCompletion *c = librados::Rados::aio_create_completion(nullptr, nullptr);
int r = io_ctx.aio_operate(oid, c, &op, NULL);
c->release();
diff --git a/src/cls/rgw/cls_rgw_client.h b/src/cls/rgw/cls_rgw_client.h
index 3db73065171..6f9084acdd3 100644
--- a/src/cls/rgw/cls_rgw_client.h
+++ b/src/cls/rgw/cls_rgw_client.h
@@ -3,6 +3,8 @@
#pragma once
+#include <boost/intrusive_ptr.hpp>
+#include <boost/smart_ptr/intrusive_ref_counter.hpp>
#include "include/str_list.h"
#include "include/rados/librados.hpp"
#include "cls_rgw_ops.h"
@@ -151,10 +153,10 @@ public:
}
};
-class RGWGetDirHeader_CB : public RefCountedObject {
+class RGWGetDirHeader_CB : public boost::intrusive_ref_counter<RGWGetDirHeader_CB> {
public:
- ~RGWGetDirHeader_CB() override {}
- virtual void handle_response(int r, rgw_bucket_dir_header& header) = 0;
+ virtual ~RGWGetDirHeader_CB() {}
+ virtual void handle_response(int r, const rgw_bucket_dir_header& header) = 0;
};
class BucketIndexShardsManager {
@@ -572,7 +574,8 @@ public:
virtual ~CLSRGWIssueBucketBILogStop() override {}
};
-int cls_rgw_get_dir_header_async(librados::IoCtx& io_ctx, std::string& oid, RGWGetDirHeader_CB *ctx);
+int cls_rgw_get_dir_header_async(librados::IoCtx& io_ctx, const std::string& oid,
+ boost::intrusive_ptr<RGWGetDirHeader_CB> cb);
void cls_rgw_encode_suggestion(char op, rgw_bucket_dir_entry& dirent, ceph::buffer::list& updates);
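
With this change RGWGetDirHeader_CB is reference-counted through boost::intrusive_ref_counter and cls_rgw_get_dir_header_async() takes the callback as a boost::intrusive_ptr, so callers no longer call put() by hand. A sketch of what a caller could look like after the change; MyDirHeaderCB and fetch_dir_header() are hypothetical, while the signatures come from the header above:

#include <string>
#include <boost/intrusive_ptr.hpp>
#include "cls/rgw/cls_rgw_client.h"

class MyDirHeaderCB : public RGWGetDirHeader_CB {
 public:
  void handle_response(int r, const rgw_bucket_dir_header& header) override {
    // invoked from the AIO completion with the decoded bucket index header
  }
};

int fetch_dir_header(librados::IoCtx& ioctx, const std::string& oid) {
  // the intrusive_ptr keeps the callback alive until the completion fires;
  // GetDirHeaderCompletion now holds its own reference instead of calling put()
  boost::intrusive_ptr<RGWGetDirHeader_CB> cb{new MyDirHeaderCB};
  return cls_rgw_get_dir_header_async(ioctx, oid, std::move(cb));
}
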
diff --git a/src/cls/timeindex/cls_timeindex_ops.h b/src/cls/timeindex/cls_timeindex_ops.h
index f40058954dc..f0f0cc02475 100644
--- a/src/cls/timeindex/cls_timeindex_ops.h
+++ b/src/cls/timeindex/cls_timeindex_ops.h
@@ -4,6 +4,7 @@
#ifndef CEPH_CLS_TIMEINDEX_OPS_H
#define CEPH_CLS_TIMEINDEX_OPS_H
+#include "common/ceph_json.h"
#include "cls_timeindex_types.h"
struct cls_timeindex_add_op {
@@ -51,6 +52,26 @@ struct cls_timeindex_list_op {
decode(max_entries, bl);
DECODE_FINISH(bl);
}
+
+ void dump(ceph::Formatter *f) const {
+ f->open_object_section("from_time");
+ from_time.dump(f);
+ f->close_section();
+ f->dump_string("marker", marker);
+ f->open_object_section("to_time");
+ to_time.dump(f);
+ f->close_section();
+ f->dump_int("max_entries", max_entries);
+ }
+
+ static void generate_test_instances(std::list<cls_timeindex_list_op*>& o) {
+ o.push_back(new cls_timeindex_list_op);
+ o.push_back(new cls_timeindex_list_op);
+ o.back()->from_time = utime_t(1, 2);
+ o.back()->marker = "marker";
+ o.back()->to_time = utime_t(3, 4);
+ o.back()->max_entries = 5;
+ }
};
WRITE_CLASS_ENCODER(cls_timeindex_list_op)
@@ -76,6 +97,23 @@ struct cls_timeindex_list_ret {
decode(truncated, bl);
DECODE_FINISH(bl);
}
+
+ void dump(ceph::Formatter *f) const {
+ encode_json("entries", entries, f);
+ f->dump_string("marker", marker);
+ f->dump_bool("truncated", truncated);
+ }
+
+ static void generate_test_instances(std::list<cls_timeindex_list_ret*>& o) {
+ o.push_back(new cls_timeindex_list_ret);
+ o.push_back(new cls_timeindex_list_ret);
+ o.back()->entries.push_back(cls_timeindex_entry());
+ o.back()->entries.back().key_ts = utime_t(1, 2);
+ o.back()->entries.back().key_ext = "key_ext";
+ o.back()->entries.back().value.append("value");
+ o.back()->marker = "marker";
+ o.back()->truncated = true;
+ }
};
WRITE_CLASS_ENCODER(cls_timeindex_list_ret)
diff --git a/src/cls/timeindex/cls_timeindex_types.h b/src/cls/timeindex/cls_timeindex_types.h
index d33886881be..ea8d6c93d2c 100644
--- a/src/cls/timeindex/cls_timeindex_types.h
+++ b/src/cls/timeindex/cls_timeindex_types.h
@@ -4,9 +4,9 @@
#ifndef CEPH_CLS_TIMEINDEX_TYPES_H
#define CEPH_CLS_TIMEINDEX_TYPES_H
+#include "common/Formatter.h"
#include "include/encoding.h"
#include "include/types.h"
-
#include "include/utime.h"
class JSONObj;
diff --git a/src/cls/version/cls_version_ops.h b/src/cls/version/cls_version_ops.h
index 62cd1172982..2eff788ce52 100644
--- a/src/cls/version/cls_version_ops.h
+++ b/src/cls/version/cls_version_ops.h
@@ -5,6 +5,7 @@
#define CEPH_CLS_VERSION_OPS_H
#include "cls_version_types.h"
+#include "common/ceph_json.h"
struct cls_version_set_op {
obj_version objv;
@@ -22,6 +23,17 @@ struct cls_version_set_op {
decode(objv, bl);
DECODE_FINISH(bl);
}
+
+ void dump(ceph::Formatter *f) const {
+ f->dump_object("objv", objv);
+ }
+
+ static void generate_test_instances(std::list<cls_version_set_op*>& o) {
+ o.push_back(new cls_version_set_op);
+ o.push_back(new cls_version_set_op);
+ o.back()->objv.ver = 123;
+ o.back()->objv.tag = "foo";
+ }
};
WRITE_CLASS_ENCODER(cls_version_set_op)
@@ -44,6 +56,22 @@ struct cls_version_inc_op {
decode(conds, bl);
DECODE_FINISH(bl);
}
+
+ void dump(ceph::Formatter *f) const {
+ f->dump_object("objv", objv);
+ encode_json("conds", conds, f);
+ }
+
+ static void generate_test_instances(std::list<cls_version_inc_op*>& o) {
+ o.push_back(new cls_version_inc_op);
+ o.push_back(new cls_version_inc_op);
+ o.back()->objv.ver = 123;
+ o.back()->objv.tag = "foo";
+ o.back()->conds.push_back(obj_version_cond());
+ o.back()->conds.back().ver.ver = 123;
+ o.back()->conds.back().ver.tag = "foo";
+ o.back()->conds.back().cond = VER_COND_GE;
+ }
};
WRITE_CLASS_ENCODER(cls_version_inc_op)
@@ -66,6 +94,22 @@ struct cls_version_check_op {
decode(conds, bl);
DECODE_FINISH(bl);
}
+
+ void dump(ceph::Formatter *f) const {
+ f->dump_object("objv", objv);
+ encode_json("conds", conds, f);
+ }
+
+ static void generate_test_instances(std::list<cls_version_check_op*>& o) {
+ o.push_back(new cls_version_check_op);
+ o.push_back(new cls_version_check_op);
+ o.back()->objv.ver = 123;
+ o.back()->objv.tag = "foo";
+ o.back()->conds.push_back(obj_version_cond());
+ o.back()->conds.back().ver.ver = 123;
+ o.back()->conds.back().ver.tag = "foo";
+ o.back()->conds.back().cond = VER_COND_GE;
+ }
};
WRITE_CLASS_ENCODER(cls_version_check_op)
@@ -85,6 +129,17 @@ struct cls_version_read_ret {
decode(objv, bl);
DECODE_FINISH(bl);
}
+
+ void dump(ceph::Formatter *f) const {
+ f->dump_object("objv", objv);
+ }
+
+ static void generate_test_instances(std::list<cls_version_read_ret*>& o) {
+ o.push_back(new cls_version_read_ret);
+ o.push_back(new cls_version_read_ret);
+ o.back()->objv.ver = 123;
+ o.back()->objv.tag = "foo";
+ }
};
WRITE_CLASS_ENCODER(cls_version_read_ret)
diff --git a/src/cls/version/cls_version_types.cc b/src/cls/version/cls_version_types.cc
index b82f6aa8a5d..735ef7c8985 100644
--- a/src/cls/version/cls_version_types.cc
+++ b/src/cls/version/cls_version_types.cc
@@ -6,12 +6,6 @@
#include "common/ceph_json.h"
-void obj_version::dump(ceph::Formatter *f) const
-{
- f->dump_int("ver", ver);
- f->dump_string("tag", tag);
-}
-
void obj_version::decode_json(JSONObj *obj)
{
JSONDecoder::decode_json("ver", ver, obj);
diff --git a/src/cls/version/cls_version_types.h b/src/cls/version/cls_version_types.h
index 62cc16e33d5..dafa866e1f3 100644
--- a/src/cls/version/cls_version_types.h
+++ b/src/cls/version/cls_version_types.h
@@ -53,7 +53,11 @@ struct obj_version {
tag.compare(v.tag) == 0);
}
- void dump(ceph::Formatter *f) const;
+ void dump(ceph::Formatter *f) const {
+ f->dump_int("ver", ver);
+ f->dump_string("tag", tag);
+ }
+
void decode_json(JSONObj *obj);
static void generate_test_instances(std::list<obj_version*>& o);
};
@@ -91,6 +95,18 @@ struct obj_version_cond {
DECODE_FINISH(bl);
}
+ void dump(ceph::Formatter *f) const {
+ f->dump_object("ver", ver);
+ f->dump_unsigned("cond", cond);
+ }
+
+ static void generate_test_instances(std::list<obj_version_cond*>& o) {
+ o.push_back(new obj_version_cond);
+ o.push_back(new obj_version_cond);
+ o.back()->ver.ver = 1;
+ o.back()->ver.tag = "foo";
+ o.back()->cond = VER_COND_EQ;
+ }
};
WRITE_CLASS_ENCODER(obj_version_cond)
diff --git a/src/common/RefCountedObj.h b/src/common/RefCountedObj.h
index ef966463cda..a2667757399 100644
--- a/src/common/RefCountedObj.h
+++ b/src/common/RefCountedObj.h
@@ -180,6 +180,12 @@ struct RefCountedWaitObject {
}
};
+static inline void intrusive_ptr_add_ref(RefCountedWaitObject *p) {
+ p->get();
+}
+static inline void intrusive_ptr_release(RefCountedWaitObject *p) {
+ p->put();
+}
#endif // !defined(WITH_SEASTAR)|| defined(WITH_ALIEN)
static inline void intrusive_ptr_add_ref(const RefCountedObject *p) {
@@ -196,7 +202,7 @@ struct UniquePtrDeleter
p->put();
}
};
-}
+} // namespace TOPNSPC::common
using RefCountedPtr = ceph::ref_t<TOPNSPC::common::RefCountedObject>;
#endif
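
The new intrusive_ptr_add_ref()/intrusive_ptr_release() overloads make RefCountedWaitObject usable with boost::intrusive_ptr, mirroring what was already available for RefCountedObject. A small hedged sketch (WaitingThing is hypothetical):

#include <boost/intrusive_ptr.hpp>
#include "common/RefCountedObj.h"

struct WaitingThing : TOPNSPC::common::RefCountedWaitObject {};

void example() {
  // construction through intrusive_ptr calls get(); destruction calls put()
  boost::intrusive_ptr<WaitingThing> p{new WaitingThing};
  // p can now be copied and shared without manual reference management
}
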
diff --git a/src/common/ceph_json.h b/src/common/ceph_json.h
index 08e8d9e4662..f7a899fd935 100644
--- a/src/common/ceph_json.h
+++ b/src/common/ceph_json.h
@@ -836,6 +836,61 @@ public:
DECODE_FINISH(bl);
}
+ void dump(ceph::Formatter *f) const {
+ switch (type) {
+ case FMT_VALUE:
+ if (value.quoted) {
+ f->dump_string("value", value.str);
+ } else {
+ f->dump_format_unquoted("value", "%s", value.str.c_str());
+ }
+ break;
+ case FMT_ARRAY:
+ f->open_array_section("array");
+ for (auto& i : arr) {
+ i.dump(f);
+ }
+ f->close_section();
+ break;
+ case FMT_OBJ:
+ f->open_object_section("object");
+ for (auto& i : obj) {
+ f->dump_object(i.first.c_str(), i.second);
+ }
+ f->close_section();
+ break;
+ default:
+ break;
+ }
+ }
+ static void generate_test_instances(std::list<JSONFormattable*>& o) {
+ o.push_back(new JSONFormattable);
+ o.push_back(new JSONFormattable);
+ o.back()->set_type(FMT_VALUE);
+ o.back()->value.str = "foo";
+ o.back()->value.quoted = true;
+ o.push_back(new JSONFormattable);
+ o.back()->set_type(FMT_VALUE);
+ o.back()->value.str = "foo";
+ o.back()->value.quoted = false;
+ o.push_back(new JSONFormattable);
+ o.back()->set_type(FMT_ARRAY);
+ o.back()->arr.push_back(JSONFormattable());
+ o.back()->arr.back().set_type(FMT_VALUE);
+ o.back()->arr.back().value.str = "foo";
+ o.back()->arr.back().value.quoted = true;
+ o.back()->arr.push_back(JSONFormattable());
+ o.back()->arr.back().set_type(FMT_VALUE);
+ o.back()->arr.back().value.str = "bar";
+ o.back()->arr.back().value.quoted = true;
+ o.push_back(new JSONFormattable);
+ o.back()->set_type(FMT_OBJ);
+ o.back()->obj["foo"] = JSONFormattable();
+ o.back()->obj["foo"].set_type(FMT_VALUE);
+ o.back()->obj["foo"].value.str = "bar";
+ o.back()->obj["foo"].value.quoted = true;
+ }
+
const std::string& val() const {
return value.str;
}
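
Since JSONFormattable now implements dump(), an instance can be rendered through any ceph::Formatter. A short sketch; the helper below is illustrative, while Formatter::create() and flush() are the usual ceph::Formatter entry points:

#include <iostream>
#include <memory>
#include "common/Formatter.h"
#include "common/ceph_json.h"

void print_formattable(const JSONFormattable& jf) {
  std::unique_ptr<ceph::Formatter> f{ceph::Formatter::create("json-pretty")};
  f->open_object_section("formattable");
  jf.dump(f.get());   // uses the dump() added above
  f->close_section();
  f->flush(std::cout);
}
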
diff --git a/src/common/entity_name.cc b/src/common/entity_name.cc
index 5357b34eacb..a9d6fb9c8b1 100644
--- a/src/common/entity_name.cc
+++ b/src/common/entity_name.cc
@@ -29,21 +29,30 @@ const std::array<EntityName::str_to_entity_type_t, 6> EntityName::STR_TO_ENTITY_
{ CEPH_ENTITY_TYPE_CLIENT, "client" },
}};
-const std::string& EntityName::
-to_str() const
-{
+void EntityName::dump(ceph::Formatter *f) const {
+ f->dump_int("type", type);
+ f->dump_string("id", id);
+}
+
+void EntityName::generate_test_instances(std::list<EntityName*>& ls) {
+ ls.push_back(new EntityName);
+ ls.push_back(new EntityName);
+ ls.back()->set_type(CEPH_ENTITY_TYPE_OSD);
+ ls.back()->set_id("0");
+ ls.push_back(new EntityName);
+ ls.back()->set_type(CEPH_ENTITY_TYPE_MDS);
+ ls.back()->set_id("a");
+}
+
+const std::string& EntityName::to_str() const {
return type_id;
}
-const char* EntityName::
-to_cstr() const
-{
+const char* EntityName::to_cstr() const {
return type_id.c_str();
}
-bool EntityName::
-from_str(std::string_view s)
-{
+bool EntityName::from_str(std::string_view s) {
size_t pos = s.find('.');
if (pos == string::npos)
@@ -56,9 +65,7 @@ from_str(std::string_view s)
return true;
}
-void EntityName::
-set(uint32_t type_, std::string_view id_)
-{
+void EntityName::set(uint32_t type_, std::string_view id_) {
type = type_;
id = id_;
@@ -71,9 +78,7 @@ set(uint32_t type_, std::string_view id_)
}
}
-int EntityName::
-set(std::string_view type_, std::string_view id_)
-{
+int EntityName::set(std::string_view type_, std::string_view id_) {
uint32_t t = str_to_ceph_entity_type(type_);
if (t == CEPH_ENTITY_TYPE_ANY)
return -EINVAL;
@@ -81,9 +86,7 @@ set(std::string_view type_, std::string_view id_)
return 0;
}
-void EntityName::
-set_type(uint32_t type_)
-{
+void EntityName::set_type(uint32_t type_) {
set(type_, id);
}
@@ -93,9 +96,7 @@ set_type(std::string_view type_)
return set(type_, id);
}
-void EntityName::
-set_id(std::string_view id_)
-{
+void EntityName::set_id(std::string_view id_) {
set(type, id_);
}
@@ -106,33 +107,23 @@ void EntityName::set_name(entity_name_t n)
set(n.type(), s);
}
-const char* EntityName::
-get_type_str() const
-{
+const char* EntityName::get_type_str() const {
return ceph_entity_type_name(type);
}
-std::string_view EntityName::
-get_type_name() const
-{
+std::string_view EntityName::get_type_name() const {
return ceph_entity_type_name(type);
}
-const std::string &EntityName::
-get_id() const
-{
+const std::string &EntityName::get_id() const {
return id;
}
-bool EntityName::
-has_default_id() const
-{
+bool EntityName::has_default_id() const {
return (id == "admin");
}
-std::string EntityName::
-get_valid_types_as_str()
-{
+std::string EntityName::get_valid_types_as_str() {
std::ostringstream out;
size_t i;
for (i = 0; i < STR_TO_ENTITY_TYPE.size(); ++i) {
diff --git a/src/common/entity_name.h b/src/common/entity_name.h
index c88ebcbbabd..53f8cd4d5d0 100644
--- a/src/common/entity_name.h
+++ b/src/common/entity_name.h
@@ -41,7 +41,8 @@ struct EntityName
decode(id_, bl);
set(type_, id_);
}
-
+ void dump(ceph::Formatter *f) const;
+ static void generate_test_instances(std::list<EntityName*>& ls);
const std::string& to_str() const;
const char *to_cstr() const;
bool from_str(std::string_view s);
diff --git a/src/common/mempool.cc b/src/common/mempool.cc
index 79354f70821..4ecfaf81fc6 100644
--- a/src/common/mempool.cc
+++ b/src/common/mempool.cc
@@ -15,9 +15,12 @@
#include "include/mempool.h"
#include "include/demangle.h"
+#if defined(_GNU_SOURCE) && defined(WITH_SEASTAR) && !defined(WITH_ALIEN)
+#else
// Thread local variables should save index, not &shard[index],
// because shard[] is defined in the class
static thread_local size_t thread_shard_index = mempool::num_shards;
+#endif
// default to debug_mode off
bool mempool::debug_mode = false;
@@ -95,9 +98,21 @@ size_t mempool::pool_t::allocated_items() const
void mempool::pool_t::adjust_count(ssize_t items, ssize_t bytes)
{
- thread_shard_index = (thread_shard_index == num_shards) ? pick_a_shard_int() : thread_shard_index;
- shard[thread_shard_index].items += items;
- shard[thread_shard_index].bytes += bytes;
+#if defined(_GNU_SOURCE) && defined(WITH_SEASTAR) && !defined(WITH_ALIEN)
+ // the expected path: we always pick the shard for a cpu core
+ // a thread is executing on.
+ const size_t shard_index = pick_a_shard_int();
+#else
+ // fallback for lack of sched_getcpu()
+ const size_t shard_index = []() {
+ if (thread_shard_index == num_shards) {
+ thread_shard_index = pick_a_shard_int();
+ }
+ return thread_shard_index;
+ }();
+#endif
+ shard[shard_index].items += items;
+ shard[shard_index].bytes += bytes;
}
void mempool::pool_t::get_stats(
@@ -113,8 +128,17 @@ void mempool::pool_t::get_stats(
for (auto &p : type_map) {
std::string n = ceph_demangle(p.second.type_name);
stats_t &s = (*by_type)[n];
+#if defined(WITH_SEASTAR) && !defined(WITH_ALIEN)
+ s.bytes = 0;
+ s.items = 0;
+ for (size_t i = 0 ; i < num_shards; ++i) {
+ s.bytes += p.second.shards[i].items * p.second.item_size;
+ s.items += p.second.shards[i].items;
+ }
+#else
s.bytes = p.second.items * p.second.item_size;
s.items = p.second.items;
+#endif
}
}
}
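The fallback branch above caches a per-thread shard index so that pick_a_shard_int() only needs to run once per thread, while the sched_getcpu()-backed path re-picks the shard on every call. A standalone sketch of that caching pattern (names mirror the patch, but this is an illustrative approximation, not the mempool code itself):

  // Hypothetical illustration of the thread-local shard-index fallback.
  #include <atomic>
  #include <cstddef>
  #include <functional>
  #include <thread>

  constexpr size_t num_shards = 32;
  std::atomic<size_t> counters[num_shards];

  static size_t pick_a_shard_int() {
    // cheap stand-in for sched_getcpu(): hash the thread id onto a shard
    return std::hash<std::thread::id>{}(std::this_thread::get_id()) % num_shards;
  }

  // num_shards doubles as the "not yet picked" sentinel
  static thread_local size_t thread_shard_index = num_shards;

  void adjust(size_t items) {
    // pick once per thread, then reuse the cached index on later calls
    if (thread_shard_index == num_shards) {
      thread_shard_index = pick_a_shard_int();
    }
    counters[thread_shard_index] += items;
  }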
diff --git a/src/common/options/rgw.yaml.in b/src/common/options/rgw.yaml.in
index b03e4449ab1..3f0f691e84e 100644
--- a/src/common/options/rgw.yaml.in
+++ b/src/common/options/rgw.yaml.in
@@ -359,7 +359,11 @@ options:
type: str
level: advanced
desc: Lifecycle allowed work time
- long_desc: Local time window in which the lifecycle maintenance thread can work.
+ long_desc: Local time window in which the lifecycle maintenance thread can work. It expects
24-hour time notation. For example, "00:00-23:59" means that, starting at midnight, lifecycle
is allowed to run for the whole day (24 hours). When lifecycle completes, it waits for the
next maintenance window. In this example, if it completes at 01:00, it resumes processing
23 hours later, at the following midnight.
default: 00:00-06:00
services:
- rgw
diff --git a/src/common/versioned_variant.h b/src/common/versioned_variant.h
new file mode 100644
index 00000000000..124c5883916
--- /dev/null
+++ b/src/common/versioned_variant.h
@@ -0,0 +1,234 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright contributors to the Ceph project
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#pragma once
+
+#include <concepts>
+#include <limits>
+#include <list>
+#include <variant>
+
+#include <boost/mp11/algorithm.hpp> // for mp_with_index
+#include "include/encoding.h"
+
+/// \file
+/// \brief Contains binary encoding strategies for std::variant.
+
+namespace ceph {
+
+// null encoding for std::monostate
+inline void encode(const std::monostate&, bufferlist& bl) {}
+inline void decode(std::monostate&, bufferlist::const_iterator& p) {}
+
+// largest value that can be represented by `__u8 struct_v`
+inline constexpr size_t max_version = std::numeric_limits<__u8>::max();
+
+/// \namespace versioned_variant
+/// \brief A backward-compatible binary encoding for std::variant.
+///
+/// The variant index is encoded in struct_v so the correct decoder can be
+/// selected. This means that existing variant types cannot be changed or
+/// removed without breaking the decode of earlier ceph versions. New types
+/// can only be added to the end of the variant.
+///
+/// In addition to struct_v, the variant index is also encoded in compatv. As
+/// the variant is extended, this means that existing decoders can continue to
+/// decode the types they recognize, but reject the encodings of new types they
+/// don't.
+///
+/// The variant types themselves are free to change their encodings, provided
+/// they manage their own versioning. The types must be default-constructible
+/// so they can be constructed before decode.
+///
+/// The contained encode/decode functions won't be found by argument-dependent
+/// lookup, so you must either qualify the calls with `versioned_variant::` or
+/// add `using namespace versioned_variant` to the calling scope.
+namespace versioned_variant {
+
+// Requirements for the list of types for versioned std::variant encoding.
+template <typename ...Ts>
+concept valid_types = requires {
+ sizeof...(Ts) > 0; // variant cannot be empty
+ sizeof...(Ts) <= max_version; // index must fit in u8
+ requires (std::default_initializable<Ts> && ...); // default-constructible
+ };
+
+/// \brief A versioned_variant encoder.
+///
+/// Example:
+/// \code
+/// struct example {
+/// std::variant<int, bool> value;
+///
+/// void encode(bufferlist& bl) const {
+/// ENCODE_START(0, 0, bl);
+/// ceph::versioned_variant::encode(value, bl);
+/// ...
+/// \endcode
+template <typename ...Ts> requires valid_types<Ts...>
+void encode(const std::variant<Ts...>& v, bufferlist& bl, uint64_t features=0)
+{
+ // encode the variant index in struct_v and compatv
+ const uint8_t ver = static_cast<uint8_t>(v.index());
+ ENCODE_START(ver, ver, bl);
+ // use the variant type's encoder
+ std::visit([&bl] (const auto& value) mutable {
+ encode(value, bl);
+ }, v);
+ ENCODE_FINISH(bl);
+}
+
+/// \brief A versioned_variant decoder.
+///
+/// Example:
+/// \code
+/// struct example {
+/// std::variant<int, bool> value;
+///
+/// void decode(bufferlist::const_iterator& bl) {
+/// DECODE_START(0, bl);
+/// ceph::versioned_variant::decode(value, bl);
+/// ...
+/// \endcode
+template <typename ...Ts> requires valid_types<Ts...>
+void decode(std::variant<Ts...>& v, bufferlist::const_iterator& p)
+{
+ constexpr uint8_t max_version = sizeof...(Ts) - 1;
+ DECODE_START(max_version, p);
+ // use struct_v as an index into the variant after converting it into a
+ // compile-time index I
+ const uint8_t index = struct_v;
+ boost::mp11::mp_with_index<sizeof...(Ts)>(index, [&v, &p] (auto I) {
+ // default-construct the type at index I and call its decoder
+ decode(v.template emplace<I>(), p);
+ });
+ DECODE_FINISH(p);
+}
+
+} // namespace versioned_variant
+
+
+/// \namespace converted_variant
+/// \brief A std::variant<T, ...> encoding that is backward-compatible with T.
+///
+/// The encoding works the same as versioned_variant, except that a block of
+/// version numbers are reserved for the first type T to allow its encoding
+/// to continue evolving. T must itself use versioned encoding (i.e.,
+/// ENCODE_START/FINISH).
+///
+/// This encoding strategy allows a serialized type T to be transparently
+/// converted into a variant that can represent other types too.
+namespace converted_variant {
+
+// For converted variants, reserve the first 128 versions for the original
+// type. Variant types after the first use the version numbers above this.
+inline constexpr uint8_t converted_max_version = 128;
+
+// Requirements for the list of types for converted std::variant encoding.
+template <typename ...Ts>
+concept valid_types = requires {
+ sizeof...(Ts) > 0; // variant cannot be empty
+ sizeof...(Ts) <= (max_version - converted_max_version); // index must fit in u8
+ requires (std::default_initializable<Ts> && ...); // default-constructible
+ };
+
+/// \brief A converted_variant encoder.
+///
+/// Example:
+/// \code
+/// struct example {
+/// std::variant<int, bool> value; // replaced `int value`
+///
+/// void encode(bufferlist& bl) const {
+/// ENCODE_START(1, 0, bl);
+/// ceph::converted_variant::encode(value, bl);
+/// ...
+/// \endcode
+template <typename ...Ts> requires valid_types<Ts...>
+void encode(const std::variant<Ts...>& v, bufferlist& bl, uint64_t features=0)
+{
+ const uint8_t index = static_cast<uint8_t>(v.index());
+ if (index == 0) {
+ // encode the first type with its own versioning scheme
+ encode(std::get<0>(v), bl);
+ return;
+ }
+
+ // encode the variant index in struct_v and compatv
+ const uint8_t ver = converted_max_version + index;
+ ENCODE_START(ver, ver, bl);
+ // use the variant type's encoder
+ std::visit([&bl] (const auto& value) mutable {
+ encode(value, bl);
+ }, v);
+ ENCODE_FINISH(bl);
+}
+
+/// \brief A converted_variant decoder.
+///
+/// Example:
+/// \code
+/// struct example {
+/// std::variant<int, bool> value; // replaced `int value`
+///
+/// void decode(bufferlist::const_iterator& bl) {
+/// DECODE_START(1, bl);
+/// ceph::converted_variant::decode(value, bl);
+/// ...
+/// \endcode
+template <typename ...Ts> requires valid_types<Ts...>
+void decode(std::variant<Ts...>& v, bufferlist::const_iterator& p)
+{
+ // save the iterator position so the first type can restart decode
+ const bufferlist::const_iterator prev = p;
+
+ constexpr uint8_t max_version = converted_max_version + sizeof...(Ts) - 1;
+ DECODE_START(max_version, p);
+ if (struct_v <= converted_max_version) {
+ p = prev; // rewind and use type 0's DECODE_START/FINISH
+ decode(v.template emplace<0>(), p);
+ return;
+ }
+
+ // use struct_v as an index into the variant after converting it into a
+ // compile-time index I
+ const uint8_t index = struct_v - converted_max_version;
+ boost::mp11::mp_with_index<sizeof...(Ts)>(index, [&v, &p] (auto I) {
+ // default-construct the type at index I and call its decoder
+ decode(v.template emplace<I>(), p);
+ });
+ DECODE_FINISH(p);
+}
+
+} // namespace converted_variant
+
+
+/// \brief Generate a list with a default-constructed variant of each type.
+///
+/// This can be used in generate_test_instances() for types that contain
+/// variants to ensure that an encoding of each type is present in the
+/// ceph-object-corpus. This allows the ceph-dencoder tests to catch any
+/// breaking changes to the variant types that are present in encodings.
+template <typename ...Ts>
+void generate_test_instances(std::list<std::variant<Ts...>>& instances)
+{
+ // use an immediately-invoked lambda to get a parameter pack of variant indices
+ [&instances] <std::size_t ...I> (std::index_sequence<I...>) {
+ // use a fold expression to call emplace_back() for each index in the pack
+ // use in_place_index to default-construct a variant of the type at index I
+ (instances.emplace_back(std::in_place_index<I>), ...);
+ } (std::make_index_sequence<sizeof...(Ts)>{});
+}
+
+} // namespace ceph
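A hypothetical round trip showing how the encoder and decoder above are meant to be called; the variant alternatives and the assert are illustrative assumptions, not part of the patch:

  // Sketch only: encode a variant into a bufferlist and decode it back.
  #include <cassert>
  #include <string>
  #include <variant>
  #include "common/versioned_variant.h"

  void round_trip()
  {
    std::variant<std::monostate, int, std::string> in = std::string("hello");
    ceph::bufferlist bl;
    ceph::versioned_variant::encode(in, bl);   // struct_v/compatv carry index 2

    std::variant<std::monostate, int, std::string> out;
    auto p = bl.cbegin();
    ceph::versioned_variant::decode(out, p);
    assert(out == in);                         // same alternative, same value
  }

converted_variant::encode/decode are used the same way; the difference is only in how the first alternative's versions are reserved, as described above.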
diff --git a/src/crimson/common/shared_lru.h b/src/crimson/common/shared_lru.h
index 186f02a614c..92d99d332c4 100644
--- a/src/crimson/common/shared_lru.h
+++ b/src/crimson/common/shared_lru.h
@@ -83,6 +83,7 @@ public:
cache.clear();
}
shared_ptr_t find(const K& key);
+ K cached_key_lower_bound();
// return the last element that is not greater than key
shared_ptr_t lower_bound(const K& key);
// return the first element that is greater than key
@@ -147,6 +148,15 @@ SharedLRU<K,V>::find(const K& key)
}
template<class K, class V>
+K SharedLRU<K,V>::cached_key_lower_bound()
+{
+ if (weak_refs.empty()) {
+ return {};
+ }
+ return weak_refs.begin()->first;
+}
+
+template<class K, class V>
typename SharedLRU<K,V>::shared_ptr_t
SharedLRU<K,V>::lower_bound(const K& key)
{
diff --git a/src/crimson/common/smp_helpers.h b/src/crimson/common/smp_helpers.h
index c2b7bd9641a..429c938229b 100644
--- a/src/crimson/common/smp_helpers.h
+++ b/src/crimson/common/smp_helpers.h
@@ -3,10 +3,16 @@
#pragma once
+#include <concepts>
#include <limits>
+#include <optional>
+#include <type_traits>
+#include <vector>
+#include <seastar/core/shared_future.hh>
#include <seastar/core/smp.hh>
+#include "common/likely.h"
#include "crimson/common/errorator.h"
#include "crimson/common/utility.h"
@@ -89,4 +95,142 @@ auto sharded_map_seq(T &t, F &&f) {
});
}
-}
+enum class crosscore_type_t {
+ ONE, // from 1 to 1 core
+ ONE_N, // from 1 to n cores
+ N_ONE, // from n to 1 core
+};
+
+/**
+ * smp_crosscore_ordering_t
+ *
+ * To preserve the event order from source to target core(s).
+ */
+template <crosscore_type_t CTypeValue>
+class smp_crosscore_ordering_t {
+ static constexpr bool IS_ONE = (CTypeValue == crosscore_type_t::ONE);
+ static constexpr bool IS_ONE_N = (CTypeValue == crosscore_type_t::ONE_N);
+ static constexpr bool IS_N_ONE = (CTypeValue == crosscore_type_t::N_ONE);
+ static_assert(IS_ONE || IS_ONE_N || IS_N_ONE);
+
+public:
+ using seq_t = uint64_t;
+
+ smp_crosscore_ordering_t() requires IS_ONE
+ : out_seqs(0) { }
+
+ smp_crosscore_ordering_t() requires (!IS_ONE)
+ : out_seqs(seastar::smp::count, 0),
+ in_controls(seastar::smp::count) {}
+
+ ~smp_crosscore_ordering_t() = default;
+
+ /*
+ * Called by the original core to get the ordering sequence
+ */
+
+ seq_t prepare_submit() requires IS_ONE {
+ return do_prepare_submit(out_seqs);
+ }
+
+ seq_t prepare_submit(core_id_t target_core) requires IS_ONE_N {
+ return do_prepare_submit(out_seqs[target_core]);
+ }
+
+ seq_t prepare_submit() requires IS_N_ONE {
+ return do_prepare_submit(out_seqs[seastar::this_shard_id()]);
+ }
+
+ /*
+ * Called by the target core to preserve the ordering
+ */
+
+ seq_t get_in_seq() const requires IS_ONE {
+ return in_controls.seq;
+ }
+
+ seq_t get_in_seq() const requires IS_ONE_N {
+ return in_controls[seastar::this_shard_id()].seq;
+ }
+
+ seq_t get_in_seq(core_id_t source_core) const requires IS_N_ONE {
+ return in_controls[source_core].seq;
+ }
+
+ bool proceed_or_wait(seq_t seq) requires IS_ONE {
+ return in_controls.proceed_or_wait(seq);
+ }
+
+ bool proceed_or_wait(seq_t seq) requires IS_ONE_N {
+ return in_controls[seastar::this_shard_id()].proceed_or_wait(seq);
+ }
+
+ bool proceed_or_wait(seq_t seq, core_id_t source_core) requires IS_N_ONE {
+ return in_controls[source_core].proceed_or_wait(seq);
+ }
+
+ seastar::future<> wait(seq_t seq) requires IS_ONE {
+ return in_controls.wait(seq);
+ }
+
+ seastar::future<> wait(seq_t seq) requires IS_ONE_N {
+ return in_controls[seastar::this_shard_id()].wait(seq);
+ }
+
+ seastar::future<> wait(seq_t seq, core_id_t source_core) requires IS_N_ONE {
+ return in_controls[source_core].wait(seq);
+ }
+
+ void reset_wait() requires IS_N_ONE {
+ for (auto &in_control : in_controls) {
+ in_control.reset_wait();
+ }
+ }
+
+private:
+ struct in_control_t {
+ seq_t seq = 0;
+ std::optional<seastar::shared_promise<>> pr_wait;
+
+ bool proceed_or_wait(seq_t in_seq) {
+ if (in_seq == seq + 1) {
+ ++seq;
+ reset_wait();
+ return true;
+ } else {
+ return false;
+ }
+ }
+
+ seastar::future<> wait(seq_t in_seq) {
+ assert(in_seq != seq + 1);
+ if (!pr_wait.has_value()) {
+ pr_wait = seastar::shared_promise<>();
+ }
+ return pr_wait->get_shared_future();
+ }
+
+ void reset_wait() {
+ if (unlikely(pr_wait.has_value())) {
+ pr_wait->set_value();
+ pr_wait = std::nullopt;
+ }
+ }
+ };
+
+ seq_t do_prepare_submit(seq_t &out_seq) {
+ return ++out_seq;
+ }
+
+ std::conditional_t<
+ IS_ONE,
+ seq_t, std::vector<seq_t>
+ > out_seqs;
+
+ std::conditional_t<
+ IS_ONE,
+ in_control_t, std::vector<in_control_t>
+ > in_controls;
+};
+
+} // namespace crimson
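The consumer pattern for this class is the one io_handler.cc adopts below: the source core tags each submission with prepare_submit(), and the target core either proceeds immediately or parks on wait() and retries. A hypothetical sketch of that target-side retry loop for the ONE variant; apply_event() is an assumed hook, not something defined in this patch:

  // Sketch only: apply_event() stands in for whatever the target core does
  // with the event once it is allowed to proceed.
  seastar::future<> apply_event();

  seastar::future<> handle_in_order(
      crimson::smp_crosscore_ordering_t<crimson::crosscore_type_t::ONE> &ordering,
      uint64_t seq)  // seq was obtained from ordering.prepare_submit() on the source core
  {
    if (ordering.proceed_or_wait(seq)) {
      // seq is the next expected one: handle it now; this also wakes any waiter
      return apply_event();
    }
    // an earlier submission has not been handled yet: wait, then re-check
    return ordering.wait(seq).then([&ordering, seq] {
      return handle_in_order(ordering, seq);
    });
  }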
diff --git a/src/crimson/mgr/client.h b/src/crimson/mgr/client.h
index 501949768dd..b88c60c5e4a 100644
--- a/src/crimson/mgr/client.h
+++ b/src/crimson/mgr/client.h
@@ -24,7 +24,7 @@ namespace crimson::mgr
// implement WithStats if you want to report stats to mgr periodically
class WithStats {
public:
- virtual seastar::future<MessageURef> get_stats() const = 0;
+ virtual seastar::future<MessageURef> get_stats() = 0;
virtual ~WithStats() {}
};
diff --git a/src/crimson/net/Connection.h b/src/crimson/net/Connection.h
index 7141e20f476..c19bfb1ff57 100644
--- a/src/crimson/net/Connection.h
+++ b/src/crimson/net/Connection.h
@@ -81,10 +81,35 @@ class Connection : public seastar::enable_shared_from_this<Connection> {
*
* Send a message over a connection that has completed its handshake.
*
- * May be invoked from any core, but that requires to chain the returned
- * future to preserve ordering.
+ * May be invoked from any core; the send order is preserved across calls.
+ *
+ * The returned future will be resolved only after the message is enqueued
+ * remotely.
*/
- virtual seastar::future<> send(MessageURef msg) = 0;
+ virtual seastar::future<> send(
+ MessageURef msg) = 0;
+
+ /**
+ * send_with_throttling
+ *
+ * Send a message over a connection that has completed its handshake.
+ *
+ * May be invoked from any core; the send order is preserved across calls.
+ *
+ * TODO:
+ *
+ * The returned future is reserved for throttling.
+ *
+ * Gating is needed for graceful shutdown, to wait until the message is
+ * enqueued remotely.
+ */
+ seastar::future<> send_with_throttling(
+ MessageURef msg /* , seastar::gate & */) {
+ std::ignore = send(std::move(msg));
+ return seastar::now();
+ }
/**
* send_keepalive
@@ -92,8 +117,8 @@ class Connection : public seastar::enable_shared_from_this<Connection> {
* Send a keepalive message over a connection that has completed its
* handshake.
*
- * May be invoked from any core, but that requires to chain the returned
- * future to preserve ordering.
+ * May be invoked from any core; the send order is preserved across calls.
*/
virtual seastar::future<> send_keepalive() = 0;
diff --git a/src/crimson/net/Fwd.h b/src/crimson/net/Fwd.h
index 2b159514193..3a56cf5bb0a 100644
--- a/src/crimson/net/Fwd.h
+++ b/src/crimson/net/Fwd.h
@@ -21,7 +21,7 @@
#include <seastar/core/sharded.hh>
#include "msg/Connection.h"
-#include "msg/MessageRef.h"
+#include "msg/Message.h"
#include "msg/msg_types.h"
#include "crimson/common/errorator.h"
diff --git a/src/crimson/net/ProtocolV2.cc b/src/crimson/net/ProtocolV2.cc
index 55b669384ed..d4ef3881c40 100644
--- a/src/crimson/net/ProtocolV2.cc
+++ b/src/crimson/net/ProtocolV2.cc
@@ -2073,7 +2073,7 @@ void ProtocolV2::trigger_replacing(bool reconnect,
// READY state
seastar::future<> ProtocolV2::notify_out_fault(
- crosscore_t::seq_t cc_seq,
+ cc_seq_t cc_seq,
const char *where,
std::exception_ptr eptr,
io_handler_state _io_states)
@@ -2121,7 +2121,7 @@ void ProtocolV2::execute_standby()
}
seastar::future<> ProtocolV2::notify_out(
- crosscore_t::seq_t cc_seq)
+ cc_seq_t cc_seq)
{
assert(seastar::this_shard_id() == conn.get_messenger_shard_id());
if (!crosscore.proceed_or_wait(cc_seq)) {
@@ -2210,7 +2210,7 @@ void ProtocolV2::execute_server_wait()
// CLOSING state
seastar::future<> ProtocolV2::notify_mark_down(
- crosscore_t::seq_t cc_seq)
+ cc_seq_t cc_seq)
{
assert(seastar::this_shard_id() == conn.get_messenger_shard_id());
if (!crosscore.proceed_or_wait(cc_seq)) {
diff --git a/src/crimson/net/ProtocolV2.h b/src/crimson/net/ProtocolV2.h
index dd7a1e7039b..4262bbbc70c 100644
--- a/src/crimson/net/ProtocolV2.h
+++ b/src/crimson/net/ProtocolV2.h
@@ -29,16 +29,16 @@ public:
*/
private:
seastar::future<> notify_out(
- crosscore_t::seq_t cc_seq) final;
+ cc_seq_t cc_seq) final;
seastar::future<> notify_out_fault(
- crosscore_t::seq_t cc_seq,
+ cc_seq_t cc_seq,
const char *where,
std::exception_ptr,
io_handler_state) final;
seastar::future<> notify_mark_down(
- crosscore_t::seq_t cc_seq) final;
+ cc_seq_t cc_seq) final;
/*
* as ProtocolV2 to be called by SocketConnection
@@ -251,7 +251,7 @@ private:
// asynchronously populated from io_handler
io_handler_state io_states;
- crosscore_t crosscore;
+ proto_crosscore_ordering_t crosscore;
bool has_socket = false;
diff --git a/src/crimson/net/SocketConnection.cc b/src/crimson/net/SocketConnection.cc
index 57e5c12c1ae..76719268277 100644
--- a/src/crimson/net/SocketConnection.cc
+++ b/src/crimson/net/SocketConnection.cc
@@ -79,16 +79,13 @@ bool SocketConnection::peer_wins() const
return (messenger.get_myaddr() > peer_addr || policy.server);
}
-seastar::future<> SocketConnection::send(MessageURef _msg)
+seastar::future<> SocketConnection::send(MessageURef msg)
{
- // may be invoked from any core
- MessageFRef msg = seastar::make_foreign(std::move(_msg));
return io_handler->send(std::move(msg));
}
seastar::future<> SocketConnection::send_keepalive()
{
- // may be invoked from any core
return io_handler->send_keepalive();
}
diff --git a/src/crimson/net/SocketConnection.h b/src/crimson/net/SocketConnection.h
index 823d6c574da..7d20f68867e 100644
--- a/src/crimson/net/SocketConnection.h
+++ b/src/crimson/net/SocketConnection.h
@@ -54,7 +54,7 @@ public:
virtual bool is_connected() const = 0;
- virtual seastar::future<> send(MessageFRef) = 0;
+ virtual seastar::future<> send(MessageURef) = 0;
virtual seastar::future<> send_keepalive() = 0;
diff --git a/src/crimson/net/io_handler.cc b/src/crimson/net/io_handler.cc
index c414c48e12f..b9b0339f944 100644
--- a/src/crimson/net/io_handler.cc
+++ b/src/crimson/net/io_handler.cc
@@ -160,84 +160,132 @@ IOHandler::sweep_out_pending_msgs_to_sent(
#endif
}
-seastar::future<> IOHandler::send(MessageFRef msg)
+seastar::future<> IOHandler::send(MessageURef _msg)
{
+ // may be invoked from any core
+ MessageFRef msg = seastar::make_foreign(std::move(_msg));
+ auto cc_seq = io_crosscore.prepare_submit();
+ auto source_core = seastar::this_shard_id();
// sid may be changed on-the-fly during the submission
- if (seastar::this_shard_id() == get_shard_id()) {
- return do_send(std::move(msg));
+ if (source_core == get_shard_id()) {
+ return do_send(cc_seq, source_core, std::move(msg));
} else {
- logger().trace("{} send() is directed to {} -- {}",
- conn, get_shard_id(), *msg);
+ logger().trace("{} send() {} is directed to core {} -- {}",
+ conn, cc_seq, get_shard_id(), *msg);
return seastar::smp::submit_to(
- get_shard_id(), [this, msg=std::move(msg)]() mutable {
- return send_redirected(std::move(msg));
+ get_shard_id(),
+ [this, cc_seq, source_core, msg=std::move(msg)]() mutable {
+ return send_recheck_shard(cc_seq, source_core, std::move(msg));
});
}
}
-seastar::future<> IOHandler::send_redirected(MessageFRef msg)
+seastar::future<> IOHandler::send_recheck_shard(
+ cc_seq_t cc_seq,
+ core_id_t source_core,
+ MessageFRef msg)
{
// sid may be changed on-the-fly during the submission
if (seastar::this_shard_id() == get_shard_id()) {
- return do_send(std::move(msg));
+ return do_send(cc_seq, source_core, std::move(msg));
} else {
- logger().debug("{} send() is redirected to {} -- {}",
- conn, get_shard_id(), *msg);
+ logger().debug("{} send_recheck_shard() {} "
+ "is redirected from core {} to {} -- {}",
+ conn, cc_seq, source_core, get_shard_id(), *msg);
return seastar::smp::submit_to(
- get_shard_id(), [this, msg=std::move(msg)]() mutable {
- return send_redirected(std::move(msg));
+ get_shard_id(),
+ [this, cc_seq, source_core, msg=std::move(msg)]() mutable {
+ return send_recheck_shard(cc_seq, source_core, std::move(msg));
});
}
}
-seastar::future<> IOHandler::do_send(MessageFRef msg)
+seastar::future<> IOHandler::do_send(
+ cc_seq_t cc_seq,
+ core_id_t source_core,
+ MessageFRef msg)
{
assert(seastar::this_shard_id() == get_shard_id());
- logger().trace("{} do_send() got message -- {}", conn, *msg);
- if (get_io_state() != io_state_t::drop) {
- out_pending_msgs.push_back(std::move(msg));
- notify_out_dispatch();
+ if (io_crosscore.proceed_or_wait(cc_seq, source_core)) {
+ logger().trace("{} do_send() got {} from core {}: send message -- {}",
+ conn, cc_seq, source_core, *msg);
+ if (get_io_state() != io_state_t::drop) {
+ out_pending_msgs.push_back(std::move(msg));
+ notify_out_dispatch();
+ }
+ return seastar::now();
+ } else {
+ logger().debug("{} do_send() got {} from core {}, wait at {} -- {}",
+ conn, cc_seq, source_core,
+ io_crosscore.get_in_seq(source_core),
+ *msg);
+ return io_crosscore.wait(cc_seq, source_core
+ ).then([this, cc_seq, source_core, msg=std::move(msg)]() mutable {
+ return send_recheck_shard(cc_seq, source_core, std::move(msg));
+ });
}
- return seastar::now();
}
seastar::future<> IOHandler::send_keepalive()
{
+ // may be invoked from any core
+ auto cc_seq = io_crosscore.prepare_submit();
+ auto source_core = seastar::this_shard_id();
// sid may be changed on-the-fly during the submission
- if (seastar::this_shard_id() == get_shard_id()) {
- return do_send_keepalive();
+ if (source_core == get_shard_id()) {
+ return do_send_keepalive(cc_seq, source_core);
} else {
- logger().trace("{} send_keepalive() is directed to {}", conn, get_shard_id());
+ logger().trace("{} send_keepalive() {} is directed to core {}",
+ conn, cc_seq, get_shard_id());
return seastar::smp::submit_to(
- get_shard_id(), [this] {
- return send_keepalive_redirected();
+ get_shard_id(),
+ [this, cc_seq, source_core] {
+ return send_keepalive_recheck_shard(cc_seq, source_core);
});
}
}
-seastar::future<> IOHandler::send_keepalive_redirected()
+seastar::future<> IOHandler::send_keepalive_recheck_shard(
+ cc_seq_t cc_seq,
+ core_id_t source_core)
{
// sid may be changed on-the-fly during the submission
if (seastar::this_shard_id() == get_shard_id()) {
- return do_send_keepalive();
+ return do_send_keepalive(cc_seq, source_core);
} else {
- logger().debug("{} send_keepalive() is redirected to {}", conn, get_shard_id());
+ logger().debug("{} send_keepalive_recheck_shard() {} "
+ "is redirected from core {} to {}",
+ conn, cc_seq, source_core, get_shard_id());
return seastar::smp::submit_to(
- get_shard_id(), [this] {
- return send_keepalive_redirected();
+ get_shard_id(),
+ [this, cc_seq, source_core] {
+ return send_keepalive_recheck_shard(cc_seq, source_core);
});
}
}
-seastar::future<> IOHandler::do_send_keepalive()
+seastar::future<> IOHandler::do_send_keepalive(
+ cc_seq_t cc_seq,
+ core_id_t source_core)
{
assert(seastar::this_shard_id() == get_shard_id());
- logger().trace("{} do_send_keeplive(): need_keepalive={}", conn, need_keepalive);
- if (!need_keepalive) {
- need_keepalive = true;
- notify_out_dispatch();
+ if (io_crosscore.proceed_or_wait(cc_seq, source_core)) {
+ logger().trace("{} do_send_keeplive() got {} from core {}: need_keepalive={}",
+ conn, cc_seq, source_core, need_keepalive);
+ if (!need_keepalive) {
+ need_keepalive = true;
+ notify_out_dispatch();
+ }
+ return seastar::now();
+ } else {
+ logger().debug("{} do_send_keepalive() got {} from core {}, wait at {}",
+ conn, cc_seq, source_core,
+ io_crosscore.get_in_seq(source_core));
+ return io_crosscore.wait(cc_seq, source_core
+ ).then([this, cc_seq, source_core] {
+ return send_keepalive_recheck_shard(cc_seq, source_core);
+ });
}
- return seastar::now();
}
void IOHandler::mark_down()
@@ -249,7 +297,7 @@ void IOHandler::mark_down()
return;
}
- auto cc_seq = crosscore.prepare_submit();
+ auto cc_seq = proto_crosscore.prepare_submit();
logger().info("{} mark_down() at {}, send {} notify_mark_down()",
conn, io_stat_printer{*this}, cc_seq);
do_set_io_state(io_state_t::drop);
@@ -292,7 +340,7 @@ void IOHandler::assign_frame_assembler(FrameAssemblerV2Ref fa)
void IOHandler::do_set_io_state(
io_state_t new_state,
- std::optional<crosscore_t::seq_t> cc_seq,
+ std::optional<cc_seq_t> cc_seq,
FrameAssemblerV2Ref fa,
bool set_notify_out)
{
@@ -363,16 +411,16 @@ void IOHandler::do_set_io_state(
}
seastar::future<> IOHandler::set_io_state(
- crosscore_t::seq_t cc_seq,
+ cc_seq_t cc_seq,
io_state_t new_state,
FrameAssemblerV2Ref fa,
bool set_notify_out)
{
assert(seastar::this_shard_id() == get_shard_id());
- if (!crosscore.proceed_or_wait(cc_seq)) {
+ if (!proto_crosscore.proceed_or_wait(cc_seq)) {
logger().debug("{} got {} set_io_state(), wait at {}",
- conn, cc_seq, crosscore.get_in_seq());
- return crosscore.wait(cc_seq
+ conn, cc_seq, proto_crosscore.get_in_seq());
+ return proto_crosscore.wait(cc_seq
).then([this, cc_seq, new_state,
fa=std::move(fa), set_notify_out]() mutable {
return set_io_state(cc_seq, new_state, std::move(fa), set_notify_out);
@@ -385,13 +433,13 @@ seastar::future<> IOHandler::set_io_state(
seastar::future<IOHandler::exit_dispatching_ret>
IOHandler::wait_io_exit_dispatching(
- crosscore_t::seq_t cc_seq)
+ cc_seq_t cc_seq)
{
assert(seastar::this_shard_id() == get_shard_id());
- if (!crosscore.proceed_or_wait(cc_seq)) {
+ if (!proto_crosscore.proceed_or_wait(cc_seq)) {
logger().debug("{} got {} wait_io_exit_dispatching(), wait at {}",
- conn, cc_seq, crosscore.get_in_seq());
- return crosscore.wait(cc_seq
+ conn, cc_seq, proto_crosscore.get_in_seq());
+ return proto_crosscore.wait(cc_seq
).then([this, cc_seq] {
return wait_io_exit_dispatching(cc_seq);
});
@@ -429,14 +477,14 @@ IOHandler::wait_io_exit_dispatching(
}
seastar::future<> IOHandler::reset_session(
- crosscore_t::seq_t cc_seq,
+ cc_seq_t cc_seq,
bool full)
{
assert(seastar::this_shard_id() == get_shard_id());
- if (!crosscore.proceed_or_wait(cc_seq)) {
+ if (!proto_crosscore.proceed_or_wait(cc_seq)) {
logger().debug("{} got {} reset_session(), wait at {}",
- conn, cc_seq, crosscore.get_in_seq());
- return crosscore.wait(cc_seq
+ conn, cc_seq, proto_crosscore.get_in_seq());
+ return proto_crosscore.wait(cc_seq
).then([this, cc_seq, full] {
return reset_session(cc_seq, full);
});
@@ -454,13 +502,13 @@ seastar::future<> IOHandler::reset_session(
}
seastar::future<> IOHandler::reset_peer_state(
- crosscore_t::seq_t cc_seq)
+ cc_seq_t cc_seq)
{
assert(seastar::this_shard_id() == get_shard_id());
- if (!crosscore.proceed_or_wait(cc_seq)) {
+ if (!proto_crosscore.proceed_or_wait(cc_seq)) {
logger().debug("{} got {} reset_peer_state(), wait at {}",
- conn, cc_seq, crosscore.get_in_seq());
- return crosscore.wait(cc_seq
+ conn, cc_seq, proto_crosscore.get_in_seq());
+ return proto_crosscore.wait(cc_seq
).then([this, cc_seq] {
return reset_peer_state(cc_seq);
});
@@ -476,13 +524,13 @@ seastar::future<> IOHandler::reset_peer_state(
}
seastar::future<> IOHandler::requeue_out_sent(
- crosscore_t::seq_t cc_seq)
+ cc_seq_t cc_seq)
{
assert(seastar::this_shard_id() == get_shard_id());
- if (!crosscore.proceed_or_wait(cc_seq)) {
+ if (!proto_crosscore.proceed_or_wait(cc_seq)) {
logger().debug("{} got {} requeue_out_sent(), wait at {}",
- conn, cc_seq, crosscore.get_in_seq());
- return crosscore.wait(cc_seq
+ conn, cc_seq, proto_crosscore.get_in_seq());
+ return proto_crosscore.wait(cc_seq
).then([this, cc_seq] {
return requeue_out_sent(cc_seq);
});
@@ -517,14 +565,14 @@ void IOHandler::do_requeue_out_sent()
}
seastar::future<> IOHandler::requeue_out_sent_up_to(
- crosscore_t::seq_t cc_seq,
+ cc_seq_t cc_seq,
seq_num_t msg_seq)
{
assert(seastar::this_shard_id() == get_shard_id());
- if (!crosscore.proceed_or_wait(cc_seq)) {
+ if (!proto_crosscore.proceed_or_wait(cc_seq)) {
logger().debug("{} got {} requeue_out_sent_up_to(), wait at {}",
- conn, cc_seq, crosscore.get_in_seq());
- return crosscore.wait(cc_seq
+ conn, cc_seq, proto_crosscore.get_in_seq());
+ return proto_crosscore.wait(cc_seq
).then([this, cc_seq, msg_seq] {
return requeue_out_sent_up_to(cc_seq, msg_seq);
});
@@ -583,7 +631,7 @@ void IOHandler::discard_out_sent()
seastar::future<>
IOHandler::dispatch_accept(
- crosscore_t::seq_t cc_seq,
+ cc_seq_t cc_seq,
seastar::shard_id new_sid,
ConnectionFRef conn_fref,
bool is_replace)
@@ -593,7 +641,7 @@ IOHandler::dispatch_accept(
seastar::future<>
IOHandler::dispatch_connect(
- crosscore_t::seq_t cc_seq,
+ cc_seq_t cc_seq,
seastar::shard_id new_sid,
ConnectionFRef conn_fref)
{
@@ -620,16 +668,16 @@ IOHandler::cleanup_prv_shard(seastar::shard_id prv_sid)
seastar::future<>
IOHandler::to_new_sid(
- crosscore_t::seq_t cc_seq,
+ cc_seq_t cc_seq,
seastar::shard_id new_sid,
ConnectionFRef conn_fref,
std::optional<bool> is_replace)
{
ceph_assert_always(seastar::this_shard_id() == get_shard_id());
- if (!crosscore.proceed_or_wait(cc_seq)) {
+ if (!proto_crosscore.proceed_or_wait(cc_seq)) {
logger().debug("{} got {} to_new_sid(), wait at {}",
- conn, cc_seq, crosscore.get_in_seq());
- return crosscore.wait(cc_seq
+ conn, cc_seq, proto_crosscore.get_in_seq());
+ return proto_crosscore.wait(cc_seq
).then([this, cc_seq, new_sid, is_replace,
conn_fref=std::move(conn_fref)]() mutable {
return to_new_sid(cc_seq, new_sid, std::move(conn_fref), is_replace);
@@ -685,6 +733,8 @@ IOHandler::to_new_sid(
shard_states = shard_states_t::create_from_previous(
*maybe_prv_shard_states, new_sid);
assert(new_sid == get_shard_id());
+ // broadcast shard change to all the io waiters, atomically.
+ io_crosscore.reset_wait();
return seastar::smp::submit_to(new_sid,
[this, next_cc_seq, is_dropped, prv_sid, is_replace, conn_fref=std::move(conn_fref)]() mutable {
@@ -699,7 +749,7 @@ IOHandler::to_new_sid(
ceph_assert_always(seastar::this_shard_id() == get_shard_id());
ceph_assert_always(get_io_state() != io_state_t::open);
ceph_assert_always(!maybe_dropped_sid.has_value());
- ceph_assert_always(crosscore.proceed_or_wait(next_cc_seq));
+ ceph_assert_always(proto_crosscore.proceed_or_wait(next_cc_seq));
if (is_dropped) {
ceph_assert_always(get_io_state() == io_state_t::drop);
@@ -735,7 +785,7 @@ IOHandler::to_new_sid(
}
seastar::future<> IOHandler::set_accepted_sid(
- crosscore_t::seq_t cc_seq,
+ cc_seq_t cc_seq,
seastar::shard_id sid,
ConnectionFRef conn_fref)
{
@@ -749,7 +799,7 @@ seastar::future<> IOHandler::set_accepted_sid(
return seastar::smp::submit_to(sid,
[this, cc_seq, conn_fref=std::move(conn_fref)]() mutable {
// must be the first to proceed
- ceph_assert_always(crosscore.proceed_or_wait(cc_seq));
+ ceph_assert_always(proto_crosscore.proceed_or_wait(cc_seq));
logger().debug("{} set accepted sid", conn);
ceph_assert_always(seastar::this_shard_id() == get_shard_id());
@@ -875,7 +925,7 @@ IOHandler::do_out_dispatch(shard_states_t &ctx)
}
if (io_state == io_state_t::open) {
- auto cc_seq = crosscore.prepare_submit();
+ auto cc_seq = proto_crosscore.prepare_submit();
logger().info("{} do_out_dispatch(): fault at {}, {}, going to delay -- {}, "
"send {} notify_out_fault()",
conn, io_state, io_stat_printer{*this}, e.what(), cc_seq);
@@ -922,7 +972,7 @@ void IOHandler::notify_out_dispatch()
ceph_assert_always(seastar::this_shard_id() == get_shard_id());
assert(is_out_queued());
if (need_notify_out) {
- auto cc_seq = crosscore.prepare_submit();
+ auto cc_seq = proto_crosscore.prepare_submit();
logger().debug("{} send {} notify_out()",
conn, cc_seq);
shard_states->dispatch_in_background(
@@ -1152,7 +1202,7 @@ void IOHandler::do_in_dispatch()
auto io_state = ctx.get_io_state();
if (io_state == io_state_t::open) {
- auto cc_seq = crosscore.prepare_submit();
+ auto cc_seq = proto_crosscore.prepare_submit();
logger().info("{} do_in_dispatch(): fault at {}, {}, going to delay -- {}, "
"send {} notify_out_fault()",
conn, io_state, io_stat_printer{*this}, e_what, cc_seq);
@@ -1183,15 +1233,15 @@ void IOHandler::do_in_dispatch()
seastar::future<>
IOHandler::close_io(
- crosscore_t::seq_t cc_seq,
+ cc_seq_t cc_seq,
bool is_dispatch_reset,
bool is_replace)
{
ceph_assert_always(seastar::this_shard_id() == get_shard_id());
- if (!crosscore.proceed_or_wait(cc_seq)) {
+ if (!proto_crosscore.proceed_or_wait(cc_seq)) {
logger().debug("{} got {} close_io(), wait at {}",
- conn, cc_seq, crosscore.get_in_seq());
- return crosscore.wait(cc_seq
+ conn, cc_seq, proto_crosscore.get_in_seq());
+ return proto_crosscore.wait(cc_seq
).then([this, cc_seq, is_dispatch_reset, is_replace] {
return close_io(cc_seq, is_dispatch_reset, is_replace);
});
diff --git a/src/crimson/net/io_handler.h b/src/crimson/net/io_handler.h
index f53c2ba6468..8b88e2f5a25 100644
--- a/src/crimson/net/io_handler.h
+++ b/src/crimson/net/io_handler.h
@@ -5,10 +5,10 @@
#include <vector>
-#include <seastar/core/shared_future.hh>
#include <seastar/util/later.hh>
#include "crimson/common/gated.h"
+#include "crimson/common/smp_helpers.h"
#include "Fwd.h"
#include "SocketConnection.h"
#include "FrameAssemblerV2.h"
@@ -16,54 +16,6 @@
namespace crimson::net {
/**
- * crosscore_t
- *
- * To preserve the event order across cores.
- */
-class crosscore_t {
-public:
- using seq_t = uint64_t;
-
- crosscore_t() = default;
- ~crosscore_t() = default;
-
- seq_t get_in_seq() const {
- return in_seq;
- }
-
- seq_t prepare_submit() {
- ++out_seq;
- return out_seq;
- }
-
- bool proceed_or_wait(seq_t seq) {
- if (seq == in_seq + 1) {
- ++in_seq;
- if (unlikely(in_pr_wait.has_value())) {
- in_pr_wait->set_value();
- in_pr_wait = std::nullopt;
- }
- return true;
- } else {
- return false;
- }
- }
-
- seastar::future<> wait(seq_t seq) {
- assert(seq != in_seq + 1);
- if (!in_pr_wait.has_value()) {
- in_pr_wait = seastar::shared_promise<>();
- }
- return in_pr_wait->get_shared_future();
- }
-
-private:
- seq_t out_seq = 0;
- seq_t in_seq = 0;
- std::optional<seastar::shared_promise<>> in_pr_wait;
-};
-
-/**
* io_handler_state
*
* It is required to populate the states from IOHandler to ProtocolV2
@@ -118,6 +70,9 @@ struct io_handler_state {
*/
class HandshakeListener {
public:
+ using proto_crosscore_ordering_t = smp_crosscore_ordering_t<crosscore_type_t::ONE>;
+ using cc_seq_t = proto_crosscore_ordering_t::seq_t;
+
virtual ~HandshakeListener() = default;
HandshakeListener(const HandshakeListener&) = delete;
@@ -126,16 +81,16 @@ public:
HandshakeListener &operator=(HandshakeListener &&) = delete;
virtual seastar::future<> notify_out(
- crosscore_t::seq_t cc_seq) = 0;
+ cc_seq_t cc_seq) = 0;
virtual seastar::future<> notify_out_fault(
- crosscore_t::seq_t cc_seq,
+ cc_seq_t cc_seq,
const char *where,
std::exception_ptr,
io_handler_state) = 0;
virtual seastar::future<> notify_mark_down(
- crosscore_t::seq_t cc_seq) = 0;
+ cc_seq_t cc_seq) = 0;
protected:
HandshakeListener() = default;
@@ -150,6 +105,10 @@ protected:
*/
class IOHandler final : public ConnectionHandler {
public:
+ using io_crosscore_ordering_t = smp_crosscore_ordering_t<crosscore_type_t::N_ONE>;
+ using proto_crosscore_ordering_t = smp_crosscore_ordering_t<crosscore_type_t::ONE>;
+ using cc_seq_t = proto_crosscore_ordering_t::seq_t;
+
IOHandler(ChainedDispatchers &,
SocketConnection &);
@@ -173,7 +132,7 @@ public:
return protocol_is_connected;
}
- seastar::future<> send(MessageFRef msg) final;
+ seastar::future<> send(MessageURef msg) final;
seastar::future<> send_keepalive() final;
@@ -221,7 +180,7 @@ public:
void print_io_stat(std::ostream &out) const;
seastar::future<> set_accepted_sid(
- crosscore_t::seq_t cc_seq,
+ cc_seq_t cc_seq,
seastar::shard_id sid,
ConnectionFRef conn_fref);
@@ -230,7 +189,7 @@ public:
*/
seastar::future<> close_io(
- crosscore_t::seq_t cc_seq,
+ cc_seq_t cc_seq,
bool is_dispatch_reset,
bool is_replace);
@@ -251,7 +210,7 @@ public:
friend class fmt::formatter<io_state_t>;
seastar::future<> set_io_state(
- crosscore_t::seq_t cc_seq,
+ cc_seq_t cc_seq,
io_state_t new_state,
FrameAssemblerV2Ref fa,
bool set_notify_out);
@@ -262,30 +221,30 @@ public:
};
seastar::future<exit_dispatching_ret>
wait_io_exit_dispatching(
- crosscore_t::seq_t cc_seq);
+ cc_seq_t cc_seq);
seastar::future<> reset_session(
- crosscore_t::seq_t cc_seq,
+ cc_seq_t cc_seq,
bool full);
seastar::future<> reset_peer_state(
- crosscore_t::seq_t cc_seq);
+ cc_seq_t cc_seq);
seastar::future<> requeue_out_sent_up_to(
- crosscore_t::seq_t cc_seq,
+ cc_seq_t cc_seq,
seq_num_t msg_seq);
seastar::future<> requeue_out_sent(
- crosscore_t::seq_t cc_seq);
+ cc_seq_t cc_seq);
seastar::future<> dispatch_accept(
- crosscore_t::seq_t cc_seq,
+ cc_seq_t cc_seq,
seastar::shard_id new_sid,
ConnectionFRef,
bool is_replace);
seastar::future<> dispatch_connect(
- crosscore_t::seq_t cc_seq,
+ cc_seq_t cc_seq,
seastar::shard_id new_sid,
ConnectionFRef);
@@ -426,7 +385,7 @@ public:
void do_set_io_state(
io_state_t new_state,
- std::optional<crosscore_t::seq_t> cc_seq = std::nullopt,
+ std::optional<cc_seq_t> cc_seq = std::nullopt,
FrameAssemblerV2Ref fa = nullptr,
bool set_notify_out = false);
@@ -440,16 +399,16 @@ public:
void assign_frame_assembler(FrameAssemblerV2Ref);
- seastar::future<> send_redirected(MessageFRef msg);
+ seastar::future<> send_recheck_shard(cc_seq_t, core_id_t, MessageFRef);
- seastar::future<> do_send(MessageFRef msg);
+ seastar::future<> do_send(cc_seq_t, core_id_t, MessageFRef);
- seastar::future<> send_keepalive_redirected();
+ seastar::future<> send_keepalive_recheck_shard(cc_seq_t, core_id_t);
- seastar::future<> do_send_keepalive();
+ seastar::future<> do_send_keepalive(cc_seq_t, core_id_t);
seastar::future<> to_new_sid(
- crosscore_t::seq_t cc_seq,
+ cc_seq_t cc_seq,
seastar::shard_id new_sid,
ConnectionFRef,
std::optional<bool> is_replace);
@@ -509,7 +468,9 @@ public:
private:
shard_states_ref_t shard_states;
- crosscore_t crosscore;
+ proto_crosscore_ordering_t proto_crosscore;
+
+ io_crosscore_ordering_t io_crosscore;
// drop was happening in the previous sid
std::optional<seastar::shard_id> maybe_dropped_sid;
diff --git a/src/crimson/osd/object_context_loader.cc b/src/crimson/osd/object_context_loader.cc
index 0a4d74c0d70..d7d2b6d98c6 100644
--- a/src/crimson/osd/object_context_loader.cc
+++ b/src/crimson/osd/object_context_loader.cc
@@ -22,7 +22,7 @@ using crimson::common::local_conf;
return get_or_load_obc<State>(obc, existed)
.safe_then_interruptible(
[func = std::move(func)](auto obc) {
- return std::move(func)(std::move(obc));
+ return std::move(func)(obc, obc);
});
}).finally([FNAME, this, obc=std::move(obc)] {
DEBUGDPP("released object {}", dpp, obc->get_oid());
@@ -39,7 +39,7 @@ using crimson::common::local_conf;
assert(!oid.is_head());
return with_obc<RWState::RWREAD>(
oid.get_head(),
- [FNAME, oid, func=std::move(func), this](auto head) mutable
+ [FNAME, oid, func=std::move(func), this](auto head, auto) mutable
-> load_obc_iertr::future<> {
if (!head->obs.exists) {
ERRORDPP("head doesn't exist for object {}", dpp, head->obs.oi.soid);
@@ -70,12 +70,12 @@ using crimson::common::local_conf;
auto [clone, existed] = obc_registry.get_cached_obc(*coid);
return clone->template with_lock<State, IOInterruptCondition>(
[existed=existed, clone=std::move(clone),
- func=std::move(func), head=std::move(head), this]()
+ func=std::move(func), head=std::move(head), this]() mutable
-> load_obc_iertr::future<> {
auto loaded = get_or_load_obc<State>(clone, existed);
return loaded.safe_then_interruptible(
- [func = std::move(func)](auto clone) {
- return std::move(func)(std::move(clone));
+ [func = std::move(func), head=std::move(head)](auto clone) mutable {
+ return std::move(func)(std::move(head), std::move(clone));
});
});
}
@@ -84,13 +84,13 @@ using crimson::common::local_conf;
ObjectContextLoader::load_obc_iertr::future<>
ObjectContextLoader::with_clone_obc_direct(
hobject_t oid,
- with_both_obc_func_t&& func)
+ with_obc_func_t&& func)
{
LOG_PREFIX(ObjectContextLoader::with_clone_obc_direct);
assert(!oid.is_head());
return with_obc<RWState::RWREAD>(
oid.get_head(),
- [FNAME, oid, func=std::move(func), this](auto head) mutable
+ [FNAME, oid, func=std::move(func), this](auto head, auto) mutable
-> load_obc_iertr::future<> {
if (!head->obs.exists) {
ERRORDPP("head doesn't exist for object {}", dpp, head->obs.oi.soid);
@@ -228,5 +228,5 @@ using crimson::common::local_conf;
template ObjectContextLoader::load_obc_iertr::future<>
ObjectContextLoader::with_clone_obc_direct<RWState::RWWRITE>(
hobject_t,
- with_both_obc_func_t&&);
+ with_obc_func_t&&);
}
diff --git a/src/crimson/osd/object_context_loader.h b/src/crimson/osd/object_context_loader.h
index 3ab7f6ad80f..0cd50623abc 100644
--- a/src/crimson/osd/object_context_loader.h
+++ b/src/crimson/osd/object_context_loader.h
@@ -30,9 +30,6 @@ public:
load_obc_ertr>;
using with_obc_func_t =
- std::function<load_obc_iertr::future<> (ObjectContextRef)>;
-
- using with_both_obc_func_t =
std::function<load_obc_iertr::future<> (ObjectContextRef, ObjectContextRef)>;
// Use this variant by default
@@ -55,7 +52,7 @@ public:
template<RWState::State State>
load_obc_iertr::future<> with_clone_obc_direct(
hobject_t oid,
- with_both_obc_func_t&& func);
+ with_obc_func_t&& func);
load_obc_iertr::future<> reload_obc(ObjectContext& obc) const;
diff --git a/src/crimson/osd/ops_executer.cc b/src/crimson/osd/ops_executer.cc
index 040870203bd..09b1a492576 100644
--- a/src/crimson/osd/ops_executer.cc
+++ b/src/crimson/osd/ops_executer.cc
@@ -797,7 +797,7 @@ void OpsExecuter::fill_op_params_bump_pg_version()
{
osd_op_params->req_id = msg->get_reqid();
osd_op_params->mtime = msg->get_mtime();
- osd_op_params->at_version = pg->next_version();
+ osd_op_params->at_version = pg->get_next_version();
osd_op_params->pg_trim_to = pg->get_pg_trim_to();
osd_op_params->min_last_complete_ondisk = pg->get_min_last_complete_ondisk();
osd_op_params->last_complete = pg->get_info().last_complete;
@@ -820,6 +820,7 @@ std::vector<pg_log_entry_t> OpsExecuter::prepare_transaction(
osd_op_params->req_id,
osd_op_params->mtime,
op_info.allows_returnvec() && !ops.empty() ? ops.back().rval.code : 0);
+ osd_op_params->at_version.version++;
if (op_info.allows_returnvec()) {
// also the per-op values are recorded in the pg log
log_entries.back().set_op_returns(ops);
@@ -957,6 +958,7 @@ std::unique_ptr<OpsExecuter::CloningContext> OpsExecuter::execute_clone(
initial_obs.oi.mtime, // will be replaced in `apply_to()`
0
};
+ osd_op_params->at_version.version++;
encode(cloned_snaps, cloning_ctx->log_entry.snaps);
// TODO: update most recent clone_overlap and usage stats
@@ -968,7 +970,7 @@ void OpsExecuter::CloningContext::apply_to(
ObjectContext& processed_obc) &&
{
log_entry.mtime = processed_obc.obs.oi.mtime;
- log_entries.emplace_back(std::move(log_entry));
+ log_entries.insert(log_entries.begin(), std::move(log_entry));
processed_obc.ssc->snapset = std::move(new_snapset);
}
@@ -983,7 +985,7 @@ OpsExecuter::flush_clone_metadata(
auto maybe_snap_mapped = interruptor::now();
if (cloning_ctx) {
std::move(*cloning_ctx).apply_to(log_entries, *obc);
- const auto& coid = log_entries.back().soid;
+ const auto& coid = log_entries.front().soid;
const auto& cloned_snaps = obc->ssc->snapset.clone_snaps[coid.snap];
maybe_snap_mapped = snap_map_clone(
coid,
@@ -1012,7 +1014,7 @@ std::pair<object_info_t, ObjectContextRef> OpsExecuter::prepare_clone(
const hobject_t& coid)
{
object_info_t static_snap_oi(coid);
- static_snap_oi.version = pg->next_version();
+ static_snap_oi.version = osd_op_params->at_version;
static_snap_oi.prior_version = obc->obs.oi.version;
static_snap_oi.copy_user_bits(obc->obs.oi);
if (static_snap_oi.is_whiteout()) {
diff --git a/src/crimson/osd/osd.cc b/src/crimson/osd/osd.cc
index 8da2d566e6b..f3648c6df27 100644
--- a/src/crimson/osd/osd.cc
+++ b/src/crimson/osd/osd.cc
@@ -400,7 +400,11 @@ seastar::future<> OSD::start()
);
}).then([this](OSDSuperblock&& sb) {
superblock = std::move(sb);
- pg_shard_manager.set_superblock(superblock);
+ if (!superblock.cluster_osdmap_trim_lower_bound) {
+ superblock.cluster_osdmap_trim_lower_bound = superblock.get_oldest_map();
+ }
+ return pg_shard_manager.set_superblock(superblock);
+ }).then([this] {
return pg_shard_manager.get_local_map(superblock.current_epoch);
}).then([this](OSDMapService::local_cached_map_t&& map) {
osdmap = make_local_shared_foreign(OSDMapService::local_cached_map_t(map));
@@ -864,6 +868,25 @@ void OSD::handle_authentication(const EntityName& name,
}
}
+const char** OSD::get_tracked_conf_keys() const
+{
+ static const char* KEYS[] = {
+ "osd_beacon_report_interval",
+ nullptr
+ };
+ return KEYS;
+}
+
+void OSD::handle_conf_change(
+ const crimson::common::ConfigProxy& conf,
+ const std::set <std::string> &changed)
+{
+ if (changed.count("osd_beacon_report_interval")) {
+ beacon_timer.rearm_periodic(
+ std::chrono::seconds(conf->osd_beacon_report_interval));
+ }
+}
+
void OSD::update_stats()
{
osd_stat_seq++;
@@ -879,13 +902,20 @@ void OSD::update_stats()
});
}
-seastar::future<MessageURef> OSD::get_stats() const
+seastar::future<MessageURef> OSD::get_stats()
{
// MPGStats::had_map_for is not used since PGMonitor was removed
auto m = crimson::make_message<MPGStats>(monc->get_fsid(), osdmap->get_epoch());
m->osd_stat = osd_stat;
return pg_shard_manager.get_pg_stats(
- ).then([m=std::move(m)](auto &&stats) mutable {
+ ).then([this, m=std::move(m)](auto &&stats) mutable {
+ min_last_epoch_clean = osdmap->get_epoch();
+ min_last_epoch_clean_pgs.clear();
+ for (auto [pgid, stat] : stats) {
+ min_last_epoch_clean = std::min(min_last_epoch_clean,
+ stat.get_effective_last_epoch_clean());
+ min_last_epoch_clean_pgs.push_back(pgid);
+ }
m->pg_stat = std::move(stats);
return seastar::make_ready_future<MessageURef>(std::move(m));
});
@@ -934,6 +964,16 @@ seastar::future<> OSD::_handle_osd_map(Ref<MOSDMap> m)
logger().info("handle_osd_map epochs [{}..{}], i have {}, src has [{}..{}]",
first, last, superblock.get_newest_map(),
m->cluster_osdmap_trim_lower_bound, m->newest_map);
+
+ if (superblock.cluster_osdmap_trim_lower_bound <
+ m->cluster_osdmap_trim_lower_bound) {
+ superblock.cluster_osdmap_trim_lower_bound =
+ m->cluster_osdmap_trim_lower_bound;
+ logger().debug("{} superblock cluster_osdmap_trim_lower_bound new epoch is: {}",
+ __func__, superblock.cluster_osdmap_trim_lower_bound);
+ ceph_assert(
+ superblock.cluster_osdmap_trim_lower_bound >= superblock.get_oldest_map());
+ }
// make sure there is something new, here, before we bother flushing
// the queues and such
if (last <= superblock.get_newest_map()) {
@@ -964,8 +1004,9 @@ seastar::future<> OSD::_handle_osd_map(Ref<MOSDMap> m)
monc->sub_got("osdmap", last);
if (!superblock.maps.empty()) {
- // TODO: support osdmap trimming
- // See: <tracker>
+ pg_shard_manager.trim_maps(t, superblock);
+ // TODO: once we support pg splitting, update pg_num_history here
+ //pg_num_history.prune(superblock.get_oldest_map());
}
superblock.insert_osdmap_epochs(first, last);
@@ -977,11 +1018,13 @@ seastar::future<> OSD::_handle_osd_map(Ref<MOSDMap> m)
superblock.clean_thru = last;
}
pg_shard_manager.get_meta_coll().store_superblock(t, superblock);
- pg_shard_manager.set_superblock(superblock);
- logger().debug("OSD::handle_osd_map: do_transaction...");
- return store.get_sharded_store().do_transaction(
- pg_shard_manager.get_meta_coll().collection(),
- std::move(t));
+ return pg_shard_manager.set_superblock(superblock).then(
+ [this, &t] {
+ logger().debug("OSD::handle_osd_map: do_transaction...");
+ return store.get_sharded_store().do_transaction(
+ pg_shard_manager.get_meta_coll().collection(),
+ std::move(t));
+ });
});
}).then([=, this] {
// TODO: write to superblock and commit the transaction
@@ -1266,14 +1309,13 @@ seastar::future<> OSD::send_beacon()
if (!pg_shard_manager.is_active()) {
return seastar::now();
}
- // FIXME: min lec should be calculated from pg_stat
- // and should set m->pgs
- epoch_t min_last_epoch_clean = osdmap->get_epoch();
- auto m = crimson::make_message<MOSDBeacon>(osdmap->get_epoch(),
+ auto beacon = crimson::make_message<MOSDBeacon>(osdmap->get_epoch(),
min_last_epoch_clean,
superblock.last_purged_snaps_scrub,
local_conf()->osd_beacon_report_interval);
- return monc->send_message(std::move(m));
+ beacon->pgs = min_last_epoch_clean_pgs;
+ logger().debug("{} {}", __func__, *beacon);
+ return monc->send_message(std::move(beacon));
}
seastar::future<> OSD::update_heartbeat_peers()
diff --git a/src/crimson/osd/osd.h b/src/crimson/osd/osd.h
index 10ff60d4701..134376ad947 100644
--- a/src/crimson/osd/osd.h
+++ b/src/crimson/osd/osd.h
@@ -61,7 +61,8 @@ class PG;
class OSD final : public crimson::net::Dispatcher,
private crimson::common::AuthHandler,
- private crimson::mgr::WithStats {
+ private crimson::mgr::WithStats,
+ public md_config_obs_t {
const int whoami;
const uint32_t nonce;
seastar::abort_source& abort_source;
@@ -106,8 +107,11 @@ class OSD final : public crimson::net::Dispatcher,
// pg statistics including osd ones
osd_stat_t osd_stat;
uint32_t osd_stat_seq = 0;
+ epoch_t min_last_epoch_clean = 0;
+ // which pgs were scanned for min_lec
+ std::vector<pg_t> min_last_epoch_clean_pgs;
void update_stats();
- seastar::future<MessageURef> get_stats() const final;
+ seastar::future<MessageURef> get_stats() final;
// AuthHandler methods
void handle_authentication(const EntityName& name,
@@ -123,6 +127,10 @@ class OSD final : public crimson::net::Dispatcher,
std::unique_ptr<Heartbeat> heartbeat;
seastar::timer<seastar::lowres_clock> tick_timer;
+ const char** get_tracked_conf_keys() const final;
+ void handle_conf_change(const ConfigProxy& conf,
+ const std::set<std::string> &changed) final;
+
// admin-socket
seastar::lw_shared_ptr<crimson::admin::AdminSocket> asok;
diff --git a/src/crimson/osd/osd_connection_priv.h b/src/crimson/osd/osd_connection_priv.h
index 2d2a459017b..3c7d085c06e 100644
--- a/src/crimson/osd/osd_connection_priv.h
+++ b/src/crimson/osd/osd_connection_priv.h
@@ -3,8 +3,7 @@
#pragma once
-#include <seastar/core/smp.hh>
-
+#include "crimson/common/smp_helpers.h"
#include "crimson/net/Connection.h"
#include "crimson/osd/osd_operation.h"
#include "crimson/osd/osd_operations/client_request.h"
@@ -13,75 +12,9 @@
namespace crimson::osd {
-/**
- * crosscore_ordering_t
- *
- * To preserve the event order from 1 source to n target cores.
- */
-class crosscore_ordering_t {
-public:
- using seq_t = uint64_t;
-
- crosscore_ordering_t()
- : out_seqs(seastar::smp::count, 0),
- in_controls(seastar::smp::count) {}
-
- ~crosscore_ordering_t() = default;
-
- // Called by the original core to get the ordering sequence
- seq_t prepare_submit(core_id_t target_core) {
- auto &out_seq = out_seqs[target_core];
- ++out_seq;
- return out_seq;
- }
-
- /*
- * Called by the target core to preserve the ordering
- */
-
- seq_t get_in_seq() const {
- auto core = seastar::this_shard_id();
- return in_controls[core].seq;
- }
-
- bool proceed_or_wait(seq_t seq) {
- auto core = seastar::this_shard_id();
- auto &in_control = in_controls[core];
- if (seq == in_control.seq + 1) {
- ++in_control.seq;
- if (unlikely(in_control.pr_wait.has_value())) {
- in_control.pr_wait->set_value();
- in_control.pr_wait = std::nullopt;
- }
- return true;
- } else {
- return false;
- }
- }
-
- seastar::future<> wait(seq_t seq) {
- auto core = seastar::this_shard_id();
- auto &in_control = in_controls[core];
- assert(seq != in_control.seq + 1);
- if (!in_control.pr_wait.has_value()) {
- in_control.pr_wait = seastar::shared_promise<>();
- }
- return in_control.pr_wait->get_shared_future();
- }
-
-private:
- struct in_control_t {
- seq_t seq = 0;
- std::optional<seastar::shared_promise<>> pr_wait;
- };
-
- // source-side
- std::vector<seq_t> out_seqs;
- // target-side
- std::vector<in_control_t> in_controls;
-};
-
struct OSDConnectionPriv : public crimson::net::Connection::user_private_t {
+ using crosscore_ordering_t = smp_crosscore_ordering_t<crosscore_type_t::ONE_N>;
+
ConnectionPipeline client_request_conn_pipeline;
ConnectionPipeline peering_request_conn_pipeline;
ConnectionPipeline replicated_request_conn_pipeline;
diff --git a/src/crimson/osd/osd_meta.cc b/src/crimson/osd/osd_meta.cc
index e40b2b2464b..06d6932bc54 100644
--- a/src/crimson/osd/osd_meta.cc
+++ b/src/crimson/osd/osd_meta.cc
@@ -9,6 +9,7 @@
#include "crimson/os/futurized_collection.h"
#include "crimson/os/futurized_store.h"
#include "os/Transaction.h"
+#include "osd/OSDMap.h"
using std::string;
using read_errorator = crimson::os::FuturizedStore::Shard::read_errorator;
@@ -24,6 +25,11 @@ void OSDMeta::store_map(ceph::os::Transaction& t,
t.write(coll->get_cid(), osdmap_oid(e), 0, m.length(), m);
}
+void OSDMeta::remove_map(ceph::os::Transaction& t, epoch_t e)
+{
+ t.remove(coll->get_cid(), osdmap_oid(e));
+}
+
seastar::future<bufferlist> OSDMeta::load_map(epoch_t e)
{
return store.read(coll,
@@ -80,6 +86,36 @@ OSDMeta::load_final_pool_info(int64_t pool) {
}));
}
+void OSDMeta::store_final_pool_info(
+ ceph::os::Transaction &t,
+ OSDMap* lastmap,
+ std::map<epoch_t, OSDMap*> &added_map)
+{
+ for (auto [e, map] : added_map) {
+ if (!lastmap) {
+ lastmap = map;
+ continue;
+ }
+ for (auto &[pool_id, pool] : lastmap->get_pools()) {
+ if (!map->have_pg_pool(pool_id)) {
+ ghobject_t obj = final_pool_info_oid(pool_id);
+ bufferlist bl;
+ encode(pool, bl, CEPH_FEATURES_ALL);
+ string name = lastmap->get_pool_name(pool_id);
+ encode(name, bl);
+ std::map<string, string> profile;
+ if (pool.is_erasure()) {
+ profile = lastmap->get_erasure_code_profile(
+ pool.erasure_code_profile);
+ }
+ encode(profile, bl);
+ t.write(coll->get_cid(), obj, 0, bl.length(), bl);
+ }
+ }
+ lastmap = map;
+ }
+}
+
ghobject_t OSDMeta::osdmap_oid(epoch_t epoch)
{
string name = fmt::format("osdmap.{}", epoch);
diff --git a/src/crimson/osd/osd_meta.h b/src/crimson/osd/osd_meta.h
index 652266d9e20..2363671e330 100644
--- a/src/crimson/osd/osd_meta.h
+++ b/src/crimson/osd/osd_meta.h
@@ -40,6 +40,7 @@ public:
void store_map(ceph::os::Transaction& t,
epoch_t e, const bufferlist& m);
+ void remove_map(ceph::os::Transaction& t, epoch_t e);
seastar::future<bufferlist> load_map(epoch_t e);
void store_superblock(ceph::os::Transaction& t,
@@ -53,6 +54,10 @@ public:
seastar::future<std::tuple<pg_pool_t,
std::string,
ec_profile_t>> load_final_pool_info(int64_t pool);
+ void store_final_pool_info(
+ ceph::os::Transaction&,
+ OSDMap* lastmap,
+ std::map<epoch_t, OSDMap*>&);
private:
static ghobject_t osdmap_oid(epoch_t epoch);
static ghobject_t final_pool_info_oid(int64_t pool);
diff --git a/src/crimson/osd/osd_operations/client_request.cc b/src/crimson/osd/osd_operations/client_request.cc
index 2160e4823f7..3fba73a0c23 100644
--- a/src/crimson/osd/osd_operations/client_request.cc
+++ b/src/crimson/osd/osd_operations/client_request.cc
@@ -204,7 +204,8 @@ ClientRequest::process_pg_op(
return pg->do_pg_ops(
m
).then_interruptible([this, pg=std::move(pg)](MURef<MOSDOpReply> reply) {
- return conn->send(std::move(reply));
+ // TODO: gate the crosscore sending
+ return conn->send_with_throttling(std::move(reply));
});
}
@@ -218,25 +219,17 @@ auto ClientRequest::reply_op_error(const Ref<PG>& pg, int err)
!m->has_flag(CEPH_OSD_FLAG_RETURNVEC));
reply->set_reply_versions(eversion_t(), 0);
reply->set_op_returns(std::vector<pg_log_op_return_item_t>{});
- return conn->send(std::move(reply));
+ // TODO: gate the crosscore sending
+ return conn->send_with_throttling(std::move(reply));
}
ClientRequest::interruptible_future<>
ClientRequest::process_op(instance_handle_t &ihref, Ref<PG> &pg)
{
return ihref.enter_stage<interruptor>(
- client_pp(*pg).recover_missing,
- *this
- ).then_interruptible(
- [this, pg]() mutable {
- LOG_PREFIX(ClientRequest::process_op);
- if (pg->is_primary()) {
- return do_recover_missing(pg, m->get_hobj());
- } else {
- DEBUGI("process_op: Skipping do_recover_missing"
- "on non primary pg");
- return interruptor::now();
- }
+ client_pp(*pg).recover_missing, *this
+ ).then_interruptible([pg, this]() mutable {
+ return recover_missings(pg, m->get_hobj(), snaps_need_to_recover());
}).then_interruptible([this, pg, &ihref]() mutable {
return pg->already_complete(m->get_reqid()).then_interruptible(
[this, pg, &ihref](auto completed) mutable
@@ -246,7 +239,8 @@ ClientRequest::process_op(instance_handle_t &ihref, Ref<PG> &pg)
m.get(), completed->err, pg->get_osdmap_epoch(),
CEPH_OSD_FLAG_ACK | CEPH_OSD_FLAG_ONDISK, false);
reply->set_reply_versions(completed->version, completed->user_version);
- return conn->send(std::move(reply));
+ // TODO: gate the crosscore sending
+ return conn->send_with_throttling(std::move(reply));
} else {
return ihref.enter_stage<interruptor>(client_pp(*pg).get_obc, *this
).then_interruptible(
@@ -256,7 +250,7 @@ ClientRequest::process_op(instance_handle_t &ihref, Ref<PG> &pg)
op_info.set_from_op(&*m, *pg->get_osdmap());
return pg->with_locked_obc(
m->get_hobj(), op_info,
- [this, pg, &ihref](auto obc) mutable {
+ [this, pg, &ihref](auto head, auto obc) mutable {
LOG_PREFIX(ClientRequest::process_op);
DEBUGI("{}: got obc {}", *this, obc->obs);
return ihref.enter_stage<interruptor>(
@@ -319,13 +313,13 @@ ClientRequest::do_process(
SnapContext snapc = get_snapc(pg,obc);
- if ((m->has_flag(CEPH_OSD_FLAG_ORDERSNAP)) &&
- snapc.seq < obc->ssc->snapset.seq) {
- DEBUGI("{} ORDERSNAP flag set and snapc seq {}",
- " < snapset seq {} on {}",
- __func__, snapc.seq, obc->ssc->snapset.seq,
- obc->obs.oi.soid);
- return reply_op_error(pg, -EOLDSNAPC);
+ if (m->has_flag(CEPH_OSD_FLAG_ORDERSNAP) &&
+ snapc.seq < obc->ssc->snapset.seq) {
+ DEBUGI("{} ORDERSNAP flag set and snapc seq {}",
+ " < snapset seq {} on {}",
+ __func__, snapc.seq, obc->ssc->snapset.seq,
+ obc->obs.oi.soid);
+ return reply_op_error(pg, -EOLDSNAPC);
}
if (!pg->is_primary()) {
@@ -360,8 +354,10 @@ ClientRequest::do_process(
[this, reply=std::move(reply)]() mutable {
LOG_PREFIX(ClientRequest::do_process);
DEBUGI("{}: sending response", *this);
- return conn->send(std::move(reply));
- });
+ // TODO: gate the crosscore sending
+ return conn->send_with_throttling(std::move(reply));
+ }
+ );
}, crimson::ct_error::eagain::handle([this, pg, &ihref]() mutable {
return process_op(ihref, pg);
}));
diff --git a/src/crimson/osd/osd_operations/client_request.h b/src/crimson/osd/osd_operations/client_request.h
index 3c8c146a4f2..43535e156c5 100644
--- a/src/crimson/osd/osd_operations/client_request.h
+++ b/src/crimson/osd/osd_operations/client_request.h
@@ -160,6 +160,16 @@ public:
}
auto get_instance_handle() { return instance_handle; }
+ std::vector<snapid_t> snaps_need_to_recover() {
+ std::vector<snapid_t> ret;
+ for (auto &op : m->ops) {
+ if (op.op.op == CEPH_OSD_OP_ROLLBACK) {
+ ret.emplace_back((snapid_t)op.op.snap.snapid);
+ }
+ }
+ return ret;
+ }
+
using ordering_hook_t = boost::intrusive::list_member_hook<>;
ordering_hook_t ordering_hook;
class Orderer {
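
Note on the new helper above: snaps_need_to_recover() scans the client message for rollback ops and collects the snapids they target, which process_op then hands to recover_missings(). A standalone sketch of the same collect idiom with simplified stand-in types (FakeOp and OP_ROLLBACK are illustrative, not the real OSDOp structures):

    #include <cstdint>
    #include <vector>

    enum { OP_ROLLBACK = 1, OP_READ = 2 };

    struct FakeOp { int op; std::uint64_t snapid; };

    std::vector<std::uint64_t> rollback_targets(const std::vector<FakeOp>& ops) {
      std::vector<std::uint64_t> ret;
      for (const auto& op : ops) {
        if (op.op == OP_ROLLBACK) {
          // each rollback names a snapshot whose clone must be recovered first
          ret.push_back(op.snapid);
        }
      }
      return ret;
    }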
diff --git a/src/crimson/osd/osd_operations/client_request_common.cc b/src/crimson/osd/osd_operations/client_request_common.cc
index cfd22c774e0..903da59a3dc 100644
--- a/src/crimson/osd/osd_operations/client_request_common.cc
+++ b/src/crimson/osd/osd_operations/client_request_common.cc
@@ -11,8 +11,54 @@ namespace {
}
}
+SET_SUBSYS(osd);
+
namespace crimson::osd {
+InterruptibleOperation::template interruptible_future<>
+CommonClientRequest::recover_missings(
+ Ref<PG> &pg,
+ const hobject_t& soid,
+ std::vector<snapid_t> &&snaps)
+{
+ using interruptor = InterruptibleOperation::interruptor;
+ LOG_PREFIX(CommonClientRequest::recover_missings);
+ auto fut = interruptor::now();
+ if (!pg->is_primary()) {
+ DEBUGI("process_op: Skipping do_recover_missing on non primary pg");
+ return fut;
+ }
+ if (!soid.is_head()) {
+ fut = do_recover_missing(pg, soid.get_head());
+ }
+ return seastar::do_with(
+ std::move(snaps),
+ [pg, soid, fut=std::move(fut)](auto &snaps) mutable {
+ return fut.then_interruptible([&snaps, pg, soid]() mutable {
+ return pg->obc_loader.with_obc<RWState::RWREAD>(
+ soid.get_head(),
+ [&snaps, pg, soid](auto head, auto) mutable {
+ auto oid = resolve_oid(head->get_head_ss(), soid);
+ assert(oid);
+ return do_recover_missing(pg, *oid
+ ).then_interruptible([&snaps, pg, soid, head]() mutable {
+ return InterruptibleOperation::interruptor::do_for_each(
+ snaps,
+ [pg, soid, head](auto &snap) mutable {
+ auto coid = head->obs.oi.soid;
+ coid.snap = snap;
+ auto oid = resolve_oid(head->get_head_ss(), coid);
+ assert(oid);
+ return do_recover_missing(pg, *oid);
+ });
+ });
+ });
+ }).handle_error_interruptible(
+ crimson::ct_error::assert_all("unexpected error")
+ );
+ });
+}
+
typename InterruptibleOperation::template interruptible_future<>
CommonClientRequest::do_recover_missing(
Ref<PG>& pg, const hobject_t& soid)
diff --git a/src/crimson/osd/osd_operations/client_request_common.h b/src/crimson/osd/osd_operations/client_request_common.h
index 6a8a789668c..46aa038e343 100644
--- a/src/crimson/osd/osd_operations/client_request_common.h
+++ b/src/crimson/osd/osd_operations/client_request_common.h
@@ -10,6 +10,13 @@
namespace crimson::osd {
struct CommonClientRequest {
+
+ static InterruptibleOperation::template interruptible_future<>
+ recover_missings(
+ Ref<PG> &pg,
+ const hobject_t& soid,
+ std::vector<snapid_t> &&snaps);
+
static InterruptibleOperation::template interruptible_future<>
do_recover_missing(Ref<PG>& pg, const hobject_t& soid);
diff --git a/src/crimson/osd/osd_operations/common/pg_pipeline.h b/src/crimson/osd/osd_operations/common/pg_pipeline.h
index 58fa07b8b4d..1e5d0e511f6 100644
--- a/src/crimson/osd/osd_operations/common/pg_pipeline.h
+++ b/src/crimson/osd/osd_operations/common/pg_pipeline.h
@@ -19,7 +19,7 @@ protected:
} wait_for_active;
struct RecoverMissing : OrderedExclusivePhaseT<RecoverMissing> {
static constexpr auto type_name = "CommonPGPipeline::recover_missing";
- } recover_missing;
+ } recover_missing, recover_missing2;
struct GetOBC : OrderedExclusivePhaseT<GetOBC> {
static constexpr auto type_name = "CommonPGPipeline::get_obc";
} get_obc;
diff --git a/src/crimson/osd/osd_operations/internal_client_request.cc b/src/crimson/osd/osd_operations/internal_client_request.cc
index ea45e92c148..527d982f1e9 100644
--- a/src/crimson/osd/osd_operations/internal_client_request.cc
+++ b/src/crimson/osd/osd_operations/internal_client_request.cc
@@ -85,7 +85,7 @@ seastar::future<> InternalClientRequest::start()
std::as_const(osd_ops), pg->get_pgid().pgid, *pg->get_osdmap());
assert(ret == 0);
return pg->with_locked_obc(get_target_oid(), op_info,
- [&osd_ops, this](auto obc) {
+ [&osd_ops, this](auto, auto obc) {
return enter_stage<interruptor>(client_pp().process
).then_interruptible(
[obc=std::move(obc), &osd_ops, this] {
diff --git a/src/crimson/osd/osd_operations/snaptrim_event.cc b/src/crimson/osd/osd_operations/snaptrim_event.cc
index 8ae36a5483d..20f7439f52f 100644
--- a/src/crimson/osd/osd_operations/snaptrim_event.cc
+++ b/src/crimson/osd/osd_operations/snaptrim_event.cc
@@ -220,8 +220,7 @@ SnapTrimObjSubEvent::remove_or_update_iertr::future<>
SnapTrimObjSubEvent::remove_clone(
ObjectContextRef obc,
ObjectContextRef head_obc,
- ceph::os::Transaction& txn,
- std::vector<pg_log_entry_t>& log_entries
+ ceph::os::Transaction& txn
) {
const auto p = std::find(
head_obc->ssc->snapset.clones.begin(),
@@ -268,17 +267,14 @@ SnapTrimObjSubEvent::remove_clone(
head_obc->ssc->snapset.clone_size.erase(last);
head_obc->ssc->snapset.clone_snaps.erase(last);
- log_entries.emplace_back(
- pg_log_entry_t{
- pg_log_entry_t::DELETE,
- coid,
- osd_op_p.at_version,
- obc->obs.oi.version,
- 0,
- osd_reqid_t(),
- obc->obs.oi.mtime, // will be replaced in `apply_to()`
- 0}
- );
+ add_log_entry(
+ pg_log_entry_t::DELETE,
+ coid,
+ obc->obs.oi.version,
+ 0,
+ osd_reqid_t(),
+ obc->obs.oi.mtime, // will be replaced in `apply_to()`
+ 0);
txn.remove(
pg->get_collection_ref()->get_cid(),
ghobject_t{coid, ghobject_t::NO_GEN, shard_id_t::NO_SHARD});
@@ -289,8 +285,7 @@ SnapTrimObjSubEvent::remove_clone(
void SnapTrimObjSubEvent::remove_head_whiteout(
ObjectContextRef obc,
ObjectContextRef head_obc,
- ceph::os::Transaction& txn,
- std::vector<pg_log_entry_t>& log_entries
+ ceph::os::Transaction& txn
) {
// NOTE: this arguably constitutes minor interference with the
// tiering agent if this is a cache tier since a snap trim event
@@ -299,17 +294,14 @@ void SnapTrimObjSubEvent::remove_head_whiteout(
const auto head_oid = coid.get_head();
logger().info("{}: {} removing {}",
*this, coid, head_oid);
- log_entries.emplace_back(
- pg_log_entry_t{
- pg_log_entry_t::DELETE,
- head_oid,
- osd_op_p.at_version,
- head_obc->obs.oi.version,
- 0,
- osd_reqid_t(),
- obc->obs.oi.mtime, // will be replaced in `apply_to()`
- 0}
- );
+ add_log_entry(
+ pg_log_entry_t::DELETE,
+ head_oid,
+ head_obc->obs.oi.version,
+ 0,
+ osd_reqid_t(),
+ obc->obs.oi.mtime, // will be replaced in `apply_to()`
+ 0);
logger().info("{}: remove snap head", *this);
object_info_t& oi = head_obc->obs.oi;
delta_stats.num_objects--;
@@ -335,8 +327,7 @@ SnapTrimObjSubEvent::adjust_snaps(
ObjectContextRef obc,
ObjectContextRef head_obc,
const std::set<snapid_t>& new_snaps,
- ceph::os::Transaction& txn,
- std::vector<pg_log_entry_t>& log_entries
+ ceph::os::Transaction& txn
) {
head_obc->ssc->snapset.clone_snaps[coid.snap] =
std::vector<snapid_t>(new_snaps.rbegin(), new_snaps.rend());
@@ -354,17 +345,14 @@ SnapTrimObjSubEvent::adjust_snaps(
ghobject_t{coid, ghobject_t::NO_GEN, shard_id_t::NO_SHARD},
OI_ATTR,
bl);
- log_entries.emplace_back(
- pg_log_entry_t{
- pg_log_entry_t::MODIFY,
- coid,
- obc->obs.oi.version,
- obc->obs.oi.prior_version,
- 0,
- osd_reqid_t(),
- obc->obs.oi.mtime,
- 0}
- );
+ add_log_entry(
+ pg_log_entry_t::MODIFY,
+ coid,
+ obc->obs.oi.prior_version,
+ 0,
+ osd_reqid_t(),
+ obc->obs.oi.mtime,
+ 0);
return OpsExecuter::snap_map_modify(
coid, new_snaps, pg->snap_mapper, pg->osdriver, txn);
}
@@ -372,23 +360,19 @@ SnapTrimObjSubEvent::adjust_snaps(
void SnapTrimObjSubEvent::update_head(
ObjectContextRef obc,
ObjectContextRef head_obc,
- ceph::os::Transaction& txn,
- std::vector<pg_log_entry_t>& log_entries
+ ceph::os::Transaction& txn
) {
const auto head_oid = coid.get_head();
logger().info("{}: writing updated snapset on {}, snapset is {}",
*this, head_oid, head_obc->ssc->snapset);
- log_entries.emplace_back(
- pg_log_entry_t{
- pg_log_entry_t::MODIFY,
- head_oid,
- osd_op_p.at_version,
- head_obc->obs.oi.version,
- 0,
- osd_reqid_t(),
- obc->obs.oi.mtime,
- 0}
- );
+ add_log_entry(
+ pg_log_entry_t::MODIFY,
+ head_oid,
+ head_obc->obs.oi.version,
+ 0,
+ osd_reqid_t(),
+ obc->obs.oi.mtime,
+ 0);
head_obc->obs.oi.prior_version = head_obc->obs.oi.version;
head_obc->obs.oi.version = osd_op_p.at_version;
@@ -408,8 +392,7 @@ void SnapTrimObjSubEvent::update_head(
attrs);
}
-SnapTrimObjSubEvent::remove_or_update_iertr::future<
- SnapTrimObjSubEvent::remove_or_update_ret_t>
+SnapTrimObjSubEvent::remove_or_update_iertr::future<ceph::os::Transaction>
SnapTrimObjSubEvent::remove_or_update(
ObjectContextRef obc,
ObjectContextRef head_obc)
@@ -442,45 +425,41 @@ SnapTrimObjSubEvent::remove_or_update(
}
return seastar::do_with(ceph::os::Transaction{}, [=, this](auto &txn) {
- std::vector<pg_log_entry_t> log_entries{};
-
- int64_t num_objects_before_trim = delta_stats.num_objects;
- osd_op_p.at_version = pg->next_version();
- auto ret = remove_or_update_iertr::now();
- if (new_snaps.empty()) {
- // remove clone from snapset
- logger().info("{}: {} snaps {} -> {} ... deleting",
- *this, coid, old_snaps, new_snaps);
- ret = remove_clone(obc, head_obc, txn, log_entries);
- } else {
- // save adjusted snaps for this object
- logger().info("{}: {} snaps {} -> {}",
- *this, coid, old_snaps, new_snaps);
- ret = adjust_snaps(obc, head_obc, new_snaps, txn, log_entries);
- }
- return std::move(ret).si_then(
- [&txn, obc, num_objects_before_trim, log_entries=std::move(log_entries), head_obc=std::move(head_obc), this]() mutable {
- osd_op_p.at_version = pg->next_version();
-
- // save head snapset
- logger().debug("{}: {} new snapset {} on {}",
- *this, coid, head_obc->ssc->snapset, head_obc->obs.oi);
- if (head_obc->ssc->snapset.clones.empty() && head_obc->obs.oi.is_whiteout()) {
- remove_head_whiteout(obc, head_obc, txn, log_entries);
+ int64_t num_objects_before_trim = delta_stats.num_objects;
+ osd_op_p.at_version = pg->get_next_version();
+ auto ret = remove_or_update_iertr::now();
+ if (new_snaps.empty()) {
+ // remove clone from snapset
+ logger().info("{}: {} snaps {} -> {} ... deleting",
+ *this, coid, old_snaps, new_snaps);
+ ret = remove_clone(obc, head_obc, txn);
} else {
- update_head(obc, head_obc, txn, log_entries);
- }
- // Stats reporting - Set number of objects trimmed
- if (num_objects_before_trim > delta_stats.num_objects) {
- //int64_t num_objects_trimmed =
- // num_objects_before_trim - delta_stats.num_objects;
- //add_objects_trimmed_count(num_objects_trimmed);
+ // save adjusted snaps for this object
+ logger().info("{}: {} snaps {} -> {}",
+ *this, coid, old_snaps, new_snaps);
+ ret = adjust_snaps(obc, head_obc, new_snaps, txn);
}
- }).si_then(
- [&txn, log_entries=std::move(log_entries)] () mutable {
- return remove_or_update_iertr::make_ready_future<remove_or_update_ret_t>(
- std::make_pair(std::move(txn), std::move(log_entries)));
- });
+ return std::move(ret).si_then(
+ [&txn, obc, num_objects_before_trim,
+ head_obc=std::move(head_obc), this]() mutable {
+ // save head snapset
+ logger().debug("{}: {} new snapset {} on {}",
+ *this, coid, head_obc->ssc->snapset, head_obc->obs.oi);
+ if (head_obc->ssc->snapset.clones.empty() && head_obc->obs.oi.is_whiteout()) {
+ remove_head_whiteout(obc, head_obc, txn);
+ } else {
+ update_head(obc, head_obc, txn);
+ }
+ // Stats reporting - Set number of objects trimmed
+ if (num_objects_before_trim > delta_stats.num_objects) {
+ //int64_t num_objects_trimmed =
+ // num_objects_before_trim - delta_stats.num_objects;
+ //add_objects_trimmed_count(num_objects_trimmed);
+ }
+ }).si_then(
+ [&txn] () mutable {
+ return std::move(txn);
+ });
});
}
@@ -518,8 +497,7 @@ SnapTrimObjSubEvent::start()
logger().debug("{}: processing clone_obc={}", *this, clone_obc->get_oid());
return remove_or_update(
clone_obc, head_obc
- ).safe_then_unpack_interruptible([clone_obc, this]
- (auto&& txn, auto&& log_entries) mutable {
+ ).safe_then_interruptible([clone_obc, this](auto&& txn) mutable {
auto [submitted, all_completed] = pg->submit_transaction(
std::move(clone_obc),
std::move(txn),
diff --git a/src/crimson/osd/osd_operations/snaptrim_event.h b/src/crimson/osd/osd_operations/snaptrim_event.h
index f7c512881de..5afba7d7a47 100644
--- a/src/crimson/osd/osd_operations/snaptrim_event.h
+++ b/src/crimson/osd/osd_operations/snaptrim_event.h
@@ -147,28 +147,22 @@ private:
remove_or_update_iertr::future<> remove_clone(
ObjectContextRef obc,
ObjectContextRef head_obc,
- ceph::os::Transaction& txn,
- std::vector<pg_log_entry_t>& log_entries);
+ ceph::os::Transaction& txn);
void remove_head_whiteout(
ObjectContextRef obc,
ObjectContextRef head_obc,
- ceph::os::Transaction& txn,
- std::vector<pg_log_entry_t>& log_entries);
+ ceph::os::Transaction& txn);
interruptible_future<> adjust_snaps(
ObjectContextRef obc,
ObjectContextRef head_obc,
const std::set<snapid_t>& new_snaps,
- ceph::os::Transaction& txn,
- std::vector<pg_log_entry_t>& log_entries);
+ ceph::os::Transaction& txn);
void update_head(
ObjectContextRef obc,
ObjectContextRef head_obc,
- ceph::os::Transaction& txn,
- std::vector<pg_log_entry_t>& log_entries);
+ ceph::os::Transaction& txn);
- using remove_or_update_ret_t =
- std::pair<ceph::os::Transaction, std::vector<pg_log_entry_t>>;
- remove_or_update_iertr::future<remove_or_update_ret_t>
+ remove_or_update_iertr::future<ceph::os::Transaction>
remove_or_update(ObjectContextRef obc, ObjectContextRef head_obc);
// we don't need to synchronize with other instances started by
@@ -177,11 +171,32 @@ private:
static constexpr auto type_name = "SnapTrimObjSubEvent::wait_repop";
} wait_repop;
+ void add_log_entry(
+ int _op,
+ const hobject_t& _soid,
+ const eversion_t& pv,
+ version_t uv,
+ const osd_reqid_t& rid,
+ const utime_t& mt,
+ int return_code) {
+ log_entries.emplace_back(
+ _op,
+ _soid,
+ osd_op_p.at_version,
+ pv,
+ uv,
+ rid,
+ mt,
+ return_code);
+ osd_op_p.at_version.version++;
+ }
+
Ref<PG> pg;
PipelineHandle handle;
osd_op_params_t osd_op_p;
const hobject_t coid;
const snapid_t snap_to_trim;
+ std::vector<pg_log_entry_t> log_entries;
public:
PipelineHandle& get_handle() { return handle; }
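
Note on add_log_entry() above: it stamps each entry with the current osd_op_p.at_version and then bumps only the version component, so consecutive entries within one trim event get consecutive versions in the same epoch. A self-contained sketch of that numbering behaviour (plain types, not Ceph code):

    #include <vector>

    struct ev { unsigned epoch; unsigned version; };  // stand-in for eversion_t

    std::vector<ev> versions_for(ev start, int n_entries) {
      std::vector<ev> out;
      for (int i = 0; i < n_entries; ++i) {
        out.push_back(start);   // entry i gets the current at_version
        ++start.version;        // the next entry gets version + 1, same epoch
      }
      return out;
    }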
diff --git a/src/crimson/osd/pg.cc b/src/crimson/osd/pg.cc
index 91c6439f682..00592eb8347 100644
--- a/src/crimson/osd/pg.cc
+++ b/src/crimson/osd/pg.cc
@@ -773,6 +773,10 @@ PG::submit_transaction(
peering_state.append_log_with_trim_to_updated(std::move(log_entries), osd_op_p.at_version,
txn, true, false);
+ ceph_assert(!log_entries.empty());
+ ceph_assert(log_entries.rbegin()->version >= projected_last_update);
+ projected_last_update = log_entries.rbegin()->version;
+
auto [submitted, all_completed] = backend->mutate_object(
peering_state.get_acting_recovery_backfill(),
std::move(obc),
@@ -968,7 +972,7 @@ seastar::future<> PG::submit_error_log(
mempool::osd_pglog::list<pg_log_entry_t> log_entries;
log_entries.push_back(pg_log_entry_t(pg_log_entry_t::ERROR,
obc->obs.oi.soid,
- next_version(),
+ get_next_version(),
eversion_t(), 0,
reqid, utime_t(),
-e.value()));
@@ -1261,9 +1265,9 @@ PG::with_locked_obc(const hobject_t &hobj,
throw crimson::common::system_shutdown_exception();
}
const hobject_t oid = get_oid(hobj);
- auto wrapper = [f=std::move(f), this](auto obc) {
+ auto wrapper = [f=std::move(f), this](auto head, auto obc) {
check_blocklisted_obc_watchers(obc);
- return f(obc);
+ return f(head, obc);
};
switch (get_lock_type(op_info)) {
case RWState::RWREAD:
@@ -1531,14 +1535,19 @@ void PG::on_change(ceph::os::Transaction &t) {
}
void PG::context_registry_on_change() {
- obc_registry.for_each([](ObjectContextRef obc) {
- assert(obc);
- for (auto j = obc->watchers.begin();
- j != obc->watchers.end();
- j = obc->watchers.erase(j)) {
- j->second->discard_state();
- }
+ std::vector<seastar::shared_ptr<crimson::osd::Watch>> watchers;
+ obc_registry.for_each([&watchers](ObjectContextRef obc) {
+ assert(obc);
+ for (auto j = obc->watchers.begin();
+ j != obc->watchers.end();
+ j = obc->watchers.erase(j)) {
+ watchers.emplace_back(j->second);
+ }
});
+
+ for (auto &watcher : watchers) {
+ watcher->discard_state();
+ }
}
bool PG::can_discard_op(const MOSDOp& m) const {
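
Note on context_registry_on_change() above: the watcher refs are collected during the registry walk and discard_state() is only called afterwards, so the callback can no longer mutate the watcher map (or drop the obc) while it is being iterated. The same collect-then-act idiom with plain containers, as a standalone sketch:

    #include <map>
    #include <memory>
    #include <vector>

    struct Watcher { void discard_state() { /* may re-enter owner maps */ } };

    void teardown(std::map<int, std::shared_ptr<Watcher>>& watchers_by_id) {
      std::vector<std::shared_ptr<Watcher>> to_discard;
      for (auto it = watchers_by_id.begin(); it != watchers_by_id.end();
           it = watchers_by_id.erase(it)) {
        to_discard.push_back(it->second);   // keep the ref alive past the erase
      }
      for (auto& w : to_discard) {
        w->discard_state();                 // safe: the map walk is already done
      }
    }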
diff --git a/src/crimson/osd/pg.h b/src/crimson/osd/pg.h
index 056d5e9e1c0..4628b09b509 100644
--- a/src/crimson/osd/pg.h
+++ b/src/crimson/osd/pg.h
@@ -348,8 +348,7 @@ public:
void on_active_advmap(const OSDMapRef &osdmap) final;
epoch_t cluster_osdmap_trim_lower_bound() final {
- // TODO
- return 0;
+ return shard_services.get_osdmap_tlb();
}
void on_backfill_reserved() final {
@@ -506,7 +505,7 @@ public:
public:
using with_obc_func_t =
- std::function<load_obc_iertr::future<> (ObjectContextRef)>;
+ std::function<load_obc_iertr::future<> (ObjectContextRef, ObjectContextRef)>;
load_obc_iertr::future<> with_locked_obc(
const hobject_t &hobj,
@@ -610,9 +609,9 @@ private:
public:
cached_map_t get_osdmap() { return peering_state.get_osdmap(); }
- eversion_t next_version() {
+ eversion_t get_next_version() {
return eversion_t(get_osdmap_epoch(),
- ++projected_last_update.version);
+ projected_last_update.version + 1);
}
ShardServices& get_shard_services() final {
return shard_services;
@@ -643,6 +642,12 @@ private:
std::optional<pg_stat_t> pg_stats;
public:
+ OSDriver &get_osdriver() final {
+ return osdriver;
+ }
+ SnapMapper &get_snap_mapper() final {
+ return snap_mapper;
+ }
RecoveryBackend* get_recovery_backend() final {
return recovery_backend.get();
}
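
Note on the get_next_version() change above: it is now a pure peek (projected_last_update.version + 1) and no longer mutates state; the projection only advances in submit_transaction(), from the version of the last appended log entry. A self-contained sketch of that peek-then-commit split (simplified types, not Ceph code):

    #include <cassert>
    #include <utility>

    struct Projector {
      unsigned epoch = 1;
      unsigned projected = 0;
      // pure peek: calling it twice without submitting does not skip versions
      std::pair<unsigned, unsigned> get_next_version() const {
        return {epoch, projected + 1};
      }
      // mirrors: projected_last_update = log_entries.rbegin()->version
      void on_appended(unsigned last_entry_version) {
        assert(last_entry_version >= projected);
        projected = last_entry_version;
      }
    };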
diff --git a/src/crimson/osd/pg_backend.cc b/src/crimson/osd/pg_backend.cc
index 02acb9a55d3..5e3e7a91c0f 100644
--- a/src/crimson/osd/pg_backend.cc
+++ b/src/crimson/osd/pg_backend.cc
@@ -787,7 +787,7 @@ PGBackend::rollback_iertr::future<> PGBackend::rollback(
return obc_loader.with_clone_obc_only<RWState::RWWRITE>(
head, target_coid,
[this, &os, &txn, &delta_stats, &osd_op_params]
- (auto resolved_obc) {
+ (auto, auto resolved_obc) {
if (resolved_obc->obs.oi.soid.is_head()) {
// no-op: The resolved oid returned the head object
logger().debug("PGBackend::rollback: loaded head_obc: {}"
diff --git a/src/crimson/osd/pg_recovery.cc b/src/crimson/osd/pg_recovery.cc
index 09b45779ec8..13ac069c63d 100644
--- a/src/crimson/osd/pg_recovery.cc
+++ b/src/crimson/osd/pg_recovery.cc
@@ -140,13 +140,14 @@ size_t PGRecovery::start_primary_recovery_ops(
hobject_t head = soid.get_head();
+ bool head_missing = missing.is_missing(head);
logger().info(
"{} {} item.need {} {} {} {} {}",
__func__,
soid,
item.need,
missing.is_missing(soid) ? " (missing)":"",
- missing.is_missing(head) ? " (missing head)":"",
+ head_missing ? " (missing head)":"",
pg->get_recovery_backend()->is_recovering(soid) ? " (recovering)":"",
pg->get_recovery_backend()->is_recovering(head) ? " (recovering head)":"");
@@ -158,7 +159,15 @@ size_t PGRecovery::start_primary_recovery_ops(
} else if (pg->get_recovery_backend()->is_recovering(head)) {
++skipped;
} else {
- out->emplace_back(recover_missing(trigger, soid, item.need));
+ if (head_missing) {
+ auto it = missing.get_items().find(head);
+ assert(it != missing.get_items().end());
+ auto head_need = it->second.need;
+ out->emplace_back(recover_missing(trigger, head, head_need));
+ ++skipped;
+ } else {
+ out->emplace_back(recover_missing(trigger, soid, item.need));
+ }
++started;
}
@@ -266,20 +275,27 @@ PGRecovery::recover_missing(
RecoveryBackend::RecoveryBlockingEvent::TriggerI& trigger,
const hobject_t &soid, eversion_t need)
{
- if (pg->get_peering_state().get_missing_loc().is_deleted(soid)) {
- return pg->get_recovery_backend()->add_recovering(soid).wait_track_blocking(
- trigger,
- pg->get_recovery_backend()->recover_delete(soid, need));
+ logger().info("{} {} v {}", __func__, soid, need);
+ auto [recovering, added] = pg->get_recovery_backend()->add_recovering(soid);
+ if (added) {
+ logger().info("{} {} v {}, new recovery", __func__, soid, need);
+ if (pg->get_peering_state().get_missing_loc().is_deleted(soid)) {
+ return recovering.wait_track_blocking(
+ trigger,
+ pg->get_recovery_backend()->recover_delete(soid, need));
+ } else {
+ return recovering.wait_track_blocking(
+ trigger,
+ pg->get_recovery_backend()->recover_object(soid, need)
+ .handle_exception_interruptible(
+ [=, this, soid = std::move(soid)] (auto e) {
+ on_failed_recover({ pg->get_pg_whoami() }, soid, need);
+ return seastar::make_ready_future<>();
+ })
+ );
+ }
} else {
- return pg->get_recovery_backend()->add_recovering(soid).wait_track_blocking(
- trigger,
- pg->get_recovery_backend()->recover_object(soid, need)
- .handle_exception_interruptible(
- [=, this, soid = std::move(soid)] (auto e) {
- on_failed_recover({ pg->get_pg_whoami() }, soid, need);
- return seastar::make_ready_future<>();
- })
- );
+ return recovering.wait_for_recovered();
}
}
@@ -288,16 +304,23 @@ RecoveryBackend::interruptible_future<> PGRecovery::prep_object_replica_deletes(
const hobject_t& soid,
eversion_t need)
{
- return pg->get_recovery_backend()->add_recovering(soid).wait_track_blocking(
- trigger,
- pg->get_recovery_backend()->push_delete(soid, need).then_interruptible(
- [=, this] {
- object_stat_sum_t stat_diff;
- stat_diff.num_objects_recovered = 1;
- on_global_recover(soid, stat_diff, true);
- return seastar::make_ready_future<>();
- })
- );
+ logger().info("{} {} v {}", __func__, soid, need);
+ auto [recovering, added] = pg->get_recovery_backend()->add_recovering(soid);
+ if (added) {
+ logger().info("{} {} v {}, new recovery", __func__, soid, need);
+ return recovering.wait_track_blocking(
+ trigger,
+ pg->get_recovery_backend()->push_delete(soid, need).then_interruptible(
+ [=, this] {
+ object_stat_sum_t stat_diff;
+ stat_diff.num_objects_recovered = 1;
+ on_global_recover(soid, stat_diff, true);
+ return seastar::make_ready_future<>();
+ })
+ );
+ } else {
+ return recovering.wait_for_recovered();
+ }
}
RecoveryBackend::interruptible_future<> PGRecovery::prep_object_replica_pushes(
@@ -305,18 +328,26 @@ RecoveryBackend::interruptible_future<> PGRecovery::prep_object_replica_pushes(
const hobject_t& soid,
eversion_t need)
{
- return pg->get_recovery_backend()->add_recovering(soid).wait_track_blocking(
- trigger,
- pg->get_recovery_backend()->recover_object(soid, need)
- .handle_exception_interruptible(
- [=, this, soid = std::move(soid)] (auto e) {
- on_failed_recover({ pg->get_pg_whoami() }, soid, need);
- return seastar::make_ready_future<>();
- })
- );
+ logger().info("{} {} v {}", __func__, soid, need);
+ auto [recovering, added] = pg->get_recovery_backend()->add_recovering(soid);
+ if (added) {
+ logger().info("{} {} v {}, new recovery", __func__, soid, need);
+ return recovering.wait_track_blocking(
+ trigger,
+ pg->get_recovery_backend()->recover_object(soid, need)
+ .handle_exception_interruptible(
+ [=, this, soid = std::move(soid)] (auto e) {
+ on_failed_recover({ pg->get_pg_whoami() }, soid, need);
+ return seastar::make_ready_future<>();
+ })
+ );
+ } else {
+ return recovering.wait_for_recovered();
+ }
}
-void PGRecovery::on_local_recover(
+RecoveryBackend::interruptible_future<>
+PGRecovery::on_local_recover(
const hobject_t& soid,
const ObjectRecoveryInfo& recovery_info,
const bool is_delete,
@@ -332,20 +363,38 @@ void PGRecovery::on_local_recover(
ceph_abort("mark_unfound_lost (LOST_REVERT) is not implemented yet");
}
}
- pg->get_peering_state().recover_got(soid,
- recovery_info.version, is_delete, t);
-
- if (pg->is_primary()) {
- if (!is_delete) {
- auto& obc = pg->get_recovery_backend()->get_recovering(soid).obc; //TODO: move to pg backend?
- obc->obs.exists = true;
- obc->obs.oi = recovery_info.oi;
+
+ return RecoveryBackend::interruptor::async(
+ [soid, &recovery_info, is_delete, &t, this] {
+ if (soid.is_snap()) {
+ OSDriver::OSTransaction _t(pg->get_osdriver().get_transaction(&t));
+ int r = pg->get_snap_mapper().remove_oid(soid, &_t);
+ assert(r == 0 || r == -ENOENT);
+
+ if (!is_delete) {
+ set<snapid_t> snaps;
+ auto p = recovery_info.ss.clone_snaps.find(soid.snap);
+ assert(p != recovery_info.ss.clone_snaps.end());
+ snaps.insert(p->second.begin(), p->second.end());
+ pg->get_snap_mapper().add_oid(recovery_info.soid, snaps, &_t);
+ }
}
- if (!pg->is_unreadable_object(soid)) {
- pg->get_recovery_backend()->get_recovering(soid).set_readable();
+
+ pg->get_peering_state().recover_got(soid,
+ recovery_info.version, is_delete, t);
+
+ if (pg->is_primary()) {
+ if (!is_delete) {
+ auto& obc = pg->get_recovery_backend()->get_recovering(soid).obc; //TODO: move to pg backend?
+ obc->obs.exists = true;
+ obc->obs.oi = recovery_info.oi;
+ }
+ if (!pg->is_unreadable_object(soid)) {
+ pg->get_recovery_backend()->get_recovering(soid).set_readable();
+ }
+ pg->publish_stats_to_osd();
}
- pg->publish_stats_to_osd();
- }
+ });
}
void PGRecovery::on_global_recover (
@@ -449,9 +498,11 @@ void PGRecovery::enqueue_push(
const hobject_t& obj,
const eversion_t& v)
{
- logger().debug("{}: obj={} v={}",
+ logger().info("{}: obj={} v={}",
__func__, obj, v);
- pg->get_recovery_backend()->add_recovering(obj);
+ auto [recovering, added] = pg->get_recovery_backend()->add_recovering(obj);
+ if (!added)
+ return;
std::ignore = pg->get_recovery_backend()->recover_object(obj, v).\
handle_exception_interruptible([] (auto) {
ceph_abort_msg("got exception on backfill's push");
diff --git a/src/crimson/osd/pg_recovery.h b/src/crimson/osd/pg_recovery.h
index 719d0ad2d34..ad224073857 100644
--- a/src/crimson/osd/pg_recovery.h
+++ b/src/crimson/osd/pg_recovery.h
@@ -64,7 +64,7 @@ private:
const hobject_t& soid,
eversion_t need);
- void on_local_recover(
+ RecoveryBackend::interruptible_future<> on_local_recover(
const hobject_t& soid,
const ObjectRecoveryInfo& recovery_info,
bool is_delete,
diff --git a/src/crimson/osd/pg_recovery_listener.h b/src/crimson/osd/pg_recovery_listener.h
index c922b99561a..a53221c4a69 100644
--- a/src/crimson/osd/pg_recovery_listener.h
+++ b/src/crimson/osd/pg_recovery_listener.h
@@ -36,4 +36,6 @@ public:
virtual const pg_info_t& get_info() const= 0;
virtual seastar::future<> stop() = 0;
virtual void publish_stats_to_osd() = 0;
+ virtual OSDriver &get_osdriver() = 0;
+ virtual SnapMapper &get_snap_mapper() = 0;
};
diff --git a/src/crimson/osd/pg_shard_manager.cc b/src/crimson/osd/pg_shard_manager.cc
index 6061c856be2..1980b36271c 100644
--- a/src/crimson/osd/pg_shard_manager.cc
+++ b/src/crimson/osd/pg_shard_manager.cc
@@ -105,4 +105,13 @@ seastar::future<> PGShardManager::set_up_epoch(epoch_t e) {
});
}
+seastar::future<> PGShardManager::set_superblock(OSDSuperblock superblock) {
+ ceph_assert(seastar::this_shard_id() == PRIMARY_CORE);
+ get_osd_singleton_state().set_singleton_superblock(superblock);
+ return shard_services.invoke_on_all(
+ [superblock = std::move(superblock)](auto &local_service) {
+ return local_service.local_state.update_shard_superblock(superblock);
+ });
+}
+
}
diff --git a/src/crimson/osd/pg_shard_manager.h b/src/crimson/osd/pg_shard_manager.h
index cf13cb52bbf..1559dde0cb1 100644
--- a/src/crimson/osd/pg_shard_manager.h
+++ b/src/crimson/osd/pg_shard_manager.h
@@ -129,16 +129,17 @@ public:
FORWARD_TO_OSD_SINGLETON(init_meta_coll)
FORWARD_TO_OSD_SINGLETON(get_meta_coll)
- FORWARD_TO_OSD_SINGLETON(set_superblock)
-
// Core OSDMap methods
FORWARD_TO_OSD_SINGLETON(get_local_map)
FORWARD_TO_OSD_SINGLETON(load_map_bl)
FORWARD_TO_OSD_SINGLETON(load_map_bls)
FORWARD_TO_OSD_SINGLETON(store_maps)
+ FORWARD_TO_OSD_SINGLETON(trim_maps)
seastar::future<> set_up_epoch(epoch_t e);
+ seastar::future<> set_superblock(OSDSuperblock superblock);
+
template <typename F>
auto with_remote_shard_state(core_id_t core, F &&f) {
return shard_services.invoke_on(
@@ -151,7 +152,7 @@ public:
template <typename T, typename F>
auto process_ordered_op_remotely(
- crosscore_ordering_t::seq_t cc_seq,
+ OSDConnectionPriv::crosscore_ordering_t::seq_t cc_seq,
ShardServices &target_shard_services,
typename T::IRef &&op,
F &&f) {
diff --git a/src/crimson/osd/recovery_backend.h b/src/crimson/osd/recovery_backend.h
index 65e9bb01fbd..abf69589159 100644
--- a/src/crimson/osd/recovery_backend.h
+++ b/src/crimson/osd/recovery_backend.h
@@ -45,10 +45,10 @@ public:
coll{coll},
backend{backend} {}
virtual ~RecoveryBackend() {}
- WaitForObjectRecovery& add_recovering(const hobject_t& soid) {
+ std::pair<WaitForObjectRecovery&, bool> add_recovering(const hobject_t& soid) {
auto [it, added] = recovering.emplace(soid, new WaitForObjectRecovery{});
- assert(added);
- return *(it->second);
+ assert(it->second);
+ return {*(it->second), added};
}
WaitForObjectRecovery& get_recovering(const hobject_t& soid) {
assert(is_recovering(soid));
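
Note on the new add_recovering() signature above: returning (waiter, added) lets callers deduplicate concurrent recovery attempts, starting the recovery only when the entry was freshly inserted and otherwise waiting on the existing waiter. A sketch of the emplace-based dedup with a plain std::map stand-in for the recovering registry:

    #include <map>
    #include <memory>
    #include <utility>

    struct Waiter { /* wait_for_recovered(), wait_track_blocking(), ... */ };

    std::pair<Waiter&, bool>
    add_recovering(std::map<int, std::unique_ptr<Waiter>>& recovering, int soid) {
      auto [it, added] = recovering.emplace(soid, nullptr);
      if (added) {
        it->second = std::make_unique<Waiter>();  // first caller creates the waiter
      }
      return {*it->second, added};                // later callers just wait on it
    }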
diff --git a/src/crimson/osd/replicated_recovery_backend.cc b/src/crimson/osd/replicated_recovery_backend.cc
index bd301cc2b67..803d067fd65 100644
--- a/src/crimson/osd/replicated_recovery_backend.cc
+++ b/src/crimson/osd/replicated_recovery_backend.cc
@@ -34,12 +34,12 @@ ReplicatedRecoveryBackend::recover_object(
return maybe_pull_missing_obj(soid, need).then_interruptible([this, soid, need] {
logger().debug("recover_object: loading obc: {}", soid);
return pg.obc_loader.with_obc<RWState::RWREAD>(soid,
- [this, soid, need](auto obc) {
+ [this, soid, need](auto head, auto obc) {
logger().debug("recover_object: loaded obc: {}", obc->obs.oi.soid);
auto& recovery_waiter = get_recovering(soid);
recovery_waiter.obc = obc;
recovery_waiter.obc->wait_recovery_read();
- return maybe_push_shards(soid, need);
+ return maybe_push_shards(head, soid, need);
}).handle_error_interruptible(
crimson::osd::PG::load_obc_ertr::all_same_way([soid](auto& code) {
// TODO: may need eio handling?
@@ -51,16 +51,18 @@ ReplicatedRecoveryBackend::recover_object(
RecoveryBackend::interruptible_future<>
ReplicatedRecoveryBackend::maybe_push_shards(
+ const crimson::osd::ObjectContextRef &head_obc,
const hobject_t& soid,
eversion_t need)
{
return seastar::do_with(
get_shards_to_push(soid),
- [this, need, soid](auto &shards) {
+ [this, need, soid, head_obc](auto &shards) {
return interruptor::parallel_for_each(
shards,
- [this, need, soid](auto shard) {
- return prep_push(soid, need, shard).then_interruptible([this, soid, shard](auto push) {
+ [this, need, soid, head_obc](auto shard) {
+ return prep_push(head_obc, soid, need, shard
+ ).then_interruptible([this, soid, shard](auto push) {
auto msg = crimson::make_message<MOSDPGPush>();
msg->from = pg.get_pg_whoami();
msg->pgid = pg.get_pgid();
@@ -113,27 +115,31 @@ ReplicatedRecoveryBackend::maybe_pull_missing_obj(
if (!local_missing.is_missing(soid)) {
return seastar::make_ready_future<>();
}
- PullOp pull_op;
- auto& recovery_waiter = get_recovering(soid);
- recovery_waiter.pull_info =
- std::make_optional<RecoveryBackend::pull_info_t>();
- auto& pull_info = *recovery_waiter.pull_info;
- prepare_pull(pull_op, pull_info, soid, need);
- auto msg = crimson::make_message<MOSDPGPull>();
- msg->from = pg.get_pg_whoami();
- msg->set_priority(pg.get_recovery_op_priority());
- msg->pgid = pg.get_pgid();
- msg->map_epoch = pg.get_osdmap_epoch();
- msg->min_epoch = pg.get_last_peering_reset();
- msg->set_pulls({std::move(pull_op)});
- return interruptor::make_interruptible(
- shard_services.send_to_osd(
+ return pg.obc_loader.with_obc<RWState::RWREAD>(soid.get_head(),
+ [this, soid, need](auto head, auto) {
+ PullOp pull_op;
+ auto& recovery_waiter = get_recovering(soid);
+ recovery_waiter.pull_info =
+ std::make_optional<RecoveryBackend::pull_info_t>();
+ auto& pull_info = *recovery_waiter.pull_info;
+ prepare_pull(head, pull_op, pull_info, soid, need);
+ auto msg = crimson::make_message<MOSDPGPull>();
+ msg->from = pg.get_pg_whoami();
+ msg->set_priority(pg.get_recovery_op_priority());
+ msg->pgid = pg.get_pgid();
+ msg->map_epoch = pg.get_osdmap_epoch();
+ msg->min_epoch = pg.get_last_peering_reset();
+ msg->set_pulls({std::move(pull_op)});
+ return shard_services.send_to_osd(
pull_info.from.osd,
std::move(msg),
- pg.get_osdmap_epoch()
- )).then_interruptible([&recovery_waiter] {
+ pg.get_osdmap_epoch());
+ }).si_then([this, soid] {
+ auto& recovery_waiter = get_recovering(soid);
return recovery_waiter.wait_for_pull();
- });
+ }).handle_error_interruptible(
+ crimson::ct_error::assert_all("unexpected error")
+ );
}
RecoveryBackend::interruptible_future<>
@@ -201,15 +207,19 @@ ReplicatedRecoveryBackend::on_local_recover_persist(
epoch_t epoch_frozen)
{
logger().debug("{}", __func__);
- ceph::os::Transaction t;
- pg.get_recovery_handler()->on_local_recover(soid, _recovery_info, is_delete, t);
- logger().debug("ReplicatedRecoveryBackend::on_local_recover_persist: do_transaction...");
- return interruptor::make_interruptible(
- shard_services.get_store().do_transaction(coll, std::move(t)))
- .then_interruptible(
- [this, epoch_frozen, last_complete = pg.get_info().last_complete] {
- pg.get_recovery_handler()->_committed_pushed_object(epoch_frozen, last_complete);
- return seastar::make_ready_future<>();
+ return seastar::do_with(
+ ceph::os::Transaction(),
+ [this, soid, &_recovery_info, is_delete, epoch_frozen](auto &t) {
+ return pg.get_recovery_handler()->on_local_recover(
+ soid, _recovery_info, is_delete, t
+ ).then_interruptible([this, &t] {
+ logger().debug("ReplicatedRecoveryBackend::{}: do_transaction...", __func__);
+ return shard_services.get_store().do_transaction(coll, std::move(t));
+ }).then_interruptible(
+ [this, epoch_frozen, last_complete = pg.get_info().last_complete] {
+ pg.get_recovery_handler()->_committed_pushed_object(epoch_frozen, last_complete);
+ return seastar::make_ready_future<>();
+ });
});
}
@@ -235,18 +245,24 @@ ReplicatedRecoveryBackend::local_recover_delete(
}
return seastar::make_ready_future<>();
}).safe_then_interruptible([this, soid, epoch_to_freeze, need] {
- ObjectRecoveryInfo recovery_info;
- recovery_info.soid = soid;
- recovery_info.version = need;
- return on_local_recover_persist(soid, recovery_info,
- true, epoch_to_freeze);
- }, PGBackend::load_metadata_ertr::all_same_way(
- [this, soid, epoch_to_freeze, need] (auto e) {
- ObjectRecoveryInfo recovery_info;
+ return seastar::do_with(
+ ObjectRecoveryInfo(),
+ [soid, need, this, epoch_to_freeze](auto &recovery_info) {
recovery_info.soid = soid;
recovery_info.version = need;
return on_local_recover_persist(soid, recovery_info,
- true, epoch_to_freeze);
+ true, epoch_to_freeze);
+ });
+ }, PGBackend::load_metadata_ertr::all_same_way(
+ [this, soid, epoch_to_freeze, need] (auto e) {
+ return seastar::do_with(
+ ObjectRecoveryInfo(),
+ [soid, need, this, epoch_to_freeze](auto &recovery_info) {
+ recovery_info.soid = soid;
+ recovery_info.version = need;
+ return on_local_recover_persist(soid, recovery_info,
+ true, epoch_to_freeze);
+ });
})
);
}
@@ -293,6 +309,7 @@ ReplicatedRecoveryBackend::recover_delete(
RecoveryBackend::interruptible_future<PushOp>
ReplicatedRecoveryBackend::prep_push(
+ const crimson::osd::ObjectContextRef &head_obc,
const hobject_t& soid,
eversion_t need,
pg_shard_t pg_shard)
@@ -323,6 +340,8 @@ ReplicatedRecoveryBackend::prep_push(
push_info.recovery_info.copy_subset = data_subset;
push_info.recovery_info.soid = soid;
push_info.recovery_info.oi = obc->obs.oi;
+ assert(head_obc->ssc);
+ push_info.recovery_info.ss = head_obc->ssc->snapset;
push_info.recovery_info.version = obc->obs.oi.version;
push_info.recovery_info.object_exist =
missing_iter->second.clean_regions.object_is_exist();
@@ -340,7 +359,9 @@ ReplicatedRecoveryBackend::prep_push(
});
}
-void ReplicatedRecoveryBackend::prepare_pull(PullOp& pull_op,
+void ReplicatedRecoveryBackend::prepare_pull(
+ const crimson::osd::ObjectContextRef &head_obc,
+ PullOp& pull_op,
pull_info_t& pull_info,
const hobject_t& soid,
eversion_t need) {
@@ -355,6 +376,10 @@ void ReplicatedRecoveryBackend::prepare_pull(PullOp& pull_op,
pull_op.recovery_info.copy_subset.insert(0, (uint64_t) -1);
pull_op.recovery_info.copy_subset.intersection_of(
missing_iter->second.clean_regions.get_dirty_regions());
+ if (soid.is_snap()) {
+ assert(head_obc->ssc);
+ pull_op.recovery_info.ss = head_obc->ssc->snapset;
+ }
pull_op.recovery_info.size = ((uint64_t) -1);
pull_op.recovery_info.object_exist =
missing_iter->second.clean_regions.object_is_exist();
@@ -689,7 +714,7 @@ ReplicatedRecoveryBackend::_handle_pull_response(
if (pull_info.recovery_progress.first) {
prepare_waiter = pg.obc_loader.with_obc<RWState::RWNONE>(
pull_info.recovery_info.soid,
- [&pull_info, &recovery_waiter, &push_op](auto obc) {
+ [&pull_info, &recovery_waiter, &push_op](auto, auto obc) {
pull_info.obc = obc;
recovery_waiter.obc = obc;
obc->obs.oi.decode_no_oid(push_op.attrset.at(OI_ATTR), push_op.soid);
@@ -724,21 +749,24 @@ ReplicatedRecoveryBackend::_handle_pull_response(
push_op.attrset, std::move(push_op.omap_entries), t)
.then_interruptible(
[this, response, &pull_info, &push_op, complete,
- t, bytes_recovered=data.length()] {
+ t, bytes_recovered=data.length()]()
+ -> RecoveryBackend::interruptible_future<bool> {
pull_info.stat.num_keys_recovered += push_op.omap_entries.size();
pull_info.stat.num_bytes_recovered += bytes_recovered;
if (complete) {
pull_info.stat.num_objects_recovered++;
- pg.get_recovery_handler()->on_local_recover(
+ return pg.get_recovery_handler()->on_local_recover(
push_op.soid, get_recovering(push_op.soid).pull_info->recovery_info,
- false, *t);
- return true;
+ false, *t
+ ).then_interruptible([] {
+ return true;
+ });
} else {
response->soid = push_op.soid;
response->recovery_info = pull_info.recovery_info;
response->recovery_progress = pull_info.recovery_progress;
- return false;
+ return seastar::make_ready_future<bool>(false);
}
});
});
@@ -834,10 +862,11 @@ ReplicatedRecoveryBackend::_handle_push(
.then_interruptible(
[this, complete, &push_op, t] {
if (complete) {
- pg.get_recovery_handler()->on_local_recover(
+ return pg.get_recovery_handler()->on_local_recover(
push_op.recovery_info.soid, push_op.recovery_info,
false, *t);
}
+ return RecoveryBackend::interruptor::now();
});
}
diff --git a/src/crimson/osd/replicated_recovery_backend.h b/src/crimson/osd/replicated_recovery_backend.h
index b023b7417e5..16d6369a91f 100644
--- a/src/crimson/osd/replicated_recovery_backend.h
+++ b/src/crimson/osd/replicated_recovery_backend.h
@@ -49,10 +49,12 @@ protected:
interruptible_future<> handle_recovery_delete_reply(
Ref<MOSDPGRecoveryDeleteReply> m);
interruptible_future<PushOp> prep_push(
+ const crimson::osd::ObjectContextRef &head_obc,
const hobject_t& soid,
eversion_t need,
pg_shard_t pg_shard);
void prepare_pull(
+ const crimson::osd::ObjectContextRef &head_obc,
PullOp& pull_op,
pull_info_t& pull_info,
const hobject_t& soid,
@@ -124,6 +126,7 @@ private:
load_obc_ertr>;
interruptible_future<> maybe_push_shards(
+ const crimson::osd::ObjectContextRef &head_obc,
const hobject_t& soid,
eversion_t need);
diff --git a/src/crimson/osd/shard_services.cc b/src/crimson/osd/shard_services.cc
index c0688f95769..d73a85fb5f8 100644
--- a/src/crimson/osd/shard_services.cc
+++ b/src/crimson/osd/shard_services.cc
@@ -72,7 +72,7 @@ seastar::future<> PerShardState::stop_pgs()
});
}
-std::map<pg_t, pg_stat_t> PerShardState::get_pg_stats() const
+std::map<pg_t, pg_stat_t> PerShardState::get_pg_stats()
{
assert_core();
std::map<pg_t, pg_stat_t> ret;
@@ -119,6 +119,13 @@ HeartbeatStampsRef PerShardState::get_hb_stamps(int peer)
return stamps->second;
}
+seastar::future<> PerShardState::update_shard_superblock(OSDSuperblock superblock)
+{
+ assert_core();
+ per_shard_superblock = std::move(superblock);
+ return seastar::now();
+}
+
OSDSingletonState::OSDSingletonState(
int whoami,
crimson::net::Messenger &cluster_msgr,
@@ -168,7 +175,8 @@ seastar::future<> OSDSingletonState::send_to_osd(
} else {
auto conn = cluster_msgr.connect(
osdmap->get_cluster_addrs(peer).front(), CEPH_ENTITY_TYPE_OSD);
- return conn->send(std::move(m));
+ // TODO: gate the crosscore sending
+ return conn->send_with_throttling(std::move(m));
}
}
@@ -351,7 +359,6 @@ void OSDSingletonState::handle_conf_change(
seastar::future<OSDSingletonState::local_cached_map_t>
OSDSingletonState::get_local_map(epoch_t e)
{
- // TODO: use LRU cache for managing osdmap, fallback to disk if we have to
if (auto found = osdmaps.find(e); found) {
logger().debug("{} osdmap.{} found in cache", __func__, e);
return seastar::make_ready_future<local_cached_map_t>(std::move(found));
@@ -391,6 +398,9 @@ seastar::future<std::map<epoch_t, bufferlist>> OSDSingletonState::load_map_bls(
logger().debug("{} loading maps [{},{}]",
__func__, first, last);
ceph_assert(first <= last);
+ // TODO: take osd_map_max into account
+ //int max = cct->_conf->osd_map_message_max;
+ //ssize_t max_bytes = cct->_conf->osd_map_message_max_bytes;
return seastar::map_reduce(boost::make_counting_iterator<epoch_t>(first),
boost::make_counting_iterator<epoch_t>(last + 1),
[this](epoch_t e) {
@@ -422,15 +432,19 @@ seastar::future<std::unique_ptr<OSDMap>> OSDSingletonState::load_map(epoch_t e)
seastar::future<> OSDSingletonState::store_maps(ceph::os::Transaction& t,
epoch_t start, Ref<MOSDMap> m)
{
- return seastar::do_for_each(
- boost::make_counting_iterator(start),
- boost::make_counting_iterator(m->get_last() + 1),
- [&t, m, this](epoch_t e) {
+ return seastar::do_with(
+ std::map<epoch_t, OSDMap*>(),
+ [&t, m, start, this](auto &added_maps) {
+ return seastar::do_for_each(
+ boost::make_counting_iterator(start),
+ boost::make_counting_iterator(m->get_last() + 1),
+ [&t, m, this, &added_maps](epoch_t e) {
if (auto p = m->maps.find(e); p != m->maps.end()) {
auto o = std::make_unique<OSDMap>();
o->decode(p->second);
logger().info("store_maps storing osdmap.{}", e);
store_map_bl(t, e, std::move(std::move(p->second)));
+ added_maps.emplace(e, o.get());
osdmaps.insert(e, std::move(o));
return seastar::now();
} else if (auto p = m->incremental_maps.find(e);
@@ -438,7 +452,8 @@ seastar::future<> OSDSingletonState::store_maps(ceph::os::Transaction& t,
logger().info("store_maps found osdmap.{} incremental map, "
"loading osdmap.{}", e, e - 1);
ceph_assert(std::cmp_greater(e, 0u));
- return load_map(e - 1).then([e, bl=p->second, &t, this](auto o) {
+ return load_map(e - 1).then(
+ [&added_maps, e, bl=p->second, &t, this](auto o) {
OSDMap::Incremental inc;
auto i = bl.cbegin();
inc.decode(i);
@@ -447,6 +462,7 @@ seastar::future<> OSDSingletonState::store_maps(ceph::os::Transaction& t,
o->encode(fbl, inc.encode_features | CEPH_FEATURE_RESERVED);
logger().info("store_maps storing osdmap.{}", o->get_epoch());
store_map_bl(t, e, std::move(fbl));
+ added_maps.emplace(e, o.get());
osdmaps.insert(e, std::move(o));
return seastar::now();
});
@@ -454,7 +470,41 @@ seastar::future<> OSDSingletonState::store_maps(ceph::os::Transaction& t,
logger().error("MOSDMap lied about what maps it had?");
return seastar::now();
}
+ }).then([&t, this, &added_maps] {
+ auto [e, map] = *added_maps.begin();
+ auto lastmap = osdmaps.find(e - 1).get();
+ meta_coll->store_final_pool_info(t, lastmap, added_maps);
+ return seastar::now();
});
+ });
+}
+
+// Note: store/set_superblock is called later, in OSD::handle_osd_map,
+// so we use the OSD's superblock reference in the meantime.
+void OSDSingletonState::trim_maps(ceph::os::Transaction& t,
+ OSDSuperblock& superblock)
+{
+ epoch_t min =
+ std::min(superblock.cluster_osdmap_trim_lower_bound,
+ osdmaps.cached_key_lower_bound());
+
+ if (min <= superblock.get_oldest_map()) {
+ return;
+ }
+ logger().debug("{}: min={} oldest_map={}", __func__, min, superblock.get_oldest_map());
+
+ // Trim from the superblock's oldest_map up to `min`.
+ // Break if we have exceeded the txn target size.
+ while (superblock.get_oldest_map() < min &&
+ t.get_num_ops() < crimson::common::local_conf()->osd_target_transaction_size) {
+ logger().debug("{}: removing old osdmap epoch {}", __func__, superblock.get_oldest_map());
+ meta_coll->remove_map(t, superblock.get_oldest_map());
+ superblock.maps.erase(superblock.get_oldest_map());
+ }
+
+ // we should not trim past osdmaps.cached_key_lower_bound()
+ // as there may still be PGs with those map epochs recorded.
+ ceph_assert(min <= osdmaps.cached_key_lower_bound());
}
seastar::future<Ref<PG>> ShardServices::make_pg(
@@ -715,30 +765,36 @@ seastar::future<> OSDSingletonState::send_incremental_map(
"superblock's oldest map: {}",
__func__, first, superblock.get_oldest_map());
if (first >= superblock.get_oldest_map()) {
+ // TODO: osd_map_share_max_epochs
+ // See OSDService::build_incremental_map_msg
+ if (first < superblock.cluster_osdmap_trim_lower_bound) {
+ logger().info("{}: cluster osdmap lower bound: {} "
+ " > first {}, starting with full map",
+ __func__, superblock.cluster_osdmap_trim_lower_bound, first);
+ // we don't have the next map the target wants,
+ // so start with a full map.
+ first = superblock.cluster_osdmap_trim_lower_bound;
+ }
return load_map_bls(
first, superblock.get_newest_map()
- ).then([this, &conn, first](auto&& bls) {
+ ).then([this, &conn](auto&& bls) {
auto m = crimson::make_message<MOSDMap>(
monc.get_fsid(),
osdmap->get_encoding_features());
- m->cluster_osdmap_trim_lower_bound = first;
+ m->cluster_osdmap_trim_lower_bound = superblock.cluster_osdmap_trim_lower_bound;
m->newest_map = superblock.get_newest_map();
m->maps = std::move(bls);
return conn.send(std::move(m));
});
} else {
+ // See OSDService::send_incremental_map
+ // just send latest full map
return load_map_bl(osdmap->get_epoch()
).then([this, &conn](auto&& bl) mutable {
auto m = crimson::make_message<MOSDMap>(
monc.get_fsid(),
osdmap->get_encoding_features());
- /* TODO: once we support the tracking of superblock's
- * cluster_osdmap_trim_lower_bound, the MOSDMap should
- * be populated with this value instead of the oldest_map.
- * See: OSD::handle_osd_map for how classic updates the
- * cluster's trim lower bound.
- */
- m->cluster_osdmap_trim_lower_bound = superblock.get_oldest_map();
+ m->cluster_osdmap_trim_lower_bound = superblock.cluster_osdmap_trim_lower_bound;
m->newest_map = superblock.get_newest_map();
m->maps.emplace(osdmap->get_epoch(), std::move(bl));
return conn.send(std::move(m));
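
Note on trim_maps() above: the trim target is the minimum of the cluster-wide trim lower bound and the oldest epoch still referenced by the cached maps, and removals are capped per transaction by osd_target_transaction_size. A standalone sketch of those two bounds (plain integers; the real code keys off OSDSuperblock and the cached map set):

    #include <algorithm>
    #include <cstdint>

    using epoch_t = std::uint32_t;

    // Never trim past what the cluster allows, nor past epochs still cached/in use.
    epoch_t compute_trim_target(epoch_t cluster_trim_lower_bound,
                                epoch_t oldest_cached_epoch) {
      return std::min(cluster_trim_lower_bound, oldest_cached_epoch);
    }

    // Per-transaction budget keeps each removal batch small; any remainder is
    // trimmed on a later map update.
    epoch_t trim_some(epoch_t oldest_map, epoch_t target, unsigned op_budget) {
      while (oldest_map < target && op_budget-- > 0) {
        // meta_coll->remove_map(t, oldest_map) in the real code
        ++oldest_map;
      }
      return oldest_map;   // becomes the superblock's new oldest_map
    }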
diff --git a/src/crimson/osd/shard_services.h b/src/crimson/osd/shard_services.h
index d71513a6645..37993a4f679 100644
--- a/src/crimson/osd/shard_services.h
+++ b/src/crimson/osd/shard_services.h
@@ -77,6 +77,10 @@ class PerShardState {
PerfCounters *perf = nullptr;
PerfCounters *recoverystate_perf = nullptr;
+ const epoch_t& get_osdmap_tlb() {
+ return per_shard_superblock.cluster_osdmap_trim_lower_bound;
+ }
+
// Op Management
OSDOperationRegistry registry;
OperationThrottler throttler;
@@ -115,7 +119,7 @@ class PerShardState {
PGMap pg_map;
seastar::future<> stop_pgs();
- std::map<pg_t, pg_stat_t> get_pg_stats() const;
+ std::map<pg_t, pg_stat_t> get_pg_stats();
seastar::future<> broadcast_map_to_pgs(
ShardServices &shard_services,
epoch_t epoch);
@@ -181,6 +185,8 @@ class PerShardState {
HeartbeatStampsRef get_hb_stamps(int peer);
std::map<int, HeartbeatStampsRef> heartbeat_stamps;
+ seastar::future<> update_shard_superblock(OSDSuperblock superblock);
+
// Time state
const ceph::mono_time startup_time;
ceph::signedspan get_mnow() const {
@@ -188,6 +194,8 @@ class PerShardState {
return ceph::mono_clock::now() - startup_time;
}
+ OSDSuperblock per_shard_superblock;
+
public:
PerShardState(
int whoami,
@@ -256,7 +264,7 @@ private:
}
OSDSuperblock superblock;
- void set_superblock(OSDSuperblock _superblock) {
+ void set_singleton_superblock(OSDSuperblock _superblock) {
superblock = std::move(_superblock);
}
@@ -316,6 +324,7 @@ private:
epoch_t e, bufferlist&& bl);
seastar::future<> store_maps(ceph::os::Transaction& t,
epoch_t start, Ref<MOSDMap> m);
+ void trim_maps(ceph::os::Transaction& t, OSDSuperblock& superblock);
};
/**
@@ -508,6 +517,8 @@ public:
FORWARD_TO_OSD_SINGLETON(send_pg_temp)
FORWARD_TO_LOCAL_CONST(get_mnow)
FORWARD_TO_LOCAL(get_hb_stamps)
+ FORWARD_TO_LOCAL(update_shard_superblock)
+ FORWARD_TO_LOCAL(get_osdmap_tlb)
FORWARD(pg_created, pg_created, local_state.pg_map)
diff --git a/src/include/cephfs/types.h b/src/include/cephfs/types.h
index cca0a619305..2abfe29b08c 100644
--- a/src/include/cephfs/types.h
+++ b/src/include/cephfs/types.h
@@ -342,6 +342,8 @@ public:
}
void encode(ceph::buffer::list &bl) const;
void decode(ceph::buffer::list::const_iterator& bl);
+ void dump(ceph::Formatter *f) const;
+ static void generate_test_instances(std::list<inline_data_t*>& ls);
version_t version = 1;
diff --git a/src/include/frag.h b/src/include/frag.h
index ec18bddfbb1..5ea2429aee8 100644
--- a/src/include/frag.h
+++ b/src/include/frag.h
@@ -159,6 +159,15 @@ public:
ceph::decode_raw(v, p);
_enc = v;
}
+ void dump(ceph::Formatter *f) const {
+ f->dump_unsigned("value", value());
+ f->dump_unsigned("bits", bits());
+ }
+ static void generate_test_instances(std::list<frag_t*>& ls) {
+ ls.push_back(new frag_t);
+ ls.push_back(new frag_t(10, 2));
+ ls.push_back(new frag_t(11, 3));
+ }
bool operator<(const frag_t& b) const
{
if (value() != b.value())
@@ -525,6 +534,11 @@ public:
}
f->close_section(); // splits
}
+
+ static void generate_test_instances(std::list<fragtree_t*>& ls) {
+ ls.push_back(new fragtree_t);
+ ls.push_back(new fragtree_t);
+ }
};
WRITE_CLASS_ENCODER(fragtree_t)
diff --git a/src/include/fs_types.h b/src/include/fs_types.h
index c1932bfcc30..606b9c6503d 100644
--- a/src/include/fs_types.h
+++ b/src/include/fs_types.h
@@ -75,6 +75,13 @@ struct inodeno_t {
using ceph::decode;
decode(val, p);
}
+ void dump(ceph::Formatter *f) const {
+ f->dump_unsigned("val", val);
+ }
+ static void generate_test_instances(std::list<inodeno_t*>& ls) {
+ ls.push_back(new inodeno_t(1));
+ ls.push_back(new inodeno_t(123456789));
+ }
} __attribute__ ((__may_alias__));
WRITE_CLASS_ENCODER(inodeno_t)
diff --git a/src/include/mempool.h b/src/include/mempool.h
index 076c62afe19..a6dca48dd6f 100644
--- a/src/include/mempool.h
+++ b/src/include/mempool.h
@@ -26,6 +26,10 @@
#include <boost/container/flat_set.hpp>
#include <boost/container/flat_map.hpp>
+#if defined(_GNU_SOURCE) && defined(WITH_SEASTAR) && !defined(WITH_ALIEN)
+# include <sched.h>
+#endif
+
#include "common/Formatter.h"
#include "common/ceph_atomic.h"
#include "include/ceph_assert.h"
@@ -201,6 +205,24 @@ enum {
num_shards = 1 << num_shard_bits
};
+static size_t pick_a_shard_int() {
+#if defined(_GNU_SOURCE) && defined(WITH_SEASTAR) && !defined(WITH_ALIEN)
+ // thread-local storage is actually just an approximation;
+ // what we truly want is _cpu-local storage_.
+ //
+ // on the architectures we care about, sched_getcpu() is
+ // a syscall handled in userspace (vDSO). it grabs the cpu
+ // id the kernel exposes to the task on context switch.
+ return sched_getcpu() & ((1 << num_shard_bits) - 1);
+#else
+ // Dirt cheap, see:
+ // https://fossies.org/dox/glibc-2.32/pthread__self_8c_source.html
+ size_t me = (size_t)pthread_self();
+ size_t i = (me >> CEPH_PAGE_SHIFT) & ((1 << num_shard_bits) - 1);
+ return i;
+#endif
+}
+
//
// Align shard to a cacheline.
//
@@ -240,7 +262,18 @@ const char *get_pool_name(pool_index_t ix);
struct type_t {
const char *type_name;
size_t item_size;
+#if defined(WITH_SEASTAR) && !defined(WITH_ALIEN)
+ struct type_shard_t {
+ ceph::atomic<ssize_t> items = {0}; // signed
+ char __padding[128 - sizeof(ceph::atomic<ssize_t>)];
+ } __attribute__ ((aligned (128)));
+ static_assert(sizeof(type_shard_t) == 128,
+ "type_shard_t should be cacheline-sized");
+ type_shard_t shards[num_shards];
+#else
+// XXX: consider dropping this case for the classic OSD, pending perf tests
ceph::atomic<ssize_t> items = {0}; // signed
+#endif
};
struct type_info_hash {
@@ -255,6 +288,8 @@ class pool_t {
mutable std::mutex lock; // only used for types list
std::unordered_map<const char *, type_t> type_map;
+ template<pool_index_t, typename T>
+ friend class pool_allocator;
public:
//
// How much this pool consumes. O(<num_shards>)
@@ -264,19 +299,6 @@ public:
void adjust_count(ssize_t items, ssize_t bytes);
- static size_t pick_a_shard_int() {
- // Dirt cheap, see:
- // https://fossies.org/dox/glibc-2.32/pthread__self_8c_source.html
- size_t me = (size_t)pthread_self();
- size_t i = (me >> CEPH_PAGE_SHIFT) & ((1 << num_shard_bits) - 1);
- return i;
- }
-
- shard_t* pick_a_shard() {
- size_t i = pick_a_shard_int();
- return &shard[i];
- }
-
type_t *get_type(const std::type_info& ti, size_t size) {
std::lock_guard<std::mutex> l(lock);
auto p = type_map.find(ti.name());
@@ -339,11 +361,16 @@ public:
T* allocate(size_t n, void *p = nullptr) {
size_t total = sizeof(T) * n;
- shard_t *shard = pool->pick_a_shard();
- shard->bytes += total;
- shard->items += n;
+ const auto shid = pick_a_shard_int();
+ auto& shard = pool->shard[shid];
+ shard.bytes += total;
+ shard.items += n;
if (type) {
+#if defined(WITH_SEASTAR) && !defined(WITH_ALIEN)
+ type->shards[shid].items += n;
+#else
type->items += n;
+#endif
}
T* r = reinterpret_cast<T*>(new char[total]);
return r;
@@ -351,22 +378,32 @@ public:
void deallocate(T* p, size_t n) {
size_t total = sizeof(T) * n;
- shard_t *shard = pool->pick_a_shard();
- shard->bytes -= total;
- shard->items -= n;
+ const auto shid = pick_a_shard_int();
+ auto& shard = pool->shard[shid];
+ shard.bytes -= total;
+ shard.items -= n;
if (type) {
+#if defined(WITH_SEASTAR) && !defined(WITH_ALIEN)
+ type->shards[shid].items -= n;
+#else
type->items -= n;
+#endif
}
delete[] reinterpret_cast<char*>(p);
}
T* allocate_aligned(size_t n, size_t align, void *p = nullptr) {
size_t total = sizeof(T) * n;
- shard_t *shard = pool->pick_a_shard();
- shard->bytes += total;
- shard->items += n;
+ const auto shid = pick_a_shard_int();
+ auto& shard = pool->shard[shid];
+ shard.bytes += total;
+ shard.items += n;
if (type) {
+#if defined(WITH_SEASTAR) && !defined(WITH_ALIEN)
+ type->shards[shid].items += n;
+#else
type->items += n;
+#endif
}
char *ptr;
int rc = ::posix_memalign((void**)(void*)&ptr, align, total);
@@ -378,11 +415,16 @@ public:
void deallocate_aligned(T* p, size_t n) {
size_t total = sizeof(T) * n;
- shard_t *shard = pool->pick_a_shard();
- shard->bytes -= total;
- shard->items -= n;
+ const auto shid = pick_a_shard_int();
+ auto& shard = pool->shard[shid];
+ shard.bytes -= total;
+ shard.items -= n;
if (type) {
+#if defined(WITH_SEASTAR) && !defined(WITH_ALIEN)
+ type->shards[shid].items -= n;
+#else
type->items -= n;
+#endif
}
aligned_free(p);
}
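The accounting above deliberately lets individual shard counters drift (an allocation and its matching free may land on different shards), so only the sum across shards is meaningful. A rough stand-alone sketch of how a reader would aggregate the per-type counters (hypothetical types, not the mempool API):

    #include <atomic>
    #include <cstddef>
    #include <sys/types.h>

    constexpr size_t kNumShards = 32;

    // One padded counter per shard (assumption: 128-byte padding as in the
    // patch; the exact size does not matter for the sketch).
    struct TypeShard {
      std::atomic<ssize_t> items{0};
      char padding[128 - sizeof(std::atomic<ssize_t>)];
    };

    struct TypeCounters {
      TypeShard shards[kNumShards];
    };

    // Readers reconstruct the per-type total by summing all shards; a single
    // shard's value may be negative because frees can hit a different shard
    // than the matching allocation did.
    ssize_t total_items(const TypeCounters& t) {
      ssize_t sum = 0;
      for (const auto& s : t.shards)
        sum += s.items.load(std::memory_order_relaxed);
      return sum;
    }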
diff --git a/src/include/object.h b/src/include/object.h
index 4564af86e57..2e5fb471c13 100644
--- a/src/include/object.h
+++ b/src/include/object.h
@@ -25,6 +25,7 @@
#include "include/rados.h"
#include "include/unordered_map.h"
+#include "common/Formatter.h"
#include "hash.h"
#include "encoding.h"
@@ -58,6 +59,15 @@ struct object_t {
using ceph::decode;
decode(name, bl);
}
+
+ void dump(ceph::Formatter *f) const {
+ f->dump_string("name", name);
+ }
+
+ static void generate_test_instances(std::list<object_t*>& o) {
+ o.push_back(new object_t);
+ o.push_back(new object_t("myobject"));
+ }
};
WRITE_CLASS_ENCODER(object_t)
@@ -170,6 +180,14 @@ struct sobject_t {
decode(oid, bl);
decode(snap, bl);
}
+ void dump(ceph::Formatter *f) const {
+ f->dump_stream("oid") << oid;
+ f->dump_stream("snap") << snap;
+ }
+ static void generate_test_instances(std::list<sobject_t*>& o) {
+ o.push_back(new sobject_t);
+ o.push_back(new sobject_t(object_t("myobject"), 123));
+ }
};
WRITE_CLASS_ENCODER(sobject_t)
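The dump()/generate_test_instances() pairs added throughout this patch follow the usual pattern used by Ceph's encoding tests: a harness instantiates the test objects and walks them through a Formatter. A sketch of that flow (the driver loop is illustrative only; Formatter::create, open_object_section, close_section and flush are the assumed common/Formatter.h API):

    #include <iostream>
    #include <list>
    #include <memory>

    #include "common/Formatter.h"
    #include "include/object.h"

    // Illustrative driver: dump every generated test instance as pretty JSON.
    void dump_object_t_instances() {
      std::list<object_t*> instances;
      object_t::generate_test_instances(instances);

      std::unique_ptr<ceph::Formatter> f(ceph::Formatter::create("json-pretty"));
      for (object_t* o : instances) {
        f->open_object_section("object");
        o->dump(f.get());
        f->close_section();
        delete o;
      }
      f->flush(std::cout);
    }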
diff --git a/src/include/types.h b/src/include/types.h
index bdd09a53df1..a50a506eb0b 100644
--- a/src/include/types.h
+++ b/src/include/types.h
@@ -371,6 +371,14 @@ struct client_t {
using ceph::decode;
decode(v, bl);
}
+ void dump(ceph::Formatter *f) const {
+ f->dump_int("id", v);
+ }
+ static void generate_test_instances(std::list<client_t*>& ls) {
+ ls.push_back(new client_t);
+ ls.push_back(new client_t(1));
+ ls.push_back(new client_t(123));
+ }
};
WRITE_CLASS_ENCODER(client_t)
@@ -517,7 +525,13 @@ struct shard_id_t {
using ceph::decode;
decode(id, bl);
}
-
+ void dump(ceph::Formatter *f) const {
+ f->dump_int("id", id);
+ }
+ static void generate_test_instances(std::list<shard_id_t*>& ls) {
+ ls.push_back(new shard_id_t(1));
+ ls.push_back(new shard_id_t(2));
+ }
bool operator==(const shard_id_t&) const = default;
auto operator<=>(const shard_id_t&) const = default;
};
@@ -561,6 +575,13 @@ struct errorcode32_t {
decode(code, bl);
code = ceph_to_hostos_errno(code);
}
+ void dump(ceph::Formatter *f) const {
+ f->dump_int("code", code);
+ }
+ static void generate_test_instances(std::list<errorcode32_t*>& ls) {
+ ls.push_back(new errorcode32_t(1));
+ ls.push_back(new errorcode32_t(2));
+ }
};
WRITE_CLASS_ENCODER(errorcode32_t)
@@ -602,6 +623,16 @@ struct sha_digest_t {
decode(tmparr, bl);
memcpy(v, tmparr.data(), SIZE);
}
+ void dump(ceph::Formatter *f) const {
+ f->dump_string("sha1", to_str());
+ }
+ static void generate_test_instances(std::list<sha_digest_t*>& ls) {
+ ls.push_back(new sha_digest_t);
+ ls.push_back(new sha_digest_t);
+ ls.back()->v[0] = 1;
+ ls.push_back(new sha_digest_t);
+ ls.back()->v[0] = 2;
+ }
};
template<uint8_t S>
diff --git a/src/librados/snap_set_diff.cc b/src/librados/snap_set_diff.cc
index 06f76b02345..f80105b44ae 100644
--- a/src/librados/snap_set_diff.cc
+++ b/src/librados/snap_set_diff.cc
@@ -76,7 +76,6 @@ void calc_snap_set_diff(CephContext *cct, const librados::snap_set_t& snap_set,
saw_start = true;
}
- *end_size = r->size;
if (end < a) {
ldout(cct, 20) << " past end " << end << ", end object does not exist" << dendl;
*end_exists = false;
@@ -88,6 +87,7 @@ void calc_snap_set_diff(CephContext *cct, const librados::snap_set_t& snap_set,
}
if (end <= b) {
ldout(cct, 20) << " end" << dendl;
+ *end_size = r->size;
*end_exists = true;
*clone_end_snap_id = b;
break;
diff --git a/src/librbd/Journal.cc b/src/librbd/Journal.cc
index 8ddce2e8f7d..1b37a30c17c 100644
--- a/src/librbd/Journal.cc
+++ b/src/librbd/Journal.cc
@@ -39,6 +39,7 @@ using util::create_async_context_callback;
using util::create_context_callback;
using journal::util::C_DecodeTag;
using journal::util::C_DecodeTags;
+using io::Extents;
namespace {
@@ -760,36 +761,87 @@ void Journal<I>::user_flushed() {
}
template <typename I>
-uint64_t Journal<I>::append_write_event(uint64_t offset, size_t length,
- const bufferlist &bl,
- bool flush_entry) {
+void Journal<I>::add_write_event_entries(uint64_t offset, size_t length,
+ const bufferlist &bl,
+ uint64_t buffer_offset,
+ Bufferlists *bufferlists) {
ceph_assert(m_max_append_size > journal::AioWriteEvent::get_fixed_size());
- uint64_t max_write_data_size =
+ const uint64_t max_write_data_size =
m_max_append_size - journal::AioWriteEvent::get_fixed_size();
// ensure that the write event fits within the journal entry
- Bufferlists bufferlists;
uint64_t bytes_remaining = length;
uint64_t event_offset = 0;
do {
uint64_t event_length = std::min(bytes_remaining, max_write_data_size);
bufferlist event_bl;
- event_bl.substr_of(bl, event_offset, event_length);
+ event_bl.substr_of(bl, buffer_offset + event_offset, event_length);
journal::EventEntry event_entry(journal::AioWriteEvent(offset + event_offset,
event_length,
event_bl),
ceph_clock_now());
- bufferlists.emplace_back();
- encode(event_entry, bufferlists.back());
+ bufferlists->emplace_back();
+ encode(event_entry, bufferlists->back());
event_offset += event_length;
bytes_remaining -= event_length;
} while (bytes_remaining > 0);
+}
- return append_io_events(journal::EVENT_TYPE_AIO_WRITE, bufferlists, offset,
- length, flush_entry, 0);
+template <typename I>
+uint64_t Journal<I>::append_write_event(const Extents &image_extents,
+ const bufferlist &bl,
+ bool flush_entry) {
+ Bufferlists bufferlists;
+ uint64_t buffer_offset = 0;
+ for (auto &extent : image_extents) {
+ add_write_event_entries(extent.first, extent.second, bl, buffer_offset,
+ &bufferlists);
+
+ buffer_offset += extent.second;
+ }
+
+ return append_io_events(journal::EVENT_TYPE_AIO_WRITE, bufferlists,
+ image_extents, flush_entry, 0);
+}
+
+template <typename I>
+uint64_t Journal<I>::append_write_same_event(const Extents &image_extents,
+ const bufferlist &bl,
+ bool flush_entry) {
+ Bufferlists bufferlists;
+ for (auto &extent : image_extents) {
+ journal::EventEntry event_entry(
+ journal::AioWriteSameEvent(extent.first, extent.second, bl),
+ ceph_clock_now());
+
+ bufferlists.emplace_back();
+ encode(event_entry, bufferlists.back());
+ }
+
+ return append_io_events(journal::EVENT_TYPE_AIO_WRITESAME, bufferlists,
+ image_extents, flush_entry, 0);
+}
+
+template <typename I>
+uint64_t Journal<I>::append_discard_event(const Extents &image_extents,
+ uint32_t discard_granularity_bytes,
+ bool flush_entry) {
+ Bufferlists bufferlists;
+ for (auto &extent : image_extents) {
+ journal::EventEntry event_entry(
+ journal::AioDiscardEvent(extent.first, extent.second,
+ discard_granularity_bytes),
+ ceph_clock_now());
+
+ bufferlists.emplace_back();
+ encode(event_entry, bufferlists.back());
+ }
+
+ return append_io_events(journal::EVENT_TYPE_AIO_DISCARD, bufferlists,
+ image_extents, flush_entry, 0);
}
template <typename I>
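The restructured append path above performs two independent splits: the caller's single buffer is carved per image extent (tracked by buffer_offset), and each extent is further chunked so no journal entry exceeds the maximum payload size. A stand-alone sketch of that arithmetic (hypothetical helper, not the librbd API):

    #include <algorithm>
    #include <cstdint>
    #include <tuple>
    #include <utility>
    #include <vector>

    using Extent = std::pair<uint64_t, uint64_t>;  // (image offset, length)

    // For one buffer backing several image extents, produce
    // (image_offset, buffer_offset, length) chunks no larger than max_data.
    std::vector<std::tuple<uint64_t, uint64_t, uint64_t>>
    split_write(const std::vector<Extent>& extents, uint64_t max_data) {
      std::vector<std::tuple<uint64_t, uint64_t, uint64_t>> chunks;
      uint64_t buffer_offset = 0;
      for (const auto& [img_off, len] : extents) {
        uint64_t done = 0;
        while (done < len) {
          uint64_t n = std::min(len - done, max_data);
          chunks.emplace_back(img_off + done, buffer_offset + done, n);
          done += n;
        }
        buffer_offset += len;  // next extent's data starts where this one ended
      }
      return chunks;
    }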
@@ -832,7 +884,8 @@ uint64_t Journal<I>::append_compare_and_write_event(uint64_t offset,
} while (bytes_remaining > 0);
return append_io_events(journal::EVENT_TYPE_AIO_COMPARE_AND_WRITE,
- bufferlists, offset, length, flush_entry, -EILSEQ);
+ bufferlists, {{offset, length}}, flush_entry,
+ -EILSEQ);
}
template <typename I>
@@ -842,14 +895,14 @@ uint64_t Journal<I>::append_io_event(journal::EventEntry &&event_entry,
bufferlist bl;
event_entry.timestamp = ceph_clock_now();
encode(event_entry, bl);
- return append_io_events(event_entry.get_event_type(), {bl}, offset, length,
- flush_entry, filter_ret_val);
+ return append_io_events(event_entry.get_event_type(), {bl},
+ {{offset, length}}, flush_entry, filter_ret_val);
}
template <typename I>
uint64_t Journal<I>::append_io_events(journal::EventType event_type,
const Bufferlists &bufferlists,
- uint64_t offset, size_t length,
+ const Extents &image_extents,
bool flush_entry, int filter_ret_val) {
ceph_assert(!bufferlists.empty());
@@ -870,14 +923,13 @@ uint64_t Journal<I>::append_io_events(journal::EventType event_type,
{
std::lock_guard event_locker{m_event_lock};
- m_events[tid] = Event(futures, offset, length, filter_ret_val);
+ m_events[tid] = Event(futures, image_extents, filter_ret_val);
}
CephContext *cct = m_image_ctx.cct;
ldout(cct, 20) << this << " " << __func__ << ": "
<< "event=" << event_type << ", "
- << "offset=" << offset << ", "
- << "length=" << length << ", "
+ << "image_extents=" << image_extents << ", "
<< "flush=" << flush_entry << ", tid=" << tid << dendl;
Context *on_safe = create_async_context_callback(
diff --git a/src/librbd/Journal.h b/src/librbd/Journal.h
index 1ef9ffa8830..5327adac719 100644
--- a/src/librbd/Journal.h
+++ b/src/librbd/Journal.h
@@ -18,6 +18,7 @@
#include "journal/ReplayHandler.h"
#include "librbd/Utils.h"
#include "librbd/asio/ContextWQ.h"
+#include "librbd/io/Types.h"
#include "librbd/journal/Types.h"
#include "librbd/journal/TypeTraits.h"
@@ -133,14 +134,20 @@ public:
void user_flushed();
- uint64_t append_write_event(uint64_t offset, size_t length,
+ uint64_t append_write_event(const io::Extents &image_extents,
const bufferlist &bl,
bool flush_entry);
+ uint64_t append_write_same_event(const io::Extents &image_extents,
+ const bufferlist &bl,
+ bool flush_entry);
uint64_t append_compare_and_write_event(uint64_t offset,
size_t length,
const bufferlist &cmp_bl,
const bufferlist &write_bl,
bool flush_entry);
+ uint64_t append_discard_event(const io::Extents &image_extents,
+ uint32_t discard_granularity_bytes,
+ bool flush_entry);
uint64_t append_io_event(journal::EventEntry &&event_entry,
uint64_t offset, size_t length,
bool flush_entry, int filter_ret_val);
@@ -200,11 +207,13 @@ private:
Event() {
}
- Event(const Futures &_futures, uint64_t offset, size_t length,
+ Event(const Futures &_futures, const io::Extents &image_extents,
int filter_ret_val)
: futures(_futures), filter_ret_val(filter_ret_val) {
- if (length > 0) {
- pending_extents.insert(offset, length);
+ for (auto &extent : image_extents) {
+ if (extent.second > 0) {
+ pending_extents.insert(extent.first, extent.second);
+ }
}
}
};
@@ -322,9 +331,13 @@ private:
bool is_journal_replaying(const ceph::mutex &) const;
bool is_tag_owner(const ceph::mutex &) const;
+ void add_write_event_entries(uint64_t offset, size_t length,
+ const bufferlist &bl,
+ uint64_t buffer_offset,
+ Bufferlists *bufferlists);
uint64_t append_io_events(journal::EventType event_type,
const Bufferlists &bufferlists,
- uint64_t offset, size_t length, bool flush_entry,
+ const io::Extents &extents, bool flush_entry,
int filter_ret_val);
Future wait_event(ceph::mutex &lock, uint64_t tid, Context *on_safe);
diff --git a/src/librbd/io/ImageRequest.cc b/src/librbd/io/ImageRequest.cc
index e4c41c22976..fb9f8944ed8 100644
--- a/src/librbd/io/ImageRequest.cc
+++ b/src/librbd/io/ImageRequest.cc
@@ -473,7 +473,7 @@ void AbstractImageWriteRequest<I>::send_request() {
if (journaling) {
// in-flight ops are flushed prior to closing the journal
ceph_assert(image_ctx.journal != NULL);
- journal_tid = append_journal_event(m_synchronous);
+ journal_tid = append_journal_event();
}
// it's very important that IOContext is captured here instead of
@@ -518,22 +518,12 @@ void ImageWriteRequest<I>::assemble_extent(
}
template <typename I>
-uint64_t ImageWriteRequest<I>::append_journal_event(bool synchronous) {
+uint64_t ImageWriteRequest<I>::append_journal_event() {
I &image_ctx = this->m_image_ctx;
- uint64_t tid = 0;
- uint64_t buffer_offset = 0;
ceph_assert(!this->m_image_extents.empty());
- for (auto &extent : this->m_image_extents) {
- bufferlist sub_bl;
- sub_bl.substr_of(m_bl, buffer_offset, extent.second);
- buffer_offset += extent.second;
-
- tid = image_ctx.journal->append_write_event(extent.first, extent.second,
- sub_bl, synchronous);
- }
-
- return tid;
+ return image_ctx.journal->append_write_event(
+ this->m_image_extents, m_bl, false);
}
template <typename I>
@@ -566,22 +556,12 @@ void ImageWriteRequest<I>::update_stats(size_t length) {
}
template <typename I>
-uint64_t ImageDiscardRequest<I>::append_journal_event(bool synchronous) {
+uint64_t ImageDiscardRequest<I>::append_journal_event() {
I &image_ctx = this->m_image_ctx;
- uint64_t tid = 0;
ceph_assert(!this->m_image_extents.empty());
- for (auto &extent : this->m_image_extents) {
- journal::EventEntry event_entry(
- journal::AioDiscardEvent(extent.first,
- extent.second,
- this->m_discard_granularity_bytes));
- tid = image_ctx.journal->append_io_event(std::move(event_entry),
- extent.first, extent.second,
- synchronous, 0);
- }
-
- return tid;
+ return image_ctx.journal->append_discard_event(
+ this->m_image_extents, m_discard_granularity_bytes, false);
}
template <typename I>
@@ -717,21 +697,12 @@ void ImageFlushRequest<I>::send_request() {
}
template <typename I>
-uint64_t ImageWriteSameRequest<I>::append_journal_event(bool synchronous) {
+uint64_t ImageWriteSameRequest<I>::append_journal_event() {
I &image_ctx = this->m_image_ctx;
- uint64_t tid = 0;
ceph_assert(!this->m_image_extents.empty());
- for (auto &extent : this->m_image_extents) {
- journal::EventEntry event_entry(journal::AioWriteSameEvent(extent.first,
- extent.second,
- m_data_bl));
- tid = image_ctx.journal->append_io_event(std::move(event_entry),
- extent.first, extent.second,
- synchronous, 0);
- }
-
- return tid;
+ return image_ctx.journal->append_write_same_event(
+ this->m_image_extents, m_data_bl, false);
}
template <typename I>
@@ -768,8 +739,7 @@ void ImageWriteSameRequest<I>::update_stats(size_t length) {
}
template <typename I>
-uint64_t ImageCompareAndWriteRequest<I>::append_journal_event(
- bool synchronous) {
+uint64_t ImageCompareAndWriteRequest<I>::append_journal_event() {
I &image_ctx = this->m_image_ctx;
uint64_t tid = 0;
@@ -779,7 +749,7 @@ uint64_t ImageCompareAndWriteRequest<I>::append_journal_event(
extent.second,
m_cmp_bl,
m_bl,
- synchronous);
+ false);
return tid;
}
diff --git a/src/librbd/io/ImageRequest.h b/src/librbd/io/ImageRequest.h
index 2668c1acb2c..996c90a11f2 100644
--- a/src/librbd/io/ImageRequest.h
+++ b/src/librbd/io/ImageRequest.h
@@ -114,11 +114,6 @@ private:
template <typename ImageCtxT = ImageCtx>
class AbstractImageWriteRequest : public ImageRequest<ImageCtxT> {
-public:
- inline void flag_synchronous() {
- m_synchronous = true;
- }
-
protected:
using typename ImageRequest<ImageCtxT>::ObjectRequests;
@@ -127,8 +122,7 @@ protected:
const char *trace_name,
const ZTracer::Trace &parent_trace)
: ImageRequest<ImageCtxT>(image_ctx, aio_comp, std::move(image_extents),
- area, trace_name, parent_trace),
- m_synchronous(false) {
+ area, trace_name, parent_trace) {
}
void send_request() override;
@@ -144,11 +138,8 @@ protected:
const LightweightObjectExtent &object_extent, IOContext io_context,
uint64_t journal_tid, bool single_extent, Context *on_finish) = 0;
- virtual uint64_t append_journal_event(bool synchronous) = 0;
+ virtual uint64_t append_journal_event() = 0;
virtual void update_stats(size_t length) = 0;
-
-private:
- bool m_synchronous;
};
template <typename ImageCtxT = ImageCtx>
@@ -180,7 +171,7 @@ protected:
const LightweightObjectExtent &object_extent, IOContext io_context,
uint64_t journal_tid, bool single_extent, Context *on_finish) override;
- uint64_t append_journal_event(bool synchronous) override;
+ uint64_t append_journal_event() override;
void update_stats(size_t length) override;
private:
@@ -215,7 +206,7 @@ protected:
const LightweightObjectExtent &object_extent, IOContext io_context,
uint64_t journal_tid, bool single_extent, Context *on_finish) override;
- uint64_t append_journal_event(bool synchronous) override;
+ uint64_t append_journal_event() override;
void update_stats(size_t length) override;
int prune_object_extents(
@@ -283,7 +274,7 @@ protected:
const LightweightObjectExtent &object_extent, IOContext io_context,
uint64_t journal_tid, bool single_extent, Context *on_finish) override;
- uint64_t append_journal_event(bool synchronous) override;
+ uint64_t append_journal_event() override;
void update_stats(size_t length) override;
private:
bufferlist m_data_bl;
@@ -315,7 +306,7 @@ protected:
const LightweightObjectExtent &object_extent, IOContext io_context,
uint64_t journal_tid, bool single_extent, Context *on_finish) override;
- uint64_t append_journal_event(bool synchronous) override;
+ uint64_t append_journal_event() override;
void update_stats(size_t length) override;
aio_type_t get_aio_type() const override {
diff --git a/src/librbd/io/ObjectRequest.cc b/src/librbd/io/ObjectRequest.cc
index 827f551d1f7..2bf06966c36 100644
--- a/src/librbd/io/ObjectRequest.cc
+++ b/src/librbd/io/ObjectRequest.cc
@@ -834,16 +834,17 @@ void ObjectListSnapsRequest<I>::handle_list_snaps(int r) {
end_snap_id, &diff, &end_size, &exists,
&clone_end_snap_id, &read_whole_object);
- if (read_whole_object ||
- (!diff.empty() &&
- ((m_list_snaps_flags & LIST_SNAPS_FLAG_WHOLE_OBJECT) != 0))) {
+ if (read_whole_object) {
ldout(cct, 1) << "need to read full object" << dendl;
- diff.clear();
diff.insert(0, image_ctx->layout.object_size);
+ exists = true;
end_size = image_ctx->layout.object_size;
clone_end_snap_id = end_snap_id;
- } else if (!exists) {
- end_size = 0;
+ } else if ((m_list_snaps_flags & LIST_SNAPS_FLAG_WHOLE_OBJECT) != 0 &&
+ !diff.empty()) {
+ ldout(cct, 20) << "expanding diff from " << diff << dendl;
+ diff.clear();
+ diff.insert(0, image_ctx->layout.object_size);
}
if (exists) {
@@ -884,7 +885,7 @@ void ObjectListSnapsRequest<I>::handle_list_snaps(int r) {
<< "end_size=" << end_size << ", "
<< "prev_end_size=" << prev_end_size << ", "
<< "exists=" << exists << ", "
- << "whole_object=" << read_whole_object << dendl;
+ << "read_whole_object=" << read_whole_object << dendl;
// check if object exists prior to start of incremental snap delta so that
// we don't DNE the object if no additional deltas exist
diff --git a/src/librbd/librbd.cc b/src/librbd/librbd.cc
index 1272d95dd7e..132a0084a9f 100644
--- a/src/librbd/librbd.cc
+++ b/src/librbd/librbd.cc
@@ -15,6 +15,12 @@
#include <errno.h>
+// these strand headers declare static variables that need to be shared between
+// librbd.so and librados.so. referencing them here causes librbd.so to link
+// their symbols as 'global unique'. see https://tracker.ceph.com/issues/63682
+#include <boost/asio/strand.hpp>
+#include <boost/asio/io_context_strand.hpp>
+
#include "common/deleter.h"
#include "common/dout.h"
#include "common/errno.h"
diff --git a/src/mds/CDir.cc b/src/mds/CDir.cc
index 0484c38cc30..a8aaf11c051 100644
--- a/src/mds/CDir.cc
+++ b/src/mds/CDir.cc
@@ -3752,6 +3752,7 @@ bool CDir::scrub_local()
mdcache->repair_dirfrag_stats(this);
scrub_infop->header->set_repaired();
good = true;
+ mdcache->mds->damage_table.remove_dentry_damage_entry(this);
}
return good;
}
diff --git a/src/mds/CInode.cc b/src/mds/CInode.cc
index 621d8e93e40..61a6854ea8e 100644
--- a/src/mds/CInode.cc
+++ b/src/mds/CInode.cc
@@ -4787,6 +4787,7 @@ next:
false);
// Flag that we repaired this BT so that it won't go into damagetable
results->backtrace.repaired = true;
+ in->mdcache->mds->damage_table.remove_backtrace_damage_entry(in->ino());
if (in->mdcache->mds->logger)
in->mdcache->mds->logger->inc(l_mds_scrub_backtrace_repaired);
}
@@ -4925,6 +4926,9 @@ next:
<< "freshly-calculated rstats don't match existing ones (will be fixed)";
in->mdcache->repair_inode_stats(in);
results->raw_stats.repaired = true;
+ for (const auto &p : in->dirfrags){
+ in->mdcache->mds->damage_table.remove_dirfrag_damage_entry(p.second);
+ }
} else {
results->raw_stats.error_str
<< "freshly-calculated rstats don't match existing ones";
diff --git a/src/mds/DamageTable.cc b/src/mds/DamageTable.cc
index 22802079d85..2079d23333a 100644
--- a/src/mds/DamageTable.cc
+++ b/src/mds/DamageTable.cc
@@ -15,6 +15,7 @@
#include "common/debug.h"
#include "mds/CDir.h"
+#include "mds/CInode.h"
#include "DamageTable.h"
@@ -200,6 +201,33 @@ bool DamageTable::notify_remote_damaged(inodeno_t ino, std::string_view path)
return false;
}
+void DamageTable::remove_dentry_damage_entry(CDir *dir)
+{
+ if (dentries.count(
+ DirFragIdent(dir->inode->ino(), dir->frag)
+ ) > 0){
+ const auto frag_dentries =
+ dentries.at(DirFragIdent(dir->inode->ino(), dir->frag));
+ for(const auto &i : frag_dentries) {
+ erase(i.second->id);
+ }
+ }
+}
+
+void DamageTable::remove_dirfrag_damage_entry(CDir *dir)
+{
+ if (is_dirfrag_damaged(dir)){
+ erase(dirfrags.find(DirFragIdent(dir->inode->ino(), dir->frag))->second->id);
+ }
+}
+
+void DamageTable::remove_backtrace_damage_entry(inodeno_t ino)
+{
+ if (is_remote_damaged(ino)){
+ erase(remotes.find(ino)->second->id);
+ }
+}
+
bool DamageTable::oversized() const
{
return by_id.size() > (size_t)(g_conf()->mds_damage_table_max_entries);
diff --git a/src/mds/DamageTable.h b/src/mds/DamageTable.h
index 18a61e08b12..a1b96fe2218 100644
--- a/src/mds/DamageTable.h
+++ b/src/mds/DamageTable.h
@@ -22,6 +22,7 @@
#include "include/random.h"
class CDir;
+class CInode;
typedef uint64_t damage_entry_id_t;
@@ -155,6 +156,12 @@ class DamageTable
*/
bool notify_remote_damaged(inodeno_t ino, std::string_view path);
+ void remove_dentry_damage_entry(CDir *dir);
+
+ void remove_dirfrag_damage_entry(CDir *dir);
+
+ void remove_backtrace_damage_entry(inodeno_t ino);
+
bool is_dentry_damaged(
const CDir *dir_frag,
std::string_view dname,
diff --git a/src/mds/MDSMap.cc b/src/mds/MDSMap.cc
index 47c823bf763..013aefcf048 100644
--- a/src/mds/MDSMap.cc
+++ b/src/mds/MDSMap.cc
@@ -770,7 +770,7 @@ void MDSMap::encode(bufferlist& bl, uint64_t features) const
encode(data_pools, bl);
encode(cas_pool, bl);
- __u16 ev = 17;
+ __u16 ev = 18;
encode(ev, bl);
encode(compat, bl);
encode(metadata_pool, bl);
@@ -947,6 +947,9 @@ void MDSMap::decode(bufferlist::const_iterator& p)
if (ev >= 17) {
decode(max_xattr_size, p);
+ }
+
+ if (ev >= 18) {
decode(bal_rank_mask, p);
}
diff --git a/src/mds/locks.c b/src/mds/locks.c
index dbe3ab8eb00..f6ff8b9824a 100644
--- a/src/mds/locks.c
+++ b/src/mds/locks.c
@@ -117,7 +117,7 @@ const struct sm_state_t filelock[LOCK_MAX] = {
[LOCK_XSYN_EXCL] = { LOCK_EXCL, true, LOCK_LOCK, AUTH, 0, XCL, 0, 0, 0, 0, 0,CEPH_CAP_GCACHE|CEPH_CAP_GBUFFER,0,0 },
[LOCK_XSYN] = { 0, true, LOCK_LOCK, AUTH, AUTH,AUTH,XCL, 0, 0, 0, 0,CEPH_CAP_GCACHE|CEPH_CAP_GBUFFER,0,0 },
- [LOCK_EXCL_XSYN] = { LOCK_XSYN, false, LOCK_LOCK, 0, 0, XCL, 0, 0, 0, 0, 0,CEPH_CAP_GCACHE|CEPH_CAP_GBUFFER,0,0 },
+ [LOCK_EXCL_XSYN] = { LOCK_XSYN, true, LOCK_LOCK, 0, 0, XCL, 0, 0, 0, 0, 0,CEPH_CAP_GCACHE|CEPH_CAP_GBUFFER,0,0 },
[LOCK_PRE_SCAN] = { LOCK_SCAN, false, LOCK_LOCK, 0, 0, 0, 0, 0, 0, 0, 0,0,0,0 },
[LOCK_SCAN] = { LOCK_LOCK, false, LOCK_LOCK, 0, 0, 0, 0, 0, 0, 0, 0,0,0,0 },
diff --git a/src/mds/mdstypes.cc b/src/mds/mdstypes.cc
index 044c3345960..21e17ca0e5b 100644
--- a/src/mds/mdstypes.cc
+++ b/src/mds/mdstypes.cc
@@ -284,6 +284,21 @@ void inline_data_t::decode(bufferlist::const_iterator &p)
free_data();
}
+void inline_data_t::dump(Formatter *f) const
+{
+ f->dump_unsigned("version", version);
+ f->dump_unsigned("length", length());
+}
+
+void inline_data_t::generate_test_instances(std::list<inline_data_t*>& ls)
+{
+ ls.push_back(new inline_data_t);
+ ls.push_back(new inline_data_t);
+ bufferlist bl;
+ bl.append("inline data");
+ ls.back()->set_data(bl);
+}
+
/*
* fnode_t
diff --git a/src/messages/MClientReply.h b/src/messages/MClientReply.h
index be33fad4949..028c4200c14 100644
--- a/src/messages/MClientReply.h
+++ b/src/messages/MClientReply.h
@@ -291,7 +291,7 @@ struct InodeStat {
};
struct openc_response_t {
- _inodeno_t created_ino;
+ _inodeno_t created_ino{0};
interval_set<inodeno_t> delegated_inos;
public:
@@ -309,6 +309,16 @@ public:
decode(delegated_inos, p);
DECODE_FINISH(p);
}
+ void dump(ceph::Formatter *f) const {
+ f->dump_unsigned("created_ino", created_ino);
+ f->dump_stream("delegated_inos") << delegated_inos;
+ }
+ static void generate_test_instances(std::list<openc_response_t*>& ls) {
+ ls.push_back(new openc_response_t);
+ ls.push_back(new openc_response_t);
+ ls.back()->created_ino = 1;
+ ls.back()->delegated_inos.insert(1, 10);
+ }
} __attribute__ ((__may_alias__));
WRITE_CLASS_ENCODER(openc_response_t)
diff --git a/src/messages/MClientRequest.h b/src/messages/MClientRequest.h
index f63657d2e25..cb9a888b0be 100644
--- a/src/messages/MClientRequest.h
+++ b/src/messages/MClientRequest.h
@@ -60,6 +60,17 @@ struct SnapPayload {
decode(metadata, iter);
DECODE_FINISH(iter);
}
+ void dump(ceph::Formatter *f) const {
+ for (const auto &i : metadata) {
+ f->dump_string(i.first.c_str(), i.second);
+ }
+ }
+ static void generate_test_instances(std::list<SnapPayload *> &o) {
+ o.push_back(new SnapPayload);
+ o.push_back(new SnapPayload);
+ o.back()->metadata["key1"] = "val1";
+ o.back()->metadata["key2"] = "val2";
+ }
};
WRITE_CLASS_ENCODER(SnapPayload)
@@ -95,6 +106,26 @@ public:
decode(item, bl);
ceph::decode_nohead(item.dname_len, dname, bl);
}
+
+ void dump(ceph::Formatter *f) const {
+ f->dump_string("dname", dname);
+ f->dump_unsigned("ino", item.ino);
+ f->dump_unsigned("cap_id", item.cap_id);
+ f->dump_unsigned("caps", item.caps);
+ f->dump_unsigned("wanted", item.wanted);
+ f->dump_unsigned("seq", item.seq);
+ f->dump_unsigned("issue_seq", item.issue_seq);
+ f->dump_unsigned("mseq", item.mseq);
+ f->dump_unsigned("dname_seq", item.dname_seq);
+ f->dump_unsigned("dname_len", item.dname_len);
+ }
+
+ static void generate_test_instances(std::list<Release*>& ls) {
+ ls.push_back(new Release);
+ ls.push_back(new Release);
+ ls.back()->item.dname_len = 4;
+ ls.back()->dname = "test";
+ }
};
mutable std::vector<Release> releases; /* XXX HACK! */
diff --git a/src/messages/MMDSCacheRejoin.h b/src/messages/MMDSCacheRejoin.h
index a9211d53538..9237c79b6e7 100644
--- a/src/messages/MMDSCacheRejoin.h
+++ b/src/messages/MMDSCacheRejoin.h
@@ -63,6 +63,17 @@ public:
decode(nestlock, bl);
decode(dftlock, bl);
}
+ void dump(ceph::Formatter *f) const {
+ f->dump_int("nonce", nonce);
+ f->dump_int("caps_wanted", caps_wanted);
+ f->dump_int("filelock", filelock);
+ f->dump_int("nestlock", nestlock);
+ f->dump_int("dftlock", dftlock);
+ }
+ static void generate_test_instances(std::list<inode_strong*>& ls) {
+ ls.push_back(new inode_strong);
+ ls.push_back(new inode_strong(1, 2, 3, 4, 5));
+ }
};
WRITE_CLASS_ENCODER(inode_strong)
@@ -81,6 +92,14 @@ public:
decode(nonce, bl);
decode(dir_rep, bl);
}
+ static void generate_test_instances(std::list<dirfrag_strong*>& ls) {
+ ls.push_back(new dirfrag_strong);
+ ls.push_back(new dirfrag_strong(1, 2));
+ }
+ void dump(ceph::Formatter *f) const {
+ f->dump_unsigned("nonce", nonce);
+ f->dump_unsigned("dir_rep", dir_rep);
+ }
};
WRITE_CLASS_ENCODER(dirfrag_strong)
@@ -118,6 +137,19 @@ public:
decode(lock, bl);
decode(alternate_name, bl);
}
+ static void generate_test_instances(std::list<dn_strong*>& ls) {
+ ls.push_back(new dn_strong);
+ ls.push_back(new dn_strong(1, "alternate_name", 2, 3, 4, 5, 6));
+ }
+ void dump(ceph::Formatter *f) const {
+ f->dump_unsigned("first", first);
+ f->dump_string("alternate_name", alternate_name);
+ f->dump_unsigned("ino", ino);
+ f->dump_unsigned("remote_ino", remote_ino);
+ f->dump_unsigned("remote_d_type", remote_d_type);
+ f->dump_unsigned("nonce", nonce);
+ f->dump_unsigned("lock", lock);
+ }
};
WRITE_CLASS_ENCODER(dn_strong)
diff --git a/src/messages/MMgrReport.h b/src/messages/MMgrReport.h
index bd3c221a847..e81282dc1a0 100644
--- a/src/messages/MMgrReport.h
+++ b/src/messages/MMgrReport.h
@@ -74,6 +74,27 @@ public:
}
DECODE_FINISH(p);
}
+
+ void dump(ceph::Formatter *f) const
+ {
+ f->dump_string("path", path);
+ f->dump_string("description", description);
+ f->dump_string("nick", nick);
+ f->dump_int("type", type);
+ f->dump_int("priority", priority);
+ f->dump_int("unit", unit);
+ }
+ static void generate_test_instances(std::list<PerfCounterType*>& ls)
+ {
+ ls.push_back(new PerfCounterType);
+ ls.push_back(new PerfCounterType);
+ ls.back()->path = "mycounter";
+ ls.back()->description = "mycounter description";
+ ls.back()->nick = "mycounter nick";
+ ls.back()->type = PERFCOUNTER_COUNTER;
+ ls.back()->priority = PerfCountersBuilder::PRIO_CRITICAL;
+ ls.back()->unit = UNIT_BYTES;
+ }
};
WRITE_CLASS_ENCODER(PerfCounterType)
diff --git a/src/messages/MOSDScrubReserve.h b/src/messages/MOSDScrubReserve.h
index f1f76b3e6fe..c7ab9854117 100644
--- a/src/messages/MOSDScrubReserve.h
+++ b/src/messages/MOSDScrubReserve.h
@@ -24,7 +24,7 @@ private:
public:
spg_t pgid;
epoch_t map_epoch;
- enum {
+ enum ReserveMsgOp {
REQUEST = 0,
GRANT = 1,
RELEASE = 2,
diff --git a/src/mgr/ActivePyModules.cc b/src/mgr/ActivePyModules.cc
index 4f75670591a..17bb3951142 100644
--- a/src/mgr/ActivePyModules.cc
+++ b/src/mgr/ActivePyModules.cc
@@ -555,42 +555,6 @@ void ActivePyModules::start_one(PyModuleRef py_module)
}));
}
-void ActivePyModules::shutdown()
-{
- std::lock_guard locker(lock);
-
- // Stop per active module finisher thread
- for (auto& [name, module] : modules) {
- dout(4) << "Stopping active module " << name << " finisher thread" << dendl;
- module->finisher.wait_for_empty();
- module->finisher.stop();
- }
-
- // Signal modules to drop out of serve() and/or tear down resources
- for (auto& [name, module] : modules) {
- lock.unlock();
- dout(10) << "calling module " << name << " shutdown()" << dendl;
- module->shutdown();
- dout(10) << "module " << name << " shutdown() returned" << dendl;
- lock.lock();
- }
-
- // For modules implementing serve(), finish the threads where we
- // were running that.
- for (auto& [name, module] : modules) {
- lock.unlock();
- dout(10) << "joining module " << name << dendl;
- module->thread.join();
- dout(10) << "joined module " << name << dendl;
- lock.lock();
- }
-
- cmd_finisher.wait_for_empty();
- cmd_finisher.stop();
-
- modules.clear();
-}
-
void ActivePyModules::notify_all(const std::string &notify_type,
const std::string &notify_id)
{
diff --git a/src/mgr/ActivePyModules.h b/src/mgr/ActivePyModules.h
index 283f96a6ed9..d6ade4849f7 100644
--- a/src/mgr/ActivePyModules.h
+++ b/src/mgr/ActivePyModules.h
@@ -216,7 +216,6 @@ public:
std::string *err);
int init();
- void shutdown();
void start_one(PyModuleRef py_module);
diff --git a/src/mgr/ClusterState.cc b/src/mgr/ClusterState.cc
index 7f811a5e415..6b106268efc 100644
--- a/src/mgr/ClusterState.cc
+++ b/src/mgr/ClusterState.cc
@@ -225,14 +225,6 @@ void ClusterState::final_init()
ceph_assert(r == 0);
}
-void ClusterState::shutdown()
-{
- // unregister commands
- g_ceph_context->get_admin_socket()->unregister_commands(asok_hook);
- delete asok_hook;
- asok_hook = NULL;
-}
-
bool ClusterState::asok_command(
std::string_view admin_command,
const cmdmap_t& cmdmap,
diff --git a/src/mgr/ClusterState.h b/src/mgr/ClusterState.h
index 7939cd8eb8f..2beac362b47 100644
--- a/src/mgr/ClusterState.h
+++ b/src/mgr/ClusterState.h
@@ -152,7 +152,6 @@ public:
}
void final_init();
- void shutdown();
bool asok_command(std::string_view admin_command,
const cmdmap_t& cmdmap,
Formatter *f,
diff --git a/src/mgr/DaemonServer.cc b/src/mgr/DaemonServer.cc
index a4e85e2fc42..b1781316f82 100644
--- a/src/mgr/DaemonServer.cc
+++ b/src/mgr/DaemonServer.cc
@@ -98,7 +98,6 @@ DaemonServer::DaemonServer(MonClient *monc_,
audit_clog(audit_clog_),
pgmap_ready(false),
timer(g_ceph_context, lock),
- shutting_down(false),
tick_event(nullptr),
osd_perf_metric_collector_listener(this),
osd_perf_metric_collector(osd_perf_metric_collector_listener),
@@ -358,11 +357,6 @@ void DaemonServer::schedule_tick_locked(double delay_sec)
tick_event = nullptr;
}
- // on shutdown start rejecting explicit requests to send reports that may
- // originate from python land which may still be running.
- if (shutting_down)
- return;
-
tick_event = timer.add_event_after(delay_sec,
new LambdaContext([this](int r) {
tick();
@@ -407,19 +401,6 @@ void DaemonServer::handle_mds_perf_metric_query_updated()
}));
}
-void DaemonServer::shutdown()
-{
- dout(10) << "begin" << dendl;
- msgr->shutdown();
- msgr->wait();
- cluster_state.shutdown();
- dout(10) << "done" << dendl;
-
- std::lock_guard l(lock);
- shutting_down = true;
- timer.shutdown();
-}
-
static DaemonKey key_from_service(
const std::string& service_name,
int peer_type,
diff --git a/src/mgr/DaemonServer.h b/src/mgr/DaemonServer.h
index a7b64561004..43125533e74 100644
--- a/src/mgr/DaemonServer.h
+++ b/src/mgr/DaemonServer.h
@@ -190,7 +190,6 @@ private:
void maybe_ready(int32_t osd_id);
SafeTimer timer;
- bool shutting_down;
Context *tick_event;
void tick();
void schedule_tick_locked(double delay_sec);
@@ -255,7 +254,6 @@ private:
public:
int init(uint64_t gid, entity_addrvec_t client_addrs);
- void shutdown();
entity_addrvec_t get_myaddrs() const;
diff --git a/src/mgr/Mgr.cc b/src/mgr/Mgr.cc
index 63ad530fae2..5bd2ffb246c 100644
--- a/src/mgr/Mgr.cc
+++ b/src/mgr/Mgr.cc
@@ -214,12 +214,6 @@ std::map<std::string, std::string> Mgr::load_store()
return loaded;
}
-void Mgr::handle_signal(int signum)
-{
- ceph_assert(signum == SIGINT || signum == SIGTERM);
- shutdown();
-}
-
static void handle_mgr_signal(int signum)
{
derr << " *** Got signal " << sig_str(signum) << " ***" << dendl;
@@ -490,27 +484,6 @@ void Mgr::load_all_metadata()
}
}
-
-void Mgr::shutdown()
-{
- dout(10) << "mgr shutdown init" << dendl;
- finisher.queue(new LambdaContext([&](int) {
- {
- std::lock_guard l(lock);
- // First stop the server so that we're not taking any more incoming
- // requests
- server.shutdown();
- }
- // after the messenger is stopped, signal modules to shutdown via finisher
- py_module_registry->active_shutdown();
- }));
-
- // Then stop the finisher to ensure its enqueued contexts aren't going
- // to touch references to the things we're about to tear down
- finisher.wait_for_empty();
- finisher.stop();
-}
-
void Mgr::handle_osd_map()
{
ceph_assert(ceph_mutex_is_locked_by_me(lock));
diff --git a/src/mgr/Mgr.h b/src/mgr/Mgr.h
index 22ebdb68041..65931c331f3 100644
--- a/src/mgr/Mgr.h
+++ b/src/mgr/Mgr.h
@@ -94,9 +94,6 @@ public:
bool ms_dispatch2(const ceph::ref_t<Message>& m);
void background_init(Context *completion);
- void shutdown();
-
- void handle_signal(int signum);
std::map<std::string, std::string> get_services() const;
diff --git a/src/mgr/MgrStandby.cc b/src/mgr/MgrStandby.cc
index 545624eb79b..052e6868177 100644
--- a/src/mgr/MgrStandby.cc
+++ b/src/mgr/MgrStandby.cc
@@ -295,41 +295,6 @@ void MgrStandby::tick()
));
}
-void MgrStandby::shutdown()
-{
- finisher.queue(new LambdaContext([&](int) {
- std::lock_guard l(lock);
-
- dout(4) << "Shutting down" << dendl;
-
- py_module_registry.shutdown();
- // stop sending beacon first, I use monc to talk with monitors
- timer.shutdown();
- // client uses monc and objecter
- client.shutdown();
- mgrc.shutdown();
- // Stop asio threads, so leftover events won't call into shut down
- // monclient/objecter.
- poolctx.finish();
- // stop monc, so mon won't be able to instruct me to shutdown/activate after
- // the active_mgr is stopped
- monc.shutdown();
- if (active_mgr) {
- active_mgr->shutdown();
- }
- // objecter is used by monc and active_mgr
- objecter.shutdown();
- // client_messenger is used by all of them, so stop it in the end
- client_messenger->shutdown();
- }));
-
- // Then stop the finisher to ensure its enqueued contexts aren't going
- // to touch references to the things we're about to tear down
- finisher.wait_for_empty();
- finisher.stop();
- mgr_perf_stop(g_ceph_context);
-}
-
void MgrStandby::respawn()
{
// --- WARNING TO FUTURE COPY/PASTERS ---
diff --git a/src/mgr/MgrStandby.h b/src/mgr/MgrStandby.h
index 0f06e3074a0..5d238c85577 100644
--- a/src/mgr/MgrStandby.h
+++ b/src/mgr/MgrStandby.h
@@ -79,7 +79,6 @@ public:
bool ms_handle_refused(Connection *con) override;
int init();
- void shutdown();
void respawn();
int main(std::vector<const char *> args);
void tick();
diff --git a/src/mgr/PyModuleRegistry.cc b/src/mgr/PyModuleRegistry.cc
index f5f5008023f..eb2d2babe75 100644
--- a/src/mgr/PyModuleRegistry.cc
+++ b/src/mgr/PyModuleRegistry.cc
@@ -217,53 +217,6 @@ void PyModuleRegistry::active_start(
}
}
-void PyModuleRegistry::active_shutdown()
-{
- std::lock_guard locker(lock);
-
- if (active_modules != nullptr) {
- active_modules->shutdown();
- active_modules.reset();
- }
-}
-
-void PyModuleRegistry::shutdown()
-{
- std::lock_guard locker(lock);
-
- if (standby_modules != nullptr) {
- standby_modules->shutdown();
- standby_modules.reset();
- }
-
- // Ideally, now, we'd be able to do this for all modules:
- //
- // Py_EndInterpreter(pMyThreadState);
- // PyThreadState_Swap(pMainThreadState);
- //
- // Unfortunately, if the module has any other *python* threads active
- // at this point, Py_EndInterpreter() will abort with:
- //
- // Fatal Python error: Py_EndInterpreter: not the last thread
- //
- // This can happen when using CherryPy in a module, becuase CherryPy
- // runs an extra thread as a timeout monitor, which spends most of its
- // life inside a time.sleep(60). Unless you are very, very lucky with
- // the timing calling this destructor, that thread will still be stuck
- // in a sleep, and Py_EndInterpreter() will abort.
- //
- // This could of course also happen with a poorly written module which
- // made no attempt to clean up any additional threads it created.
- //
- // The safest thing to do is just not call Py_EndInterpreter(), and
- // let Py_Finalize() kill everything after all modules are shut down.
-
- modules.clear();
-
- PyEval_RestoreThread(pMainThreadState);
- Py_Finalize();
-}
-
std::vector<std::string> PyModuleRegistry::probe_modules(const std::string &path) const
{
const auto opt = g_conf().get_val<std::string>("mgr_disabled_modules");
diff --git a/src/mgr/PyModuleRegistry.h b/src/mgr/PyModuleRegistry.h
index 9af9abb5762..9d6d9c2cdd0 100644
--- a/src/mgr/PyModuleRegistry.h
+++ b/src/mgr/PyModuleRegistry.h
@@ -122,9 +122,6 @@ public:
return standby_modules != nullptr;
}
- void active_shutdown();
- void shutdown();
-
std::vector<MonCommand> get_commands() const;
std::vector<ModuleCommand> get_py_commands() const;
diff --git a/src/os/bluestore/AvlAllocator.cc b/src/os/bluestore/AvlAllocator.cc
index 1a170d13fdb..8de249e978c 100644
--- a/src/os/bluestore/AvlAllocator.cc
+++ b/src/os/bluestore/AvlAllocator.cc
@@ -39,7 +39,7 @@ uint64_t AvlAllocator::_pick_block_after(uint64_t *cursor,
uint64_t search_bytes = 0;
auto rs_start = range_tree.lower_bound(range_t{*cursor, size}, compare);
for (auto rs = rs_start; rs != range_tree.end(); ++rs) {
- uint64_t offset = p2roundup(rs->start, align);
+ uint64_t offset = rs->start;
*cursor = offset + size;
if (offset + size <= rs->end) {
return offset;
@@ -59,7 +59,7 @@ uint64_t AvlAllocator::_pick_block_after(uint64_t *cursor,
}
// If we reached end, start from beginning till cursor.
for (auto rs = range_tree.begin(); rs != rs_start; ++rs) {
- uint64_t offset = p2roundup(rs->start, align);
+ uint64_t offset = rs->start;
*cursor = offset + size;
if (offset + size <= rs->end) {
return offset;
@@ -82,7 +82,7 @@ uint64_t AvlAllocator::_pick_block_fits(uint64_t size,
const auto compare = range_size_tree.key_comp();
auto rs_start = range_size_tree.lower_bound(range_t{0, size}, compare);
for (auto rs = rs_start; rs != range_size_tree.end(); ++rs) {
- uint64_t offset = p2roundup(rs->start, align);
+ uint64_t offset = rs->start;
if (offset + size <= rs->end) {
return offset;
}
diff --git a/src/os/bluestore/BlueFS.cc b/src/os/bluestore/BlueFS.cc
index 52857a16081..54d0bbc38e5 100644
--- a/src/os/bluestore/BlueFS.cc
+++ b/src/os/bluestore/BlueFS.cc
@@ -695,16 +695,24 @@ void BlueFS::_init_alloc()
}
logger->set(l_bluefs_wal_alloc_unit, wal_alloc_size);
+
+ uint64_t shared_alloc_size = cct->_conf->bluefs_shared_alloc_size;
+ if (shared_alloc && shared_alloc->a) {
+ uint64_t unit = shared_alloc->a->get_block_size();
+ shared_alloc_size = std::max(
+ unit,
+ shared_alloc_size);
+ ceph_assert(0 == p2phase(shared_alloc_size, unit));
+ }
if (bdev[BDEV_SLOW]) {
alloc_size[BDEV_DB] = cct->_conf->bluefs_alloc_size;
- alloc_size[BDEV_SLOW] = cct->_conf->bluefs_shared_alloc_size;
- logger->set(l_bluefs_db_alloc_unit, cct->_conf->bluefs_alloc_size);
- logger->set(l_bluefs_main_alloc_unit, cct->_conf->bluefs_shared_alloc_size);
+ alloc_size[BDEV_SLOW] = shared_alloc_size;
} else {
- alloc_size[BDEV_DB] = cct->_conf->bluefs_shared_alloc_size;
- logger->set(l_bluefs_main_alloc_unit, 0);
- logger->set(l_bluefs_db_alloc_unit, cct->_conf->bluefs_shared_alloc_size);
+ alloc_size[BDEV_DB] = shared_alloc_size;
+ alloc_size[BDEV_SLOW] = 0;
}
+ logger->set(l_bluefs_db_alloc_unit, alloc_size[BDEV_DB]);
+ logger->set(l_bluefs_main_alloc_unit, alloc_size[BDEV_SLOW]);
// new wal and db devices are never shared
if (bdev[BDEV_NEWWAL]) {
alloc_size[BDEV_NEWWAL] = cct->_conf->bluefs_alloc_size;
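The new guard above requires the shared allocation unit to be at least the backing allocator's block size and an exact multiple of it; p2phase(x, u) is x modulo a power-of-two u. A tiny worked example with assumed values (p2phase re-implemented here purely for illustration):

    #include <algorithm>
    #include <cassert>
    #include <cstdint>

    // p2phase(x, align) for a power-of-two align: the remainder x % align.
    constexpr uint64_t p2phase(uint64_t x, uint64_t align) {
      return x & (align - 1);
    }

    int main() {
      uint64_t unit = 64 * 1024;               // allocator block size (assumed)
      uint64_t conf = 1024 * 1024;             // bluefs_shared_alloc_size (assumed)
      uint64_t shared = std::max(unit, conf);  // never drops below the block size
      assert(p2phase(shared, unit) == 0);      // must be a whole multiple of it
      return 0;
    }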
@@ -718,13 +726,13 @@ void BlueFS::_init_alloc()
continue;
}
ceph_assert(bdev[id]->get_size());
- ceph_assert(alloc_size[id]);
if (is_shared_alloc(id)) {
dout(1) << __func__ << " shared, id " << id << std::hex
<< ", capacity 0x" << bdev[id]->get_size()
<< ", block size 0x" << alloc_size[id]
<< std::dec << dendl;
} else {
+ ceph_assert(alloc_size[id]);
std::string name = "bluefs-";
const char* devnames[] = { "wal","db","slow" };
if (id <= BDEV_SLOW)
@@ -1579,11 +1587,9 @@ int BlueFS::_replay(bool noop, bool to_stdout)
}
if (fnode.ino != 1) {
vselector->sub_usage(f->vselector_hint, f->fnode);
- }
+ vselector->add_usage(f->vselector_hint, fnode);
+ }
f->fnode = fnode;
- if (fnode.ino != 1) {
- vselector->add_usage(f->vselector_hint, f->fnode);
- }
if (fnode.ino > ino_last) {
ino_last = fnode.ino;
@@ -1825,7 +1831,7 @@ int BlueFS::device_migrate_to_existing(
// write entire file
auto l = _allocate(dev_target, bl.length(), 0,
- &file_ref->fnode, 0, false);
+ &file_ref->fnode, nullptr, 0, false);
if (l < 0) {
derr << __func__ << " unable to allocate len 0x" << std::hex
<< bl.length() << std::dec << " from " << (int)dev_target
@@ -1965,7 +1971,7 @@ int BlueFS::device_migrate_to_new(
// write entire file
auto l = _allocate(dev_target, bl.length(), 0,
- &file_ref->fnode, 0, false);
+ &file_ref->fnode, nullptr, 0, false);
if (l < 0) {
derr << __func__ << " unable to allocate len 0x" << std::hex
<< bl.length() << std::dec << " from " << (int)dev_target
@@ -2559,7 +2565,6 @@ void BlueFS::_rewrite_log_and_layout_sync_LNF_LD(bool permit_dev_fallback,
auto t0 = mono_clock::now();
File *log_file = log.writer->file.get();
- bluefs_fnode_t fnode_tail;
// log.t.seq is always set to current live seq
ceph_assert(log.t.seq == log.seq_live);
// Capturing entire state. Dump anything that has been stored there.
@@ -2614,7 +2619,8 @@ void BlueFS::_rewrite_log_and_layout_sync_LNF_LD(bool permit_dev_fallback,
dout(20) << __func__ << " compacted_meta_need " << compacted_meta_need << dendl;
- int r = _allocate(log_dev, compacted_meta_need, 0, &fnode_tail, 0,
+ bluefs_fnode_t fnode_tail;
+ int r = _allocate(log_dev, compacted_meta_need, 0, &fnode_tail, nullptr, 0,
permit_dev_fallback);
ceph_assert(r == 0);
@@ -2625,7 +2631,7 @@ void BlueFS::_rewrite_log_and_layout_sync_LNF_LD(bool permit_dev_fallback,
uint64_t starter_need = _make_initial_transaction(starter_seq, fnode_tail, 0, nullptr);
bluefs_fnode_t fnode_starter(log_file->fnode.ino, 0, mtime);
- r = _allocate(log_dev, starter_need, 0, &fnode_starter, 0,
+ r = _allocate(log_dev, starter_need, 0, &fnode_starter, nullptr, 0,
permit_dev_fallback);
ceph_assert(r == 0);
@@ -3126,16 +3132,17 @@ void BlueFS::_extend_log(uint64_t amount) {
}
ll.release();
uint64_t allocated_before_extension = log.writer->file->fnode.get_allocated();
- vselector->sub_usage(log.writer->file->vselector_hint, log.writer->file->fnode);
amount = round_up_to(amount, super.block_size);
int r = _allocate(
vselector->select_prefer_bdev(log.writer->file->vselector_hint),
amount,
0,
- &log.writer->file->fnode);
+ &log.writer->file->fnode,
+ [&](const bluefs_extent_t& e) {
+ vselector->add_usage(log.writer->file->vselector_hint, e);
+ });
ceph_assert(r == 0);
dout(10) << "extended log by 0x" << std::hex << amount << " bytes " << dendl;
- vselector->add_usage(log.writer->file->vselector_hint, log.writer->file->fnode);
bluefs_transaction_t log_extend_transaction;
log_extend_transaction.seq = log.t.seq;
@@ -3405,7 +3412,9 @@ int BlueFS::_flush_range_F(FileWriter *h, uint64_t offset, uint64_t length)
dout(10) << __func__ << " " << h << " pos 0x" << std::hex << h->pos
<< " 0x" << offset << "~" << length << std::dec
- << " to " << h->file->fnode << dendl;
+ << " to " << h->file->fnode
+ << " hint " << h->file->vselector_hint
+ << dendl;
if (h->file->deleted) {
dout(10) << __func__ << " deleted, no-op" << dendl;
return 0;
@@ -3426,7 +3435,6 @@ int BlueFS::_flush_range_F(FileWriter *h, uint64_t offset, uint64_t length)
ceph_assert(offset <= h->file->fnode.size);
uint64_t allocated = h->file->fnode.get_allocated();
- vselector->sub_usage(h->file->vselector_hint, h->file->fnode);
// do not bother to dirty the file if we are overwriting
// previously allocated extents.
if (allocated < offset + length) {
@@ -3435,25 +3443,26 @@ int BlueFS::_flush_range_F(FileWriter *h, uint64_t offset, uint64_t length)
int r = _allocate(vselector->select_prefer_bdev(h->file->vselector_hint),
offset + length - allocated,
0,
- &h->file->fnode);
+ &h->file->fnode,
+ [&](const bluefs_extent_t& e) {
+ vselector->add_usage(h->file->vselector_hint, e);
+ });
if (r < 0) {
derr << __func__ << " allocated: 0x" << std::hex << allocated
<< " offset: 0x" << offset << " length: 0x" << length << std::dec
<< dendl;
- vselector->add_usage(h->file->vselector_hint, h->file->fnode); // undo
ceph_abort_msg("bluefs enospc");
return r;
}
h->file->is_dirty = true;
}
if (h->file->fnode.size < offset + length) {
+ vselector->add_usage(h->file->vselector_hint, offset + length - h->file->fnode.size);
h->file->fnode.size = offset + length;
h->file->is_dirty = true;
}
-
dout(20) << __func__ << " file now, unflushed " << h->file->fnode << dendl;
int res = _flush_data(h, offset, length, buffered);
- vselector->add_usage(h->file->vselector_hint, h->file->fnode);
logger->tinc(l_bluefs_flush_lat, mono_clock::now() - t0);
return res;
}
@@ -3710,10 +3719,9 @@ int BlueFS::truncate(FileWriter *h, uint64_t offset)/*_WF_L*/
_flush_bdev(h);
std::lock_guard ll(log.lock);
- vselector->sub_usage(h->file->vselector_hint, h->file->fnode.size);
+ vselector->sub_usage(h->file->vselector_hint, h->file->fnode.size - offset);
h->file->fnode.size = offset;
h->file->is_dirty = true;
- vselector->add_usage(h->file->vselector_hint, h->file->fnode.size);
log.t.op_file_update_inc(h->file->fnode);
logger->tinc(l_bluefs_truncate_lat, mono_clock::now() - t0);
return 0;
@@ -3809,6 +3817,7 @@ const char* BlueFS::get_device_name(unsigned id)
int BlueFS::_allocate(uint8_t id, uint64_t len,
uint64_t alloc_unit,
bluefs_fnode_t* node,
+ update_fn_t cb,
size_t alloc_attempts,
bool permit_dev_fallback)
{
@@ -3892,6 +3901,7 @@ int BlueFS::_allocate(uint8_t id, uint64_t len,
len,
alloc_unit,
node,
+ cb,
alloc_attempts,
permit_dev_fallback);
} else if (permit_dev_fallback && id != BDEV_SLOW && alloc[id + 1]) {
@@ -3905,6 +3915,7 @@ int BlueFS::_allocate(uint8_t id, uint64_t len,
len,
0, // back to default alloc unit
node,
+ cb,
alloc_attempts,
permit_dev_fallback);
} else {
@@ -3924,9 +3935,12 @@ int BlueFS::_allocate(uint8_t id, uint64_t len,
}
for (auto& p : extents) {
- node->append_extent(bluefs_extent_t(id, p.offset, p.length));
+ bluefs_extent_t e(id, p.offset, p.length);
+ node->append_extent(e);
+ if (cb) {
+ cb(e);
+ }
}
-
return 0;
}
@@ -3945,12 +3959,13 @@ int BlueFS::preallocate(FileRef f, uint64_t off, uint64_t len)/*_LF*/
if (off + len > allocated) {
uint64_t want = off + len - allocated;
- vselector->sub_usage(f->vselector_hint, f->fnode);
int r = _allocate(vselector->select_prefer_bdev(f->vselector_hint),
want,
0,
- &f->fnode);
- vselector->add_usage(f->vselector_hint, f->fnode);
+ &f->fnode,
+ [&](const bluefs_extent_t& e) {
+ vselector->add_usage(f->vselector_hint, e);
+ });
if (r < 0)
return r;
@@ -4007,7 +4022,6 @@ int BlueFS::open_for_write(
_maybe_check_vselector_LNF();
FileRef file;
bool create = false;
- bool truncate = false;
mempool::bluefs::vector<bluefs_extent_t> pending_release_extents;
{
std::lock_guard ll(log.lock);
@@ -4034,10 +4048,12 @@ int BlueFS::open_for_write(
}
file = ceph::make_ref<File>();
file->fnode.ino = ++ino_last;
+ file->vselector_hint = vselector->get_hint_by_dir(dirname);
nodes.file_map[ino_last] = file;
dir->file_map.emplace_hint(q, string{filename}, file);
++file->refs;
create = true;
+ vselector->add_usage(file->vselector_hint, file->fnode.size, true); // update file count
logger->set(l_bluefs_num_files, nodes.file_map.size());
} else {
// overwrite existing file?
@@ -4052,8 +4068,8 @@ int BlueFS::open_for_write(
<< " already exists, truncate + overwrite" << dendl;
vselector->sub_usage(file->vselector_hint, file->fnode);
file->fnode.size = 0;
+ vselector->add_usage(file->vselector_hint, file->fnode.size, true); // restore file count
pending_release_extents.swap(file->fnode.extents);
- truncate = true;
file->fnode.clear_extents();
}
@@ -4061,11 +4077,6 @@ int BlueFS::open_for_write(
ceph_assert(file->fnode.ino > 1);
file->fnode.mtime = ceph_clock_now();
- file->vselector_hint = vselector->get_hint_by_dir(dirname);
- if (create || truncate) {
- vselector->add_usage(file->vselector_hint, file->fnode); // update file count
- }
-
dout(20) << __func__ << " mapping " << dirname << "/" << filename
<< " vsel_hint " << file->vselector_hint
<< dendl;
diff --git a/src/os/bluestore/BlueFS.h b/src/os/bluestore/BlueFS.h
index f0493a758da..759e71fdf05 100644
--- a/src/os/bluestore/BlueFS.h
+++ b/src/os/bluestore/BlueFS.h
@@ -88,15 +88,102 @@ public:
virtual ~BlueFSVolumeSelector() {
}
+ /**
+   * Method to learn a hint (aka logical level discriminator) specific to the
+   * BlueFS log.
+ *
+ */
virtual void* get_hint_for_log() const = 0;
+ /**
+   * Method to learn a hint (aka logical level discriminator) that the provided
+   * directory is bound to.
+ *
+ */
virtual void* get_hint_by_dir(std::string_view dirname) const = 0;
- virtual void add_usage(void* file_hint, const bluefs_fnode_t& fnode) = 0;
- virtual void sub_usage(void* file_hint, const bluefs_fnode_t& fnode) = 0;
- virtual void add_usage(void* file_hint, uint64_t fsize) = 0;
- virtual void sub_usage(void* file_hint, uint64_t fsize) = 0;
+ /**
+   * Increments stats for a given logical level using the provided fnode as a delta.
+ * Parameters:
+ * hint: logical level discriminator
+ * fnode: fnode metadata to be used as a complex delta value:
+ * (+1 file count, +file size, +all the extents)
+ *
+ */
+ void add_usage(void* hint, const bluefs_fnode_t& fnode) {
+ for (auto& e : fnode.extents) {
+ add_usage(hint, e);
+ }
+ add_usage(hint, fnode.size, true);
+ }
+ /**
+   * Decrements stats for a given logical level using the provided fnode as a delta.
+ * Parameters:
+ * hint: logical level discriminator
+ * fnode: fnode metadata to be used as a complex delta value:
+ * (-1 file count, -file size, -all the extents)
+ *
+ */
+ void sub_usage(void* hint, const bluefs_fnode_t& fnode) {
+ for (auto& e : fnode.extents) {
+ sub_usage(hint, e);
+ }
+ sub_usage(hint, fnode.size, true);
+ }
+ /**
+   * Increments stats for a given logical level using the provided extent as a delta.
+ * Parameters:
+ * hint: logical level discriminator
+ * extent: bluefs extent to be used as a complex delta value:
+ * (.bdev determines physical location, +length)
+ *
+ */
+ virtual void add_usage(void* hint, const bluefs_extent_t& extent) = 0;
+ /**
+   * Decrements stats for a given logical level using the provided extent as a delta.
+ * Parameters:
+ * hint: logical level discriminator
+ * extent: bluefs extent to be used as a complex delta value:
+ * (.bdev determines physical location, -length)
+ *
+ */
+ virtual void sub_usage(void* hint, const bluefs_extent_t& extent) = 0;
+ /**
+   * Increments the file count and overall file size for a given logical level
+ * Parameters:
+ * hint: logical level discriminator
+ * fsize: delta value for file size
+ * upd_files: whether or not to increment file count
+ *
+ */
+ virtual void add_usage(void* hint, uint64_t fsize, bool upd_files = false) = 0;
+ /**
+   * Decrements the file count and overall file size for a given logical level
+ * Parameters:
+ * hint: logical level discriminator
+ * fsize: delta value for file size
+ * upd_files: whether or not to decrement file count
+ *
+ */
+ virtual void sub_usage(void* hint, uint64_t fsize, bool upd_files = false) = 0;
+
+ /**
+ * Determines preferred physical device for the given logical level
+ * Parameters:
+ * hint: logical level discriminator
+ *
+ */
virtual uint8_t select_prefer_bdev(void* hint) = 0;
+ /**
+ * Builds path set for RocksDB to use
+ * Parameters:
+ * base: path's root
+ *
+ */
virtual void get_paths(const std::string& base, paths& res) const = 0;
+ /**
+ * Dumps VSelector's state
+ *
+ */
virtual void dump(std::ostream& sout) = 0;
/* used for sanity checking of vselector */
@@ -439,9 +526,12 @@ private:
return bdev[BDEV_SLOW] ? BDEV_SLOW : BDEV_DB;
}
const char* get_device_name(unsigned id);
+
+ typedef std::function<void(const bluefs_extent_t)> update_fn_t;
int _allocate(uint8_t bdev, uint64_t len,
uint64_t alloc_unit,
bluefs_fnode_t* node,
+ update_fn_t cb = nullptr,
size_t alloc_attempts = 0,
bool permit_dev_fallback = true);
@@ -716,19 +806,19 @@ public:
void* get_hint_for_log() const override;
void* get_hint_by_dir(std::string_view dirname) const override;
- void add_usage(void* hint, const bluefs_fnode_t& fnode) override {
+ void add_usage(void* hint, const bluefs_extent_t& extent) override {
// do nothing
return;
}
- void sub_usage(void* hint, const bluefs_fnode_t& fnode) override {
+ void sub_usage(void* hint, const bluefs_extent_t& extent) override {
// do nothing
return;
}
- void add_usage(void* hint, uint64_t fsize) override {
+ void add_usage(void*, uint64_t, bool) override {
// do nothing
return;
}
- void sub_usage(void* hint, uint64_t fsize) override {
+ void sub_usage(void*, uint64_t, bool) override {
// do nothing
return;
}
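The update_fn_t callback threaded through _allocate above lets the volume selector account each freshly appended extent as it is handed out, instead of subtracting and re-adding the whole fnode around every allocation. A simplified stand-alone sketch of that wiring (hypothetical types, not the BlueFS API):

    #include <cstddef>
    #include <cstdint>
    #include <functional>
    #include <vector>

    struct Extent { uint8_t bdev; uint64_t length; };
    using UpdateFn = std::function<void(const Extent&)>;

    // Allocator side: append whatever extents satisfy the request and report
    // each one through the optional callback.
    void allocate(std::vector<Extent>& node, uint64_t want, const UpdateFn& cb) {
      Extent e{0, want};  // pretend one extent covers the whole request
      node.push_back(e);
      if (cb) cb(e);
    }

    // Caller side: bump this file's per-level usage as each extent arrives,
    // mirroring the vselector->add_usage(hint, e) lambdas in the patch.
    void grow_file(std::vector<Extent>& fnode, uint64_t want,
                   std::vector<uint64_t>& per_level_usage, size_t level) {
      allocate(fnode, want, [&](const Extent& e) {
        per_level_usage[level] += e.length;
      });
    }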
diff --git a/src/os/bluestore/BlueStore.cc b/src/os/bluestore/BlueStore.cc
index b8dca31c057..0f79fc4f095 100644
--- a/src/os/bluestore/BlueStore.cc
+++ b/src/os/bluestore/BlueStore.cc
@@ -7396,7 +7396,7 @@ int BlueStore::_open_bluefs(bool create, bool read_only)
bluefs->get_block_device_size(BlueFS::BDEV_WAL) * 95 / 100,
bluefs->get_block_device_size(BlueFS::BDEV_DB) * 95 / 100,
bluefs->get_block_device_size(BlueFS::BDEV_SLOW) * 95 / 100,
- 1024 * 1024 * 1024, //FIXME: set expected l0 size here
+ rocks_opts.write_buffer_size * rocks_opts.max_write_buffer_number,
rocks_opts.max_bytes_for_level_base,
rocks_opts.max_bytes_for_level_multiplier,
reserved_factor,
@@ -19266,7 +19266,13 @@ void RocksDBBlueFSVolumeSelector::dump(ostream& sout) {
auto max_x = per_level_per_dev_usage.get_max_x();
auto max_y = per_level_per_dev_usage.get_max_y();
- sout << "RocksDBBlueFSVolumeSelector Usage Matrix:" << std::endl;
+ sout << "RocksDBBlueFSVolumeSelector " << std::endl;
+ sout << ">>Settings<<"
+ << " extra=" << byte_u_t(db_avail4slow)
+ << ", l0_size=" << byte_u_t(level0_size)
+ << ", l_base=" << byte_u_t(level_base)
+ << ", l_multi=" << byte_u_t(level_multiplier)
+ << std::endl;
constexpr std::array<const char*, 8> names{ {
"DEV/LEV",
"WAL",
diff --git a/src/os/bluestore/BlueStore.h b/src/os/bluestore/BlueStore.h
index 095c838608e..beac82f163a 100644
--- a/src/os/bluestore/BlueStore.h
+++ b/src/os/bluestore/BlueStore.h
@@ -4240,6 +4240,9 @@ class RocksDBBlueFSVolumeSelector : public BlueFSVolumeSelector
uint64_t l_totals[LEVEL_MAX - LEVEL_FIRST];
uint64_t db_avail4slow = 0;
+ uint64_t level0_size = 0;
+ uint64_t level_base = 0;
+ uint64_t level_multiplier = 0;
enum {
OLD_POLICY,
USE_SOME_EXTRA
@@ -4265,21 +4268,24 @@ public:
if (!new_pol) {
return;
}
-
// Calculating how much extra space is available at DB volume.
// Depending on the presence of explicit reserved size specification it might be either
// * DB volume size - reserved
// or
// * DB volume size - sum_max_level_size(0, L-1) - max_level_size(L) * reserved_factor
if (!reserved) {
+ level0_size = _level0_size;
+ level_base = _level_base;
+ level_multiplier = _level_multiplier;
uint64_t prev_levels = _level0_size;
uint64_t cur_level = _level_base;
- uint64_t cur_threshold = 0;
+ uint64_t cur_threshold = prev_levels + cur_level;
do {
- uint64_t next_level = cur_level * _level_multiplier;
- uint64_t next_threshold = prev_levels + cur_level + next_level * reserved_factor;
+ uint64_t next_level = cur_level * _level_multiplier;
+ uint64_t next_threshold = prev_levels + cur_level + next_level;
if (_db_total <= next_threshold) {
- db_avail4slow = cur_threshold ? _db_total - cur_threshold : 0;
+ cur_threshold *= reserved_factor;
+ db_avail4slow = cur_threshold < _db_total ? _db_total - cur_threshold : 0;
break;
} else {
prev_levels += cur_level;
@@ -4288,7 +4294,7 @@ public:
}
} while (true);
} else {
- db_avail4slow = _db_total - reserved;
+ db_avail4slow = reserved < _db_total ? _db_total - reserved : 0;
}
}
@@ -4297,63 +4303,40 @@ public:
}
void* get_hint_by_dir(std::string_view dirname) const override;
- void add_usage(void* hint, const bluefs_fnode_t& fnode) override {
+ void add_usage(void* hint, const bluefs_extent_t& extent) override {
if (hint == nullptr)
return;
size_t pos = (size_t)hint - LEVEL_FIRST;
- for (auto& p : fnode.extents) {
- auto& cur = per_level_per_dev_usage.at(p.bdev, pos);
- auto& max = per_level_per_dev_max.at(p.bdev, pos);
- uint64_t v = cur.fetch_add(p.length) + p.length;
- while (v > max) {
- max.exchange(v);
- }
- {
- //update per-device totals
- auto& cur = per_level_per_dev_usage.at(p.bdev, LEVEL_MAX - LEVEL_FIRST);
- auto& max = per_level_per_dev_max.at(p.bdev, LEVEL_MAX - LEVEL_FIRST);
- uint64_t v = cur.fetch_add(p.length) + p.length;
- while (v > max) {
- max.exchange(v);
- }
- }
+ auto& cur = per_level_per_dev_usage.at(extent.bdev, pos);
+ auto& max = per_level_per_dev_max.at(extent.bdev, pos);
+ uint64_t v = cur.fetch_add(extent.length) + extent.length;
+ while (v > max) {
+ max.exchange(v);
}
{
- //update per-level actual totals
- auto& cur = per_level_per_dev_usage.at(BlueFS::MAX_BDEV, pos);
- auto& max = per_level_per_dev_max.at(BlueFS::MAX_BDEV, pos);
- uint64_t v = cur.fetch_add(fnode.size) + fnode.size;
+ //update per-device totals
+ auto& cur = per_level_per_dev_usage.at(extent.bdev, LEVEL_MAX - LEVEL_FIRST);
+ auto& max = per_level_per_dev_max.at(extent.bdev, LEVEL_MAX - LEVEL_FIRST);
+ uint64_t v = cur.fetch_add(extent.length) + extent.length;
while (v > max) {
max.exchange(v);
}
}
- ++per_level_files[pos];
- ++per_level_files[LEVEL_MAX - LEVEL_FIRST];
}
- void sub_usage(void* hint, const bluefs_fnode_t& fnode) override {
+ void sub_usage(void* hint, const bluefs_extent_t& extent) override {
if (hint == nullptr)
return;
size_t pos = (size_t)hint - LEVEL_FIRST;
- for (auto& p : fnode.extents) {
- auto& cur = per_level_per_dev_usage.at(p.bdev, pos);
- ceph_assert(cur >= p.length);
- cur -= p.length;
-
- //update per-device totals
- auto& cur2 = per_level_per_dev_usage.at(p.bdev, LEVEL_MAX - LEVEL_FIRST);
- ceph_assert(cur2 >= p.length);
- cur2 -= p.length;
- }
- //update per-level actual totals
- auto& cur = per_level_per_dev_usage.at(BlueFS::MAX_BDEV, pos);
- ceph_assert(cur >= fnode.size);
- cur -= fnode.size;
- ceph_assert(per_level_files[pos] > 0);
- --per_level_files[pos];
- ceph_assert(per_level_files[LEVEL_MAX - LEVEL_FIRST] > 0);
- --per_level_files[LEVEL_MAX - LEVEL_FIRST];
+ auto& cur = per_level_per_dev_usage.at(extent.bdev, pos);
+ ceph_assert(cur >= extent.length);
+ cur -= extent.length;
+
+ //update per-device totals
+ auto& cur2 = per_level_per_dev_usage.at(extent.bdev, LEVEL_MAX - LEVEL_FIRST);
+ ceph_assert(cur2 >= extent.length);
+ cur2 -= extent.length;
}
- void add_usage(void* hint, uint64_t size_more) override {
+ void add_usage(void* hint, uint64_t size_more, bool upd_files) override {
if (hint == nullptr)
return;
size_t pos = (size_t)hint - LEVEL_FIRST;
@@ -4364,8 +4347,12 @@ public:
while (v > max) {
max.exchange(v);
}
+ if (upd_files) {
+ ++per_level_files[pos];
+ ++per_level_files[LEVEL_MAX - LEVEL_FIRST];
+ }
}
- void sub_usage(void* hint, uint64_t size_less) override {
+ void sub_usage(void* hint, uint64_t size_less, bool upd_files) override {
if (hint == nullptr)
return;
size_t pos = (size_t)hint - LEVEL_FIRST;
@@ -4373,6 +4360,12 @@ public:
auto& cur = per_level_per_dev_usage.at(BlueFS::MAX_BDEV, pos);
ceph_assert(cur >= size_less);
cur -= size_less;
+ if (upd_files) {
+ ceph_assert(per_level_files[pos] > 0);
+ --per_level_files[pos];
+ ceph_assert(per_level_files[LEVEL_MAX - LEVEL_FIRST] > 0);
+ --per_level_files[LEVEL_MAX - LEVEL_FIRST];
+ }
}
uint8_t select_prefer_bdev(void* h) override;
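For reference, here is a minimal standalone sketch of the threshold loop above. It is illustrative only (the names, the continuation of the else-branch, and the main() driver are assumptions, not the Ceph code); it estimates the "extra" DB space the selector would consider usable for slow-device data.

#include <cstdint>
#include <iostream>

// Illustrative only: given a DB volume of db_total bytes and the RocksDB
// level sizing now captured in level0_size/level_base/level_multiplier,
// walk the cumulative level thresholds and return the leftover space.
uint64_t estimate_db_avail4slow(uint64_t db_total,
                                uint64_t level0_size,
                                uint64_t level_base,
                                uint64_t level_multiplier,
                                uint64_t reserved_factor)
{
  uint64_t prev_levels = level0_size;
  uint64_t cur_level = level_base;
  uint64_t cur_threshold = prev_levels + cur_level;
  while (true) {
    uint64_t next_level = cur_level * level_multiplier;
    uint64_t next_threshold = prev_levels + cur_level + next_level;
    if (db_total <= next_threshold) {
      cur_threshold *= reserved_factor;
      return cur_threshold < db_total ? db_total - cur_threshold : 0;
    }
    // assumed continuation, mirroring the elided else-branch of the loop
    prev_levels += cur_level;
    cur_level = next_level;
    cur_threshold = next_threshold;
  }
}

int main()
{
  // e.g. a 60 GiB DB volume, 1 GiB L0, 1 GiB level base, x10 multiplier, factor 2
  std::cout << estimate_db_avail4slow(60ULL << 30, 1ULL << 30, 1ULL << 30, 10, 2)
            << " bytes usable for slow-device data\n";
}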
diff --git a/src/os/bluestore/BtreeAllocator.cc b/src/os/bluestore/BtreeAllocator.cc
index 2071603a219..2647b875992 100644
--- a/src/os/bluestore/BtreeAllocator.cc
+++ b/src/os/bluestore/BtreeAllocator.cc
@@ -25,7 +25,7 @@ uint64_t BtreeAllocator::_pick_block_after(uint64_t *cursor,
{
auto rs_start = range_tree.lower_bound(*cursor);
for (auto rs = rs_start; rs != range_tree.end(); ++rs) {
- uint64_t offset = p2roundup(rs->first, align);
+ uint64_t offset = rs->first;
if (offset + size <= rs->second) {
*cursor = offset + size;
return offset;
@@ -37,7 +37,7 @@ uint64_t BtreeAllocator::_pick_block_after(uint64_t *cursor,
}
// If we reached end, start from beginning till cursor.
for (auto rs = range_tree.begin(); rs != rs_start; ++rs) {
- uint64_t offset = p2roundup(rs->first, align);
+ uint64_t offset = rs->first;
if (offset + size <= rs->second) {
*cursor = offset + size;
return offset;
@@ -53,7 +53,7 @@ uint64_t BtreeAllocator::_pick_block_fits(uint64_t size,
// the needs
auto rs_start = range_size_tree.lower_bound(range_value_t{0,size});
for (auto rs = rs_start; rs != range_size_tree.end(); ++rs) {
- uint64_t offset = p2roundup(rs->start, align);
+ uint64_t offset = rs->start;
if (offset + size <= rs->start + rs->size) {
return offset;
}
diff --git a/src/os/bluestore/StupidAllocator.cc b/src/os/bluestore/StupidAllocator.cc
index 550024e67e7..8f74a499ed3 100644
--- a/src/os/bluestore/StupidAllocator.cc
+++ b/src/os/bluestore/StupidAllocator.cc
@@ -52,20 +52,6 @@ void StupidAllocator::_insert_free(uint64_t off, uint64_t len)
}
}
-/// return the effective length of the extent if we align to alloc_unit
-uint64_t StupidAllocator::_aligned_len(
- StupidAllocator::interval_set_t::iterator p,
- uint64_t alloc_unit)
-{
- uint64_t skew = p.get_start() % alloc_unit;
- if (skew)
- skew = alloc_unit - skew;
- if (skew > p.get_len())
- return 0;
- else
- return p.get_len() - skew;
-}
-
int64_t StupidAllocator::allocate_int(
uint64_t want_size, uint64_t alloc_unit, int64_t hint,
uint64_t *offset, uint32_t *length)
@@ -89,7 +75,7 @@ int64_t StupidAllocator::allocate_int(
for (bin = orig_bin; bin < (int)free.size(); ++bin) {
p = free[bin].lower_bound(hint);
while (p != free[bin].end()) {
- if (_aligned_len(p, alloc_unit) >= want_size) {
+ if (p.get_len() >= want_size) {
goto found;
}
++p;
@@ -102,7 +88,7 @@ int64_t StupidAllocator::allocate_int(
p = free[bin].begin();
auto end = hint ? free[bin].lower_bound(hint) : free[bin].end();
while (p != end) {
- if (_aligned_len(p, alloc_unit) >= want_size) {
+ if (p.get_len() >= want_size) {
goto found;
}
++p;
@@ -114,7 +100,7 @@ int64_t StupidAllocator::allocate_int(
for (bin = orig_bin; bin >= 0; --bin) {
p = free[bin].lower_bound(hint);
while (p != free[bin].end()) {
- if (_aligned_len(p, alloc_unit) >= alloc_unit) {
+ if (p.get_len() >= alloc_unit) {
goto found;
}
++p;
@@ -127,7 +113,7 @@ int64_t StupidAllocator::allocate_int(
p = free[bin].begin();
auto end = hint ? free[bin].lower_bound(hint) : free[bin].end();
while (p != end) {
- if (_aligned_len(p, alloc_unit) >= alloc_unit) {
+ if (p.get_len() >= alloc_unit) {
goto found;
}
++p;
@@ -137,11 +123,9 @@ int64_t StupidAllocator::allocate_int(
return -ENOSPC;
found:
- uint64_t skew = p.get_start() % alloc_unit;
- if (skew)
- skew = alloc_unit - skew;
- *offset = p.get_start() + skew;
- *length = std::min(std::max(alloc_unit, want_size), p2align((p.get_len() - skew), alloc_unit));
+ *offset = p.get_start();
+ *length = std::min(std::max(alloc_unit, want_size), p2align(p.get_len(), alloc_unit));
+
if (cct->_conf->bluestore_debug_small_allocations) {
uint64_t max =
alloc_unit * (rand() % cct->_conf->bluestore_debug_small_allocations);
@@ -158,7 +142,7 @@ int64_t StupidAllocator::allocate_int(
free[bin].erase(*offset, *length);
uint64_t off, len;
- if (*offset && free[bin].contains(*offset - skew - 1, &off, &len)) {
+ if (*offset && free[bin].contains(*offset - 1, &off, &len)) {
int newbin = _choose_bin(len);
if (newbin != bin) {
ldout(cct, 30) << __func__ << " demoting 0x" << std::hex << off << "~" << len
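For context, a small self-contained sketch of the alignment arithmetic that the removed _aligned_len() implemented, together with stand-ins for the p2align/p2roundup helpers (power-of-two alignment assumed). This is illustration only, not the Ceph headers:

#include <cstdint>
#include <iostream>

// stand-ins for the intarith-style helpers (align must be a power of two)
inline uint64_t p2align_u64(uint64_t x, uint64_t align)   { return x & ~(align - 1); }
inline uint64_t p2roundup_u64(uint64_t x, uint64_t align) { return (x + align - 1) & ~(align - 1); }

// what the removed helper computed: the usable length of a free extent
// [start, start+len) once its start is rounded up to alloc_unit
uint64_t aligned_len(uint64_t start, uint64_t len, uint64_t alloc_unit)
{
  uint64_t skew = start % alloc_unit;
  if (skew)
    skew = alloc_unit - skew;
  return skew > len ? 0 : len - skew;
}

int main()
{
  // an extent at 0x1800 of length 0x3000 with a 4 KiB alloc unit:
  // 0x800 bytes are lost to the round-up, leaving 0x2800 usable
  std::cout << std::hex << aligned_len(0x1800, 0x3000, 0x1000) << "\n";
  std::cout << std::hex << p2roundup_u64(0x1800, 0x1000) << " "
            << p2align_u64(0x2fff, 0x1000) << "\n";
}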
diff --git a/src/os/bluestore/StupidAllocator.h b/src/os/bluestore/StupidAllocator.h
index 0d50d73f42a..443b091350e 100644
--- a/src/os/bluestore/StupidAllocator.h
+++ b/src/os/bluestore/StupidAllocator.h
@@ -31,10 +31,6 @@ class StupidAllocator : public Allocator {
unsigned _choose_bin(uint64_t len);
void _insert_free(uint64_t offset, uint64_t len);
- uint64_t _aligned_len(
- interval_set_t::iterator p,
- uint64_t alloc_unit);
-
public:
StupidAllocator(CephContext* cct,
int64_t size,
diff --git a/src/os/bluestore/bluestore_types.cc b/src/os/bluestore/bluestore_types.cc
index a3d0d41acb5..3c8dc84810c 100644
--- a/src/os/bluestore/bluestore_types.cc
+++ b/src/os/bluestore/bluestore_types.cc
@@ -1181,6 +1181,15 @@ void bluestore_onode_t::shard_info::dump(Formatter *f) const
f->dump_unsigned("bytes", bytes);
}
+void bluestore_onode_t::shard_info::generate_test_instances(
+ list<shard_info*>& o)
+{
+ o.push_back(new shard_info);
+ o.push_back(new shard_info);
+ o.back()->offset = 123;
+ o.back()->bytes = 456;
+}
+
ostream& operator<<(ostream& out, const bluestore_onode_t::shard_info& si)
{
return out << std::hex << "0x" << si.offset << "(0x" << si.bytes << " bytes"
diff --git a/src/os/bluestore/bluestore_types.h b/src/os/bluestore/bluestore_types.h
index 8a0e0b7c9f7..66d4189ab6d 100644
--- a/src/os/bluestore/bluestore_types.h
+++ b/src/os/bluestore/bluestore_types.h
@@ -1011,6 +1011,7 @@ struct bluestore_onode_t {
denc_varint(v.bytes, p);
}
void dump(ceph::Formatter *f) const;
+ static void generate_test_instances(std::list<shard_info*>& ls);
};
std::vector<shard_info> extent_map_shards; ///< extent std::map shards (if any)
diff --git a/src/os/bluestore/fastbmap_allocator_impl.cc b/src/os/bluestore/fastbmap_allocator_impl.cc
index cea046c3ff2..4833b9d1a7b 100644
--- a/src/os/bluestore/fastbmap_allocator_impl.cc
+++ b/src/os/bluestore/fastbmap_allocator_impl.cc
@@ -17,19 +17,9 @@ uint64_t AllocatorLevel::l2_allocs = 0;
inline interval_t _align2units(uint64_t offset, uint64_t len, uint64_t min_length)
{
- interval_t res;
- if (len >= min_length) {
- res.offset = p2roundup(offset, min_length);
- auto delta_off = res.offset - offset;
- if (len > delta_off) {
- res.length = len - delta_off;
- res.length = p2align<uint64_t>(res.length, min_length);
- if (res.length) {
- return res;
- }
- }
- }
- return interval_t();
+ return len >= min_length ?
+ interval_t(offset, p2align<uint64_t>(len, min_length)) :
+ interval_t();
}
interval_t AllocatorLevel01Loose::_get_longest_from_l0(uint64_t pos0,
diff --git a/src/osd/PG.cc b/src/osd/PG.cc
index d2f97a129a2..490f8fa68d5 100644
--- a/src/osd/PG.cc
+++ b/src/osd/PG.cc
@@ -1490,7 +1490,7 @@ std::optional<requested_scrub_t> PG::validate_initiated_scrub(
upd_flags.time_for_deep = time_for_deep;
upd_flags.deep_scrub_on_error = false;
- upd_flags.auto_repair = false; // will only be considered for periodic scrubs
+ upd_flags.auto_repair = false;
if (upd_flags.must_deep_scrub) {
upd_flags.calculated_to_deep = true;
@@ -1506,6 +1506,25 @@ std::optional<requested_scrub_t> PG::validate_initiated_scrub(
}
}
+ if (try_to_auto_repair) {
+ // for shallow scrubs: rescrub if errors found
+ // for deep: turn 'auto-repair' on
+ if (upd_flags.calculated_to_deep) {
+ dout(10) << fmt::format(
+ "{}: performing an auto-repair deep scrub",
+ __func__)
+ << dendl;
+ upd_flags.auto_repair = true;
+ } else {
+ dout(10) << fmt::format(
+ "{}: will perform an auto-repair deep scrub if errors "
+ "are found",
+ __func__)
+ << dendl;
+ upd_flags.deep_scrub_on_error = true;
+ }
+ }
+
return upd_flags;
}
@@ -1823,6 +1842,11 @@ void PG::on_activate(interval_set<snapid_t> snaps)
m_scrubber->on_pg_activate(m_planned_scrub);
}
+void PG::on_replica_activate()
+{
+ m_scrubber->on_replica_activate();
+}
+
void PG::on_active_exit()
{
backfill_reserving = false;
diff --git a/src/osd/PG.h b/src/osd/PG.h
index 2e82e74ab01..8713b1c8ae8 100644
--- a/src/osd/PG.h
+++ b/src/osd/PG.h
@@ -624,6 +624,8 @@ public:
void on_activate(interval_set<snapid_t> snaps) override;
+ void on_replica_activate() override;
+
void on_activate_committed() override;
void on_active_actmap() override;
@@ -1446,10 +1448,13 @@ public:
*/
class PGLockWrapper {
public:
- explicit PGLockWrapper(PGRef locked_pg) : m_pg{locked_pg} {}
+ template <typename A_PG_REF>
+ explicit PGLockWrapper(A_PG_REF&& locked_pg)
+ : m_pg{std::forward<A_PG_REF>(locked_pg)}
+ {}
PGRef pg() { return m_pg; }
~PGLockWrapper();
- PGLockWrapper(PGLockWrapper&& rhs) : m_pg(std::move(rhs.m_pg)) {
+ PGLockWrapper(PGLockWrapper&& rhs) noexcept : m_pg(std::move(rhs.m_pg)) {
rhs.m_pg = nullptr;
}
PGLockWrapper(const PGLockWrapper& rhs) = delete;
diff --git a/src/osd/PeeringState.cc b/src/osd/PeeringState.cc
index 2c41b7b7118..c81cb958a7a 100644
--- a/src/osd/PeeringState.cc
+++ b/src/osd/PeeringState.cc
@@ -2967,6 +2967,8 @@ void PeeringState::activate(
state_set(PG_STATE_ACTIVATING);
pl->on_activate(std::move(to_trim));
+ } else {
+ pl->on_replica_activate();
}
if (acting_set_writeable()) {
PGLog::LogEntryHandlerRef rollbacker{pl->get_log_handler(t)};
diff --git a/src/osd/PeeringState.h b/src/osd/PeeringState.h
index b9f8401b05b..89ef702e0fe 100644
--- a/src/osd/PeeringState.h
+++ b/src/osd/PeeringState.h
@@ -389,6 +389,7 @@ public:
virtual void on_role_change() = 0;
virtual void on_change(ObjectStore::Transaction &t) = 0;
virtual void on_activate(interval_set<snapid_t> to_trim) = 0;
+ virtual void on_replica_activate() {}
virtual void on_activate_complete() = 0;
virtual void on_new_interval() = 0;
virtual Context *on_clean() = 0;
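The new hook follows the existing listener pattern: PeeringState::activate() notifies its listener on the non-primary branch, and PG forwards the call to the scrubber. A minimal sketch of that pattern (simplified names, not the real Ceph interfaces):

#include <iostream>

// Simplified listener: the default on_replica_activate() is a no-op, so
// existing implementations are unaffected by the new virtual.
struct PeeringListener {
  virtual ~PeeringListener() = default;
  virtual void on_activate() = 0;           // primary path (pre-existing)
  virtual void on_replica_activate() {}     // replica path (new, default no-op)
};

struct ScrubAwarePG : PeeringListener {
  void on_activate() override { std::cout << "primary activated\n"; }
  void on_replica_activate() override {
    // in the real code this forwards a ReplicaActivate event to the scrub FSM
    std::cout << "replica activated -> notify scrubber\n";
  }
};

void activate(PeeringListener& pl, bool is_primary)
{
  if (is_primary) {
    pl.on_activate();
  } else {
    pl.on_replica_activate();
  }
}

int main()
{
  ScrubAwarePG pg;
  activate(pg, false);  // takes the replica path
}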
diff --git a/src/osd/scrubber/osd_scrub.cc b/src/osd/scrubber/osd_scrub.cc
index e3a71e26234..99367170dba 100644
--- a/src/osd/scrubber/osd_scrub.cc
+++ b/src/osd/scrubber/osd_scrub.cc
@@ -441,14 +441,14 @@ void OsdScrub::dec_scrubs_local()
m_resource_bookkeeper.dec_scrubs_local();
}
-bool OsdScrub::inc_scrubs_remote()
+bool OsdScrub::inc_scrubs_remote(pg_t pgid)
{
- return m_resource_bookkeeper.inc_scrubs_remote();
+ return m_resource_bookkeeper.inc_scrubs_remote(pgid);
}
-void OsdScrub::dec_scrubs_remote()
+void OsdScrub::dec_scrubs_remote(pg_t pgid)
{
- m_resource_bookkeeper.dec_scrubs_remote();
+ m_resource_bookkeeper.dec_scrubs_remote(pgid);
}
void OsdScrub::mark_pg_scrub_blocked(spg_t blocked_pg)
diff --git a/src/osd/scrubber/osd_scrub.h b/src/osd/scrubber/osd_scrub.h
index 570430660ed..56167df2ee6 100644
--- a/src/osd/scrubber/osd_scrub.h
+++ b/src/osd/scrubber/osd_scrub.h
@@ -67,8 +67,8 @@ class OsdScrub {
// updating the resource counters
bool inc_scrubs_local();
void dec_scrubs_local();
- bool inc_scrubs_remote();
- void dec_scrubs_remote();
+ bool inc_scrubs_remote(pg_t pgid);
+ void dec_scrubs_remote(pg_t pgid);
// counting the number of PGs stuck while scrubbing, waiting for objects
void mark_pg_scrub_blocked(spg_t blocked_pg);
diff --git a/src/osd/scrubber/pg_scrubber.cc b/src/osd/scrubber/pg_scrubber.cc
index 4cd861b89c8..edc6c2a2dcd 100644
--- a/src/osd/scrubber/pg_scrubber.cc
+++ b/src/osd/scrubber/pg_scrubber.cc
@@ -85,6 +85,13 @@ ostream& operator<<(ostream& out, const requested_scrub_t& sf)
return out;
}
+void PgScrubber::on_replica_activate()
+{
+ dout(10) << __func__ << dendl;
+ m_fsm->process_event(ReplicaActivate{});
+}
+
+
/*
* if the incoming message is from a previous interval, it must mean
* PrimaryLogPG::on_change() was called when that interval ended. We can safely
@@ -197,7 +204,6 @@ bool PgScrubber::should_abort() const
*
* Some of the considerations above are also relevant to the replica-side
* initiation
- * ('StartReplica' & 'StartReplicaNoWait').
*/
void PgScrubber::initiate_regular_scrub(epoch_t epoch_queued)
@@ -216,11 +222,6 @@ void PgScrubber::initiate_regular_scrub(epoch_t epoch_queued)
}
}
-void PgScrubber::dec_scrubs_remote()
-{
- m_osds->get_scrub_services().dec_scrubs_remote();
-}
-
void PgScrubber::advance_token()
{
m_current_token++;
@@ -274,13 +275,7 @@ void PgScrubber::send_start_replica(epoch_t epoch_queued,
}
if (check_interval(epoch_queued) && is_token_current(token)) {
- // save us some time by not waiting for updates if there are none
- // to wait for. Affects the transition from NotActive into either
- // ReplicaWaitUpdates or ActiveReplica.
- if (pending_active_pushes())
- m_fsm->process_event(StartReplica{});
- else
- m_fsm->process_event(StartReplicaNoWait{});
+ m_fsm->process_event(StartReplica{});
}
dout(10) << "scrubber event --<< " << __func__ << dendl;
}
@@ -452,6 +447,11 @@ unsigned int PgScrubber::scrub_requeue_priority(
* Responsible for resetting any scrub state and releasing any resources.
* Any inflight events will be ignored via check_interval/should_drop_message
* or canceled.
+ * Specifically:
+ * - if Primary and in an active session - the IntervalChanged handler takes
+ * care of discarding the remote reservations, and transitioning out of
+ * Session. That resets both the scrubber and the FSM.
+ * - if we are a reserved replica - we need to free ourselves.
*/
void PgScrubber::on_new_interval()
{
@@ -461,13 +461,7 @@ void PgScrubber::on_new_interval()
is_scrub_active(), is_queued_or_active())
<< dendl;
- // If in active session - the IntervalChanged handler takes care of
- // discarding the remote reservations, and transitioning out of Session.
- // That resets both the scrubber and the FSM.
m_fsm->process_event(IntervalChanged{});
-
- // The 'FullReset' is only relevant if we are not an active Primary
- m_fsm->process_event(FullReset{});
rm_from_osd_scrubbing();
}
@@ -806,7 +800,7 @@ void PgScrubber::cancel_callback(scrubber_callback_cancel_token_t token)
m_osds->sleep_timer.cancel_event(token);
}
-LogChannelRef &PgScrubber::get_clog() const
+LogChannelRef& PgScrubber::get_clog() const
{
return m_osds->clog;
}
@@ -816,6 +810,11 @@ int PgScrubber::get_whoami() const
return m_osds->whoami;
}
+[[nodiscard]] bool PgScrubber::is_high_priority() const
+{
+ return m_flags.required;
+}
+
/*
* The selected range is set directly into 'm_start' and 'm_end'
* setting:
@@ -1139,13 +1138,7 @@ void PgScrubber::on_init()
m_pg->publish_stats_to_osd();
}
-/*
- * Note: as on_replica_init() is likely to be called twice (entering
- * both ReplicaWaitUpdates & ActiveReplica), its operations should be
- * idempotent.
- * Now that it includes some state-changing operations, we need to check
- * m_active against double-activation.
- */
+
void PgScrubber::on_replica_init()
{
dout(10) << __func__ << " called with 'active' "
@@ -1159,6 +1152,7 @@ void PgScrubber::on_replica_init()
}
}
+
int PgScrubber::build_primary_map_chunk()
{
epoch_t map_building_since = m_pg->get_osdmap_epoch();
@@ -1217,23 +1211,21 @@ int PgScrubber::build_replica_map_chunk()
// the local map has been created. Send it to the primary.
// Note: once the message reaches the Primary, it may ask us for another
- // chunk - and we better be done with the current scrub. Thus - the
- // preparation of the reply message is separate, and we clear the scrub
- // state before actually sending it.
+ // chunk - and we had better be done with the current scrub. The clearing of
+ // state must be complete before we relinquish the PG lock.
- auto reply = prep_replica_map_msg(PreemptionNoted::no_preemption);
- replica_handling_done();
- dout(15) << __func__ << " chunk map sent " << dendl;
- send_replica_map(reply);
- } break;
+ send_replica_map(prep_replica_map_msg(PreemptionNoted::no_preemption));
+ dout(15) << fmt::format("{}: chunk map sent", __func__) << dendl;
+ }
+ break;
default:
// negative retval: build_scrub_map_chunk() signalled an error
// Pre-Pacific code ignored this option, treating it as a success.
// \todo Add an error flag in the returning message.
+ // \todo: must either abort, send a reply, or return some error message
dout(1) << "Error! Aborting. ActiveReplica::react(SchedReplica) Ret: "
<< ret << dendl;
- replica_handling_done();
// only in debug mode for now:
assert(false && "backend error");
break;
@@ -1520,6 +1512,7 @@ void PgScrubber::replica_scrub_op(OpRequestRef op)
replica_scrubmap_pos.reset(); // needed? RRR
set_queued_or_active();
+ advance_token();
m_osds->queue_for_rep_scrub(m_pg,
m_replica_request_priority,
m_flags.priority,
@@ -1675,7 +1668,7 @@ void PgScrubber::handle_scrub_reserve_msgs(OpRequestRef op)
auto m = op->get_req<MOSDScrubReserve>();
switch (m->type) {
case MOSDScrubReserve::REQUEST:
- handle_scrub_reserve_request(op);
+ m_fsm->process_event(ReplicaReserveReq{op, m->from});
break;
case MOSDScrubReserve::GRANT:
m_fsm->process_event(ReplicaGrant{op, m->from});
@@ -1684,65 +1677,12 @@ void PgScrubber::handle_scrub_reserve_msgs(OpRequestRef op)
m_fsm->process_event(ReplicaReject{op, m->from});
break;
case MOSDScrubReserve::RELEASE:
- handle_scrub_reserve_release(op);
+ m_fsm->process_event(ReplicaRelease{op, m->from});
break;
}
}
-void PgScrubber::handle_scrub_reserve_request(OpRequestRef op)
-{
- auto request_ep = op->sent_epoch;
- dout(20) << fmt::format("{}: request_ep:{} recovery:{}",
- __func__,
- request_ep,
- m_osds->is_recovery_active())
- << dendl;
-
- // The primary may unilaterally restart the scrub process without notifying
- // replicas. Unconditionally clear any existing state prior to handling
- // the new reservation.
- m_fsm->process_event(FullReset{});
-
- bool granted{false};
- if (m_pg->cct->_conf->osd_scrub_during_recovery ||
- !m_osds->is_recovery_active()) {
-
- granted = m_osds->get_scrub_services().inc_scrubs_remote();
- if (granted) {
- m_fsm->process_event(ReplicaGrantReservation{});
- } else {
- dout(20) << __func__ << ": failed to reserve remotely" << dendl;
- }
- } else {
- dout(10) << __func__ << ": recovery is active; not granting" << dendl;
- }
-
- dout(10) << __func__ << " reserved? " << (granted ? "yes" : "no") << dendl;
-
- Message* reply = new MOSDScrubReserve(
- spg_t(m_pg->info.pgid.pgid, m_pg->get_primary().shard),
- request_ep,
- granted ? MOSDScrubReserve::GRANT : MOSDScrubReserve::REJECT,
- m_pg_whoami);
-
- m_osds->send_message_osd_cluster(reply, op->get_req()->get_connection());
-}
-
-void PgScrubber::handle_scrub_reserve_release(OpRequestRef op)
-{
- dout(10) << __func__ << " " << *op->get_req() << dendl;
- if (should_drop_message(op)) {
- // we might have turned into a Primary in the meantime. The interval
- // change should have been noticed already, and caused us to reset.
- return;
- }
-
- // this specific scrub session has terminated. All incoming events carrying
- // the old tag will be discarded.
- m_fsm->process_event(FullReset{});
-}
-
bool PgScrubber::set_reserving_now() {
return m_osds->get_scrub_services().set_reserving_now(m_pg_id,
ceph_clock_now());
@@ -2211,6 +2151,7 @@ void PgScrubber::handle_query_state(ceph::Formatter* f)
PgScrubber::~PgScrubber()
{
+ m_fsm->process_event(IntervalChanged{});
if (m_scrub_job) {
// make sure the OSD won't try to scrub this one just now
rm_from_osd_scrubbing();
diff --git a/src/osd/scrubber/pg_scrubber.h b/src/osd/scrubber/pg_scrubber.h
index 9946f9ce5ee..3ac4d6dfdb7 100644
--- a/src/osd/scrubber/pg_scrubber.h
+++ b/src/osd/scrubber/pg_scrubber.h
@@ -258,14 +258,6 @@ class PgScrubber : public ScrubPgIF,
*/
void handle_scrub_reserve_msgs(OpRequestRef op) final;
- /**
- * we are a replica being asked by the Primary to reserve OSD resources for
- * scrubbing
- */
- void handle_scrub_reserve_request(OpRequestRef op);
-
- void handle_scrub_reserve_release(OpRequestRef op);
-
// managing scrub op registration
void update_scrub_job(const requested_scrub_t& request_flags) final;
@@ -334,6 +326,8 @@ class PgScrubber : public ScrubPgIF,
void on_new_interval() final;
+ void on_replica_activate() final;
+
void scrub_clear_state() final;
bool is_queued_or_active() const final;
@@ -408,6 +402,9 @@ class PgScrubber : public ScrubPgIF,
return m_pg->recovery_state.is_primary();
}
+ /// is this scrub more than just a regular periodic scrub?
+ [[nodiscard]] bool is_high_priority() const final;
+
void set_state_name(const char* name) final
{
m_fsm_state_name = name;
@@ -476,13 +473,9 @@ class PgScrubber : public ScrubPgIF,
[[nodiscard]] bool was_epoch_changed() const final;
void set_queued_or_active() final;
- /// Clears `m_queued_or_active` and restarts snaptrimming
+ /// Clears `m_queued_or_active` and restarts snap-trimming
void clear_queued_or_active() final;
- void dec_scrubs_remote() final;
-
- void advance_token() final;
-
void mark_local_map_ready() final;
[[nodiscard]] bool are_all_maps_available() const final;
@@ -567,6 +560,9 @@ class PgScrubber : public ScrubPgIF,
void requeue_waiting() const { m_pg->requeue_ops(m_pg->waiting_for_scrub); }
+ /// Modify the token identifying the current replica scrub operation
+ void advance_token();
+
/**
* mark down some parameters of the initiated scrub:
* - the epoch when started;
@@ -675,11 +671,20 @@ class PgScrubber : public ScrubPgIF,
epoch_t m_epoch_start{0}; ///< the actual epoch when scrubbing started
/**
- * (replica) a tag identifying a specific scrub "session". Incremented
- * whenever the Primary releases the replica scrub resources. When the scrub
- * session is terminated (even if the interval remains unchanged, as might
- * happen following an asok no-scrub command), stale scrub-resched messages
- * triggered by the backend will be discarded.
+ * (replica) a tag identifying a specific replica operation, i.e. the
+ * creation of the replica scrub map for a single chunk.
+ *
+ * Background: the backend is asynchronous, and the specific
+ * operations are size-limited. While the scrubber handles a specific
+ * request, it is continuously triggered to poll the backend for the
+ * full results for the chunk handled.
+ * Once the chunk request becomes obsolete, either following an interval
+ * change or if a new request was received, we must not send the stale
+ * data to the primary. The polling of the obsolete chunk request must
+ * stop, and the stale backend response should be discarded.
+ * In other words - the token should be read as saying "the primary has
+ * lost interest in the results of all operations identified by mismatched
+ * token values".
*/
Scrub::act_token_t m_current_token{1};
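An illustrative sketch of the token mechanism described in the comment above (not the Ceph implementation): each chunk request snapshots the current token, and any backend result carrying a mismatched token is discarded as stale.

#include <cstdint>
#include <iostream>

using act_token_t = uint32_t;

struct ReplicaChunkTracker {
  act_token_t current{1};

  act_token_t start_new_request() {
    // called when a new chunk request (or an interval change) invalidates
    // whatever the backend is still working on
    return ++current;
  }

  bool is_stale(act_token_t token_of_result) const {
    return token_of_result != current;
  }
};

int main()
{
  ReplicaChunkTracker t;
  auto tok = t.start_new_request();   // token 2
  t.start_new_request();              // a newer request arrives: token 3
  std::cout << std::boolalpha
            << "old result stale? " << t.is_stale(tok) << "\n";  // true
}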
diff --git a/src/osd/scrubber/scrub_machine.cc b/src/osd/scrubber/scrub_machine.cc
index 40b43b6e077..2e18ed5cf4e 100644
--- a/src/osd/scrubber/scrub_machine.cc
+++ b/src/osd/scrubber/scrub_machine.cc
@@ -654,62 +654,179 @@ ScrubMachine::~ScrubMachine() = default;
// -------- for replicas -----------------------------------------------------
-// ----------------------- ReservedReplica --------------------------------
+// ----------------------- ReplicaActive --------------------------------
-ReservedReplica::ReservedReplica(my_context ctx)
+ReplicaActive::ReplicaActive(my_context ctx)
: my_base(ctx)
- , NamedSimply(context<ScrubMachine>().m_scrbr, "ReservedReplica")
+ , NamedSimply(context<ScrubMachine>().m_scrbr, "ReplicaActive")
{
- dout(10) << "-- state -->> ReservedReplica" << dendl;
+ DECLARE_LOCALS; // 'scrbr' & 'pg_id' aliases
+ dout(10) << "-- state -->> ReplicaActive" << dendl;
+ m_pg = scrbr->get_pg();
+ m_osds = m_pg->get_pg_osd(ScrubberPasskey());
}
-ReservedReplica::~ReservedReplica()
+ReplicaActive::~ReplicaActive()
{
DECLARE_LOCALS; // 'scrbr' & 'pg_id' aliases
- scrbr->dec_scrubs_remote();
- scrbr->advance_token();
+ if (reserved_by_my_primary) {
+ dout(10) << "ReplicaActive::~ReplicaActive(): clearing reservation"
+ << dendl;
+ clear_reservation_by_remote_primary();
+ }
}
-// ----------------------- ReplicaIdle --------------------------------
-ReplicaIdle::ReplicaIdle(my_context ctx)
- : my_base(ctx)
- , NamedSimply(context<ScrubMachine>().m_scrbr, "ReplicaIdle")
+/*
+ * Note: we are expected to be in the initial internal state (Idle) when
+ * receiving any registration request. Our other internal states, the
+ * active ones, have their own handler for this event, and will treat it
+ * as an abort request.
+ *
+ * Process:
+ * - if already reserved: clear existing reservation, then continue
+ * - ask the OSD for the "reservation resource"
+ * - if granted: mark it internally and notify the Primary.
+ * - otherwise: just notify the requesting primary.
+ */
+void ReplicaActive::on_reserve_req(const ReplicaReserveReq& ev)
+{
+ DECLARE_LOCALS; // 'scrbr' & 'pg_id' aliases
+ dout(10) << "ReplicaActive::on_reserve_req()" << dendl;
+
+ if (reserved_by_my_primary) {
+ dout(10) << "ReplicaActive::on_reserve_req(): already reserved" << dendl;
+ // clear the existing reservation
+ clear_reservation_by_remote_primary(); // clears the flag, too
+ }
+
+ // ask the OSD for the reservation
+ const auto ret = get_remote_reservation();
+ if (ret.granted) {
+ reserved_by_my_primary = true;
+ dout(10) << fmt::format("{}: reserved? yes", __func__) << dendl;
+ } else {
+ dout(10) << fmt::format("{}: reserved? no ({})", __func__, ret.error_msg)
+ << dendl;
+ }
+
+ Message* reply = new MOSDScrubReserve(
+ spg_t(pg_id.pgid, m_pg->get_primary().shard), ev.m_op->sent_epoch, ret.op,
+ m_pg->pg_whoami);
+ m_osds->send_message_osd_cluster(reply, ev.m_op->get_req()->get_connection());
+}
+
+
+void ReplicaActive::on_release(const ReplicaRelease& ev)
+{
+ DECLARE_LOCALS; // 'scrbr' & 'pg_id' aliases
+ if (!reserved_by_my_primary) {
+ dout(5) << fmt::format(
+ "ReplicaActive::on_release() from {}: not reserved!",
+ ev.m_from)
+ << dendl;
+ return;
+ }
+ dout(10) << fmt::format("ReplicaActive::on_release() from {}", ev.m_from)
+ << dendl;
+ clear_reservation_by_remote_primary();
+}
+
+
+ReplicaActive::ReservationAttemptRes ReplicaActive::get_remote_reservation()
+{
+ using ReservationAttemptRes = ReplicaActive::ReservationAttemptRes;
+ DECLARE_LOCALS; // 'scrbr' & 'pg_id' aliases
+ if (!scrbr->get_pg_cct()->_conf.get_val<bool>("osd_scrub_during_recovery") &&
+ m_osds->is_recovery_active()) {
+ return ReservationAttemptRes{
+ MOSDScrubReserve::REJECT, "recovery is active", false};
+ }
+
+ if (m_osds->get_scrub_services().inc_scrubs_remote(scrbr->get_spgid().pgid)) {
+ return ReservationAttemptRes{MOSDScrubReserve::GRANT, "", true};
+ } else {
+ return ReservationAttemptRes{
+ MOSDScrubReserve::REJECT, "failed to reserve remotely", false};
+ }
+}
+
+
+void ReplicaActive::clear_reservation_by_remote_primary()
{
- dout(10) << "-- state -->> ReplicaIdle" << dendl;
+ DECLARE_LOCALS; // 'scrbr' & 'pg_id' aliases
+ dout(10) << "ReplicaActive::clear_reservation_by_remote_primary()" << dendl;
+ m_osds->get_scrub_services().dec_scrubs_remote(scrbr->get_spgid().pgid);
+ reserved_by_my_primary = false;
}
-ReplicaIdle::~ReplicaIdle()
+
+void ReplicaActive::check_for_updates(const StartReplica& ev)
{
+ DECLARE_LOCALS; // 'scrbr' & 'pg_id' aliases
+ dout(10) << "ReplicaActive::check_for_updates()" << dendl;
+ post_event(ReplicaPushesUpd{});
}
+// ---------------- ReplicaActive/ReplicaIdle ---------------------------
-// ----------------------- ReplicaActiveOp --------------------------------
+ReplicaIdle::ReplicaIdle(my_context ctx)
+ : my_base(ctx)
+ , NamedSimply(context<ScrubMachine>().m_scrbr, "ReplicaActive/ReplicaIdle")
+{
+ dout(10) << "-- state -->> ReplicaActive/ReplicaIdle" << dendl;
+}
+
+
+// ------------- ReplicaActive/ReplicaActiveOp --------------------------
ReplicaActiveOp::ReplicaActiveOp(my_context ctx)
: my_base(ctx)
, NamedSimply(context<ScrubMachine>().m_scrbr, "ReplicaActiveOp")
{
- dout(10) << "-- state -->> ReplicaActiveOp" << dendl;
+ dout(10) << "-- state -->> ReplicaActive/ReplicaActiveOp" << dendl;
+ DECLARE_LOCALS; // 'scrbr' & 'pg_id' aliases
+ scrbr->on_replica_init();
}
+
ReplicaActiveOp::~ReplicaActiveOp()
{
DECLARE_LOCALS; // 'scrbr' & 'pg_id' aliases
+ dout(10) << __func__ << dendl;
scrbr->replica_handling_done();
}
-// ----------------------- ReplicaWaitUpdates --------------------------------
+sc::result ReplicaActiveOp::react(const StartReplica&)
+{
+ DECLARE_LOCALS; // 'scrbr' & 'pg_id' aliases
+ dout(10) << "ReplicaActiveOp::react(const StartReplica&)" << dendl;
+
+ const auto msg = fmt::format(
+ "osd.{} pg[{}]: new chunk request while still handling the previous one",
+ scrbr->get_whoami(), scrbr->get_spgid());
+ dout(1) << msg << dendl;
+ scrbr->get_clog()->warn() << msg;
+
+ post_event(ReplicaPushesUpd{});
+
+ // exit & re-enter the state
+ return transit<ReplicaActiveOp>();
+}
+
+// ------------- ReplicaActive/ReplicaWaitUpdates ------------------------
ReplicaWaitUpdates::ReplicaWaitUpdates(my_context ctx)
: my_base(ctx)
- , NamedSimply(context<ScrubMachine>().m_scrbr, "ReplicaWaitUpdates")
+ , NamedSimply(
+ context<ScrubMachine>().m_scrbr,
+ "ReplicaActive/ReplicaActiveOp/ReplicaWaitUpdates")
{
- dout(10) << "-- state -->> ReplicaWaitUpdates" << dendl;
- DECLARE_LOCALS; // 'scrbr' & 'pg_id' aliases
- scrbr->on_replica_init();
+ dout(10) << "-- state -->> ReplicaActive/ReplicaActiveOp/ReplicaWaitUpdates"
+ << dendl;
}
+
/*
* Triggered externally, by the entity that had an update re pushes
*/
@@ -720,7 +837,6 @@ sc::result ReplicaWaitUpdates::react(const ReplicaPushesUpd&)
<< scrbr->pending_active_pushes() << dendl;
if (scrbr->pending_active_pushes() == 0) {
-
// done waiting
return transit<ReplicaBuildingMap>();
}
@@ -728,19 +844,21 @@ sc::result ReplicaWaitUpdates::react(const ReplicaPushesUpd&)
return discard_event();
}
+
// ----------------------- ReplicaBuildingMap -----------------------------------
ReplicaBuildingMap::ReplicaBuildingMap(my_context ctx)
: my_base(ctx)
- , NamedSimply(context<ScrubMachine>().m_scrbr, "ReplicaBuildingMap")
+ , NamedSimply(
+ context<ScrubMachine>().m_scrbr,
+ "ReplicaActive/ReplicaActiveOp/ReplicaBuildingMap")
{
- DECLARE_LOCALS; // 'scrbr' & 'pg_id' aliases
- dout(10) << "-- state -->> ReplicaBuildingMap" << dendl;
- // and as we might have skipped ReplicaWaitUpdates:
- scrbr->on_replica_init();
+ dout(10) << "-- state -->> ReplicaActive/ReplicaActiveOp/ReplicaBuildingMap"
+ << dendl;
post_event(SchedReplica{});
}
+
sc::result ReplicaBuildingMap::react(const SchedReplica&)
{
DECLARE_LOCALS; // 'scrbr' & 'pg_id' aliases
@@ -751,7 +869,6 @@ sc::result ReplicaBuildingMap::react(const SchedReplica&)
dout(10) << "replica scrub job preempted" << dendl;
scrbr->send_preempted_replica();
- scrbr->replica_handling_done();
return transit<ReplicaIdle>();
}
diff --git a/src/osd/scrubber/scrub_machine.h b/src/osd/scrubber/scrub_machine.h
index cbce07fe183..7e9c887145a 100644
--- a/src/osd/scrubber/scrub_machine.h
+++ b/src/osd/scrubber/scrub_machine.h
@@ -48,39 +48,54 @@ namespace mpl = ::boost::mpl;
void on_event_creation(std::string_view nm);
void on_event_discard(std::string_view nm);
-// reservation grant/reject events carry the peer's response:
-/// a replica has granted our reservation request
-struct ReplicaGrant : sc::event<ReplicaGrant> {
- OpRequestRef m_op;
- pg_shard_t m_from;
- ReplicaGrant(OpRequestRef op, pg_shard_t from) : m_op{op}, m_from{from}
+template <typename EV>
+struct OpCarryingEvent : sc::event<EV> {
+ static constexpr const char* event_name = "<>";
+ const OpRequestRef m_op;
+ const pg_shard_t m_from;
+ OpCarryingEvent(OpRequestRef op, pg_shard_t from) : m_op{op}, m_from{from}
{
- on_event_creation("ReplicaGrant");
+ on_event_creation(static_cast<EV*>(this)->event_name);
}
+
+ OpCarryingEvent(const OpCarryingEvent&) = default;
+ OpCarryingEvent(OpCarryingEvent&&) = default;
+ OpCarryingEvent& operator=(const OpCarryingEvent&) = default;
+ OpCarryingEvent& operator=(OpCarryingEvent&&) = default;
+
void print(std::ostream* out) const
{
- *out << fmt::format("ReplicaGrant(from: {})", m_from);
+ *out << fmt::format("{} (from: {})", EV::event_name, m_from);
}
- std::string_view print() const { return "ReplicaGrant"; }
- ~ReplicaGrant() { on_event_discard("ReplicaGrant"); }
+ std::string_view print() const { return EV::event_name; }
+ ~OpCarryingEvent() { on_event_discard(EV::event_name); }
};
-/// a replica has denied our reservation request
-struct ReplicaReject : sc::event<ReplicaReject> {
- OpRequestRef m_op;
- pg_shard_t m_from;
- ReplicaReject(OpRequestRef op, pg_shard_t from) : m_op{op}, m_from{from}
- {
- on_event_creation("ReplicaReject");
- }
- void print(std::ostream* out) const
- {
- *out << fmt::format("ReplicaReject(from: {})", m_from);
+#define OP_EV(T) \
+ struct T : OpCarryingEvent<T> { \
+ static constexpr const char* event_name = #T; \
+ template <typename... Args> \
+ T(Args&&... args) : OpCarryingEvent(std::forward<Args>(args)...) \
+ { \
+ } \
}
- std::string_view print() const { return "ReplicaReject"; }
- ~ReplicaReject() { on_event_discard("ReplicaReject"); }
-};
+
+
+// reservation events carry peer's request/response data:
+
+/// a replica has granted our reservation request
+OP_EV(ReplicaGrant);
+
+/// a replica has denied our reservation request
+OP_EV(ReplicaReject);
+
+/// received Primary request for scrub reservation
+OP_EV(ReplicaReserveReq);
+
+/// explicit release request from the Primary
+OP_EV(ReplicaRelease);
+
#define MEV(E) \
struct E : sc::event<E> { \
@@ -149,15 +164,12 @@ MEV(IntLocalMapDone)
/// scrub_snapshot_metadata()
MEV(DigestUpdate)
-/// event emitted when the replica grants a reservation to the primary
-MEV(ReplicaGrantReservation)
+/// we are a replica for this PG
+MEV(ReplicaActivate)
/// initiating replica scrub
MEV(StartReplica)
-/// 'start replica' when there are no pending updates
-MEV(StartReplicaNoWait)
-
MEV(SchedReplica)
/// Update to active_pushes. 'active_pushes' represents recovery
@@ -196,8 +208,11 @@ struct NotActive; ///< the quiescent state. No active scrubbing.
struct Session; ///< either reserving or actively scrubbing
struct ReservingReplicas; ///< securing scrub resources from replicas' OSDs
struct ActiveScrubbing; ///< the active state for a Primary. A sub-machine.
-struct ReplicaIdle; ///< Initial reserved replica state
-struct ReplicaBuildingMap; ///< an active state for a replica.
+// the active states for a replica:
+struct ReplicaActive; ///< the quiescent state for a replica
+struct ReplicaActiveOp;
+struct ReplicaWaitUpdates;
+struct ReplicaBuildingMap;
class ScrubMachine : public sc::state_machine<ScrubMachine, NotActive> {
@@ -355,8 +370,8 @@ public:
*
* - a special end-of-recovery Primary scrub event ('AfterRepairScrub').
*
- * - (for a replica) 'StartReplica' or 'StartReplicaNoWait', triggered by
- * an incoming MOSDRepScrub message.
+ * - (if already in ReplicaActive): an incoming MOSDRepScrub triggers
+ * 'StartReplica'.
*
* note (20.8.21): originally, AfterRepairScrub was triggering a scrub without
* waiting for replica resources to be acquired. But once replicas started
@@ -366,11 +381,13 @@ public:
struct NotActive : sc::state<NotActive, ScrubMachine>, NamedSimply {
explicit NotActive(my_context ctx);
- using reactions =
- mpl::list<sc::custom_reaction<StartScrub>,
- // a scrubbing that was initiated at recovery completion:
- sc::custom_reaction<AfterRepairScrub>,
- sc::transition<ReplicaGrantReservation, ReplicaIdle>>;
+ using reactions = mpl::list<
+ sc::custom_reaction<StartScrub>,
+ // a scrubbing that was initiated at recovery completion:
+ sc::custom_reaction<AfterRepairScrub>,
+ // peering done, and we are a replica
+ sc::transition<ReplicaActivate, ReplicaActive>>;
+
sc::result react(const StartScrub&);
sc::result react(const AfterRepairScrub&);
};
@@ -596,47 +613,111 @@ struct WaitDigestUpdate : sc::state<WaitDigestUpdate, ActiveScrubbing>,
// ----------------------------- the "replica active" states
-/**
- * ReservedReplica
+/*
+ * The replica states:
+ *
+ * ReplicaActive - starts after being peered as a replica. Ends on interval.
+ * - maintain the "I am reserved by a primary" state;
+ * - handles reservation requests
+ *
+ * - ReplicaIdle - ready for a new scrub request
+ * * initial state of ReplicaActive
*
- * Parent state for replica states, Controls lifecycle for
- * PgScrubber::m_reservations.
+ * - ReplicaActiveOp - handling a single map request op
+ * * ReplicaWaitUpdates
+ * * ReplicaBuildingMap
*/
-struct ReservedReplica : sc::state<ReservedReplica, ScrubMachine, ReplicaIdle>,
+
+struct ReplicaIdle;
+
+struct ReplicaActive : sc::state<ReplicaActive, ScrubMachine, ReplicaIdle>,
NamedSimply {
- explicit ReservedReplica(my_context ctx);
- ~ReservedReplica();
+ explicit ReplicaActive(my_context ctx);
+ ~ReplicaActive();
- using reactions = mpl::list<sc::transition<FullReset, NotActive>>;
-};
+ /// handle a reservation request from a primary
+ void on_reserve_req(const ReplicaReserveReq&);
-struct ReplicaWaitUpdates;
+ /// handle a 'release' from a primary
+ void on_release(const ReplicaRelease&);
-/**
- * ReplicaIdle
- *
- * Replica is waiting for a map request.
- */
-struct ReplicaIdle : sc::state<ReplicaIdle, ReservedReplica>,
- NamedSimply {
- explicit ReplicaIdle(my_context ctx);
- ~ReplicaIdle();
+ void check_for_updates(const StartReplica&);
using reactions = mpl::list<
- sc::transition<StartReplica, ReplicaWaitUpdates>,
- sc::transition<StartReplicaNoWait, ReplicaBuildingMap>>;
+ // a reservation request from the primary
+ sc::in_state_reaction<
+ ReplicaReserveReq,
+ ReplicaActive,
+ &ReplicaActive::on_reserve_req>,
+ // an explicit release request from the primary
+ sc::in_state_reaction<
+ ReplicaRelease,
+ ReplicaActive,
+ &ReplicaActive::on_release>,
+ // when the interval ends - we may not be a replica anymore
+ sc::transition<IntervalChanged, NotActive>>;
+
+ private:
+ bool reserved_by_my_primary{false};
+
+ // shortcuts:
+ PG* m_pg;
+ OSDService* m_osds;
+
+ /// a convenience internal result structure
+ struct ReservationAttemptRes {
+ MOSDScrubReserve::ReserveMsgOp op; // GRANT or REJECT
+ std::string_view error_msg;
+ bool granted;
+ };
+
+ /// request a scrub resource from our local OSD
+ /// (after performing some checks)
+ ReservationAttemptRes get_remote_reservation();
+
+ void clear_reservation_by_remote_primary();
+};
+
+
+struct ReplicaIdle : sc::state<ReplicaIdle, ReplicaActive>, NamedSimply {
+ explicit ReplicaIdle(my_context ctx);
+ ~ReplicaIdle() = default;
+
+ // note the execution of check_for_updates() when transitioning to
+ // ReplicaActiveOp/ReplicaWaitUpdates. That would trigger a ReplicaPushesUpd
+ // event, which will be handled by ReplicaWaitUpdates.
+ using reactions = mpl::list<sc::transition<
+ StartReplica,
+ ReplicaWaitUpdates,
+ ReplicaActive,
+ &ReplicaActive::check_for_updates>>;
};
+
/**
- * ReservedActiveOp
+ * ReplicaActiveOp
*
- * Lifetime matches handling for a single map request op
+ * Lifetime matches handling for a single map request op.
*/
struct ReplicaActiveOp
- : sc::state<ReplicaActiveOp, ReservedReplica, ReplicaWaitUpdates>,
- NamedSimply {
+ : sc::state<ReplicaActiveOp, ReplicaActive, ReplicaWaitUpdates>,
+ NamedSimply {
explicit ReplicaActiveOp(my_context ctx);
~ReplicaActiveOp();
+
+ using reactions = mpl::list<sc::custom_reaction<StartReplica>>;
+
+ /**
+ * Handling the unexpected (read - caused by a bug) case of receiving a
+ * new chunk request while still handling the previous one.
+ * To note:
+ * - the primary is evidently no longer waiting for the results of the
+ * previous request. On the other hand
+ * - we must respond to the new request, as the primary would wait for
+ * it "forever"`,
+ * - and we should log this unexpected scenario clearly in the cluster log.
+ */
+ sc::result react(const StartReplica&);
};
/*
@@ -646,7 +727,7 @@ struct ReplicaActiveOp
* - the details of the Primary's request were internalized by PgScrubber;
* - 'active' scrubbing is set
*/
-struct ReplicaWaitUpdates : sc::state<ReplicaWaitUpdates, ReservedReplica>,
+struct ReplicaWaitUpdates : sc::state<ReplicaWaitUpdates, ReplicaActiveOp>,
NamedSimply {
explicit ReplicaWaitUpdates(my_context ctx);
using reactions = mpl::list<sc::custom_reaction<ReplicaPushesUpd>>;
@@ -655,8 +736,8 @@ struct ReplicaWaitUpdates : sc::state<ReplicaWaitUpdates, ReservedReplica>,
};
-struct ReplicaBuildingMap : sc::state<ReplicaBuildingMap, ReservedReplica>
- , NamedSimply {
+struct ReplicaBuildingMap : sc::state<ReplicaBuildingMap, ReplicaActiveOp>,
+ NamedSimply {
explicit ReplicaBuildingMap(my_context ctx);
using reactions = mpl::list<sc::custom_reaction<SchedReplica>>;
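To show what the OP_EV() macro above buys, here is a stripped-down, self-contained sketch of the CRTP base plus macro pattern. The stand-in types (OpRequestRef as a shared_ptr, a bare pg_shard_t) are assumptions to keep it compilable; the real events also derive from boost::statechart::event and log their creation/destruction.

#include <iostream>
#include <memory>
#include <utility>

using OpRequestRef = std::shared_ptr<int>;
struct pg_shard_t { int osd; };

template <typename EV>
struct OpCarryingEvent {
  const OpRequestRef m_op;
  const pg_shard_t m_from;
  OpCarryingEvent(OpRequestRef op, pg_shard_t from)
      : m_op{std::move(op)}, m_from{from} {}
  void print(std::ostream& out) const {
    out << EV::event_name << " (from: osd." << m_from.osd << ")";
  }
};

// one concrete event type per reservation message, sharing the op/from
// plumbing while keeping distinct static types for the state machine
#define OP_EV(T)                                                        \
  struct T : OpCarryingEvent<T> {                                       \
    static constexpr const char* event_name = #T;                       \
    template <typename... Args>                                         \
    T(Args&&... args) : OpCarryingEvent(std::forward<Args>(args)...) {} \
  }

OP_EV(ReplicaReserveReq);
OP_EV(ReplicaRelease);

int main()
{
  ReplicaReserveReq ev{nullptr, pg_shard_t{3}};
  ev.print(std::cout);   // prints: ReplicaReserveReq (from: osd.3)
  std::cout << "\n";
}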
diff --git a/src/osd/scrubber/scrub_machine_lstnr.h b/src/osd/scrubber/scrub_machine_lstnr.h
index 4206c789f91..34247083f4f 100644
--- a/src/osd/scrubber/scrub_machine_lstnr.h
+++ b/src/osd/scrubber/scrub_machine_lstnr.h
@@ -196,12 +196,6 @@ struct ScrubMachineListener {
virtual void set_queued_or_active() = 0;
virtual void clear_queued_or_active() = 0;
- /// Release remote scrub reservation
- virtual void dec_scrubs_remote() = 0;
-
- /// Advance replica token
- virtual void advance_token() = 0;
-
/**
* Our scrubbing is blocked, waiting for an excessive length of time for
* our target chunk to be unlocked. We will set the corresponding flags,
@@ -235,4 +229,7 @@ struct ScrubMachineListener {
// temporary interface (to be discarded in a follow-up PR)
/// set the 'resources_failure' flag in the scrub-job object
virtual void flag_reservations_failure() = 0;
+
+ /// is this scrub more than just a regular periodic scrub?
+ [[nodiscard]] virtual bool is_high_priority() const = 0;
};
diff --git a/src/osd/scrubber/scrub_reservations.cc b/src/osd/scrubber/scrub_reservations.cc
index 3194c9871d5..449856e7b45 100644
--- a/src/osd/scrubber/scrub_reservations.cc
+++ b/src/osd/scrubber/scrub_reservations.cc
@@ -49,12 +49,17 @@ ReplicaReservations::ReplicaReservations(ScrubMachineListener& scrbr)
});
m_next_to_request = m_sorted_secondaries.cbegin();
- // send out the 1'st request (unless we have no replicas)
- send_next_reservation_or_complete();
-
- m_slow_response_warn_timeout =
- m_scrubber.get_pg_cct()->_conf.get_val<milliseconds>(
- "osd_scrub_slow_reservation_response");
+ if (m_scrubber.is_high_priority()) {
+ // for high-priority scrubs (i.e. user-initiated), no reservations are
+ // needed.
+ dout(10) << "high-priority scrub - no reservations needed" << dendl;
+ } else {
+ // send out the 1'st request (unless we have no replicas)
+ send_next_reservation_or_complete();
+ m_slow_response_warn_timeout =
+ m_scrubber.get_pg_cct()->_conf.get_val<milliseconds>(
+ "osd_scrub_slow_reservation_response");
+ }
}
void ReplicaReservations::release_all()
diff --git a/src/osd/scrubber/scrub_resources.cc b/src/osd/scrubber/scrub_resources.cc
index 179bd5e7e0e..dd9d31a1573 100644
--- a/src/osd/scrubber/scrub_resources.cc
+++ b/src/osd/scrubber/scrub_resources.cc
@@ -4,10 +4,12 @@
#include "./scrub_resources.h"
#include <fmt/format.h>
+#include <fmt/ranges.h>
#include "common/debug.h"
#include "include/ceph_assert.h"
+#include "osd/osd_types_fmt.h"
using ScrubResources = Scrub::ScrubResources;
@@ -19,28 +21,38 @@ ScrubResources::ScrubResources(
, conf{config}
{}
+// ------------------------- scrubbing as primary on this OSD -----------------
+
+// can we increase the number of concurrent scrubs performed by Primaries
+// on this OSD? Note that this count is kept separately from the number of
+// scrubs performed by replicas.
bool ScrubResources::can_inc_scrubs() const
{
std::lock_guard lck{resource_lock};
- if (scrubs_local + scrubs_remote < conf->osd_max_scrubs) {
- return true;
- }
- log_upwards(fmt::format(
- "{}== false. {} (local) + {} (remote) >= max ({})", __func__,
- scrubs_local, scrubs_remote, conf->osd_max_scrubs));
- return false;
+ return can_inc_local_scrubs_unlocked();
}
bool ScrubResources::inc_scrubs_local()
{
std::lock_guard lck{resource_lock};
- if (scrubs_local + scrubs_remote < conf->osd_max_scrubs) {
+ if (can_inc_local_scrubs_unlocked()) {
++scrubs_local;
+ log_upwards(fmt::format(
+ "{}: {} -> {} (max {}, remote {})", __func__, (scrubs_local - 1),
+ scrubs_local, conf->osd_max_scrubs, granted_reservations.size()));
+ return true;
+ }
+ return false;
+}
+
+bool ScrubResources::can_inc_local_scrubs_unlocked() const
+{
+ if (scrubs_local < conf->osd_max_scrubs) {
return true;
}
log_upwards(fmt::format(
- "{}: {} (local) + {} (remote) >= max ({})", __func__, scrubs_local,
- scrubs_remote, conf->osd_max_scrubs));
+ "{}: Cannot add local scrubs. Current counter ({}) >= max ({})", __func__,
+ scrubs_local, conf->osd_max_scrubs));
return false;
}
@@ -48,43 +60,62 @@ void ScrubResources::dec_scrubs_local()
{
std::lock_guard lck{resource_lock};
log_upwards(fmt::format(
- "{}: {} -> {} (max {}, remote {})", __func__, scrubs_local,
- (scrubs_local - 1), conf->osd_max_scrubs, scrubs_remote));
+ "{}: {} -> {} (max {}, remote {})",
+ __func__, scrubs_local, (scrubs_local - 1), conf->osd_max_scrubs,
+ granted_reservations.size()));
--scrubs_local;
ceph_assert(scrubs_local >= 0);
}
-bool ScrubResources::inc_scrubs_remote()
+// ------------------------- scrubbing on this OSD as replicas ----------------
+
+bool ScrubResources::inc_scrubs_remote(pg_t pgid)
{
std::lock_guard lck{resource_lock};
- if (scrubs_local + scrubs_remote < conf->osd_max_scrubs) {
+
+ // if this PG is already reserved - it's probably a benign bug.
+ // report it, but do not fail the reservation.
+ if (granted_reservations.contains(pgid)) {
+ log_upwards(fmt::format("{}: pg[{}] already reserved", __func__, pgid));
+ return true;
+ }
+
+ auto pre_op_cnt = granted_reservations.size();
+ if (std::cmp_less(pre_op_cnt, conf->osd_max_scrubs)) {
+ granted_reservations.insert(pgid);
log_upwards(fmt::format(
- "{}: {} -> {} (max {}, local {})", __func__, scrubs_remote,
- (scrubs_remote + 1), conf->osd_max_scrubs, scrubs_local));
- ++scrubs_remote;
+ "{}: pg[{}] reserved. Remote scrubs count changed from {} -> {} (max "
+ "{}, local {})",
+ __func__, pgid, pre_op_cnt, granted_reservations.size(),
+ conf->osd_max_scrubs, scrubs_local));
return true;
}
log_upwards(fmt::format(
- "{}: {} (local) + {} (remote) >= max ({})", __func__, scrubs_local,
- scrubs_remote, conf->osd_max_scrubs));
+ "{}: pg[{}] failed. Too many concurrent replica scrubs ({} >= max ({}))",
+ __func__, pgid, pre_op_cnt, conf->osd_max_scrubs));
return false;
}
-void ScrubResources::dec_scrubs_remote()
+void ScrubResources::dec_scrubs_remote(pg_t pgid)
{
std::lock_guard lck{resource_lock};
- log_upwards(fmt::format(
- "{}: {} -> {} (max {}, local {})", __func__, scrubs_remote,
- (scrubs_remote - 1), conf->osd_max_scrubs, scrubs_local));
- --scrubs_remote;
- ceph_assert(scrubs_remote >= 0);
+ // we might not have this PG in the set (e.g. if we are concluding a
+ // high priority scrub, one that does not require reservations)
+ auto cnt = granted_reservations.erase(pgid);
+ if (cnt) {
+ log_upwards(fmt::format(
+ "{}: remote reservation for {} removed -> {} (max {}, local {})",
+ __func__, pgid, granted_reservations.size(), conf->osd_max_scrubs,
+ scrubs_local));
+ }
}
void ScrubResources::dump_scrub_reservations(ceph::Formatter* f) const
{
std::lock_guard lck{resource_lock};
f->dump_int("scrubs_local", scrubs_local);
- f->dump_int("scrubs_remote", scrubs_remote);
+ f->dump_int("granted_reservations", granted_reservations.size());
+ f->dump_string("PGs being served", fmt::format("{}", granted_reservations));
f->dump_int("osd_max_scrubs", conf->osd_max_scrubs);
}
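A reduced sketch of the replica-side reservation behaviour implemented above (names simplified, not the Ceph class): reservations are tracked per PG id instead of as a bare counter, so a duplicate grant for an already-reserved PG is tolerated, and releasing a PG that was never reserved (e.g. after a high-priority scrub that skipped reservation) is harmless.

#include <cstddef>
#include <iostream>
#include <set>

struct RemoteReservations {
  std::set<int> granted;    // stand-in for std::set<pg_t>
  std::size_t max_scrubs;   // stand-in for osd_max_scrubs

  bool reserve(int pgid) {
    if (granted.count(pgid)) {
      return true;            // already reserved: benign, keep the grant
    }
    if (granted.size() < max_scrubs) {
      granted.insert(pgid);
      return true;
    }
    return false;             // too many concurrent replica scrubs
  }

  void release(int pgid) {
    granted.erase(pgid);      // no-op if this PG was never reserved
  }
};

int main()
{
  RemoteReservations r{{}, 1};
  std::cout << r.reserve(7) << ' ' << r.reserve(7) << ' ' << r.reserve(8) << '\n';
  // prints: 1 1 0  (duplicate grant tolerated; capacity of 1 enforced)
  r.release(8);               // never granted: silently ignored
}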
diff --git a/src/osd/scrubber/scrub_resources.h b/src/osd/scrubber/scrub_resources.h
index 890ee5d0e2f..75807a10f82 100644
--- a/src/osd/scrubber/scrub_resources.h
+++ b/src/osd/scrubber/scrub_resources.h
@@ -8,6 +8,7 @@
#include "common/ceph_mutex.h"
#include "common/config_proxy.h"
#include "common/Formatter.h"
+#include "osd/osd_types.h"
namespace Scrub {
@@ -28,8 +29,9 @@ class ScrubResources {
/// the number of concurrent scrubs performed by Primaries on this OSD
int scrubs_local{0};
- /// the number of active scrub reservations granted by replicas
- int scrubs_remote{0};
+ /// the set of PGs for which this OSD holds active replica-scrub reservations
+ /// \todo come C++23 - consider std::flat_set<pg_t>
+ std::set<pg_t> granted_reservations;
mutable ceph::mutex resource_lock =
ceph::make_mutex("ScrubQueue::resource_lock");
@@ -38,6 +40,10 @@ class ScrubResources {
const ceph::common::ConfigProxy& conf;
+ /// a helper that checks whether another local scrub may be started.
+ /// Must be called with the resource lock held.
+ bool can_inc_local_scrubs_unlocked() const;
+
public:
explicit ScrubResources(
log_upwards_t log_access,
@@ -56,10 +62,10 @@ class ScrubResources {
void dec_scrubs_local();
/// increments the number of scrubs acting as a Replica
- bool inc_scrubs_remote();
+ bool inc_scrubs_remote(pg_t pgid);
/// decrements the number of scrubs acting as a Replica
- void dec_scrubs_remote();
+ void dec_scrubs_remote(pg_t pgid);
void dump_scrub_reservations(ceph::Formatter* f) const;
};
diff --git a/src/osd/scrubber_common.h b/src/osd/scrubber_common.h
index 16810bba15c..ce269563f9a 100644
--- a/src/osd/scrubber_common.h
+++ b/src/osd/scrubber_common.h
@@ -18,12 +18,14 @@ struct PGPool;
namespace Scrub {
class ReplicaReservations;
+ struct ReplicaActive;
}
/// Facilitating scrub-related object access to private PG data
class ScrubberPasskey {
private:
friend class Scrub::ReplicaReservations;
+ friend struct Scrub::ReplicaActive;
friend class PrimaryLogScrub;
friend class PgScrubber;
friend class ScrubBackend;
@@ -169,8 +171,7 @@ struct requested_scrub_t {
* the value of auto_repair is determined in sched_scrub() (once per scrub.
* previous value is not remembered). Set if
* - allowed by configuration and backend, and
- * - must_scrub is not set (i.e. - this is a periodic scrub),
- * - time_for_deep was just set
+ * - for periodic scrubs: time_for_deep was just set
*/
bool auto_repair{false};
@@ -310,6 +311,9 @@ struct ScrubPgIF {
/// the OSD scrub queue
virtual void on_new_interval() = 0;
+ /// we are peered as a replica
+ virtual void on_replica_activate() = 0;
+
virtual void scrub_clear_state() = 0;
virtual void handle_query_state(ceph::Formatter* f) = 0;
diff --git a/src/pybind/mgr/dashboard/controllers/_crud.py b/src/pybind/mgr/dashboard/controllers/_crud.py
index 240a2b5ab8c..d65649cadb4 100644
--- a/src/pybind/mgr/dashboard/controllers/_crud.py
+++ b/src/pybind/mgr/dashboard/controllers/_crud.py
@@ -104,6 +104,7 @@ class Validator(Enum):
RGW_ROLE_NAME = 'rgwRoleName'
RGW_ROLE_PATH = 'rgwRolePath'
FILE = 'file'
+ RGW_ROLE_SESSION_DURATION = 'rgwRoleSessionDuration'
class FormField(NamedTuple):
@@ -224,6 +225,10 @@ class Container:
properties[field.key]['title'] = field.name
field_ui_schema['key'] = field_key
field_ui_schema['readonly'] = field.readonly
+ if field.readonly:
+ field_ui_schema['templateOptions'] = {
+ 'disabled': True
+ }
field_ui_schema['help'] = f'{field.help}'
field_ui_schema['validators'] = [i.value for i in field.validators]
items.append(field_ui_schema)
@@ -307,6 +312,7 @@ class CRUDMeta(SerializableClass):
self.forms = []
self.columnKey = ''
self.detail_columns = []
+ self.resource = ''
class CRUDCollectionMethod(NamedTuple):
@@ -330,6 +336,7 @@ class CRUDEndpoint:
actions: Optional[List[TableAction]] = None,
permissions: Optional[List[str]] = None, forms: Optional[List[Form]] = None,
column_key: Optional[str] = None,
+ resource: Optional[str] = None,
meta: CRUDMeta = CRUDMeta(), get_all: Optional[CRUDCollectionMethod] = None,
create: Optional[CRUDCollectionMethod] = None,
delete: Optional[CRUDCollectionMethod] = None,
@@ -352,6 +359,7 @@ class CRUDEndpoint:
self.detail_columns = detail_columns if detail_columns is not None else []
self.extra_endpoints = extra_endpoints if extra_endpoints is not None else []
self.selection_type = selection_type
+ self.resource = resource
def __call__(self, cls: Any):
self.create_crud_class(cls)
@@ -415,6 +423,7 @@ class CRUDEndpoint:
self.generate_forms(model_key)
self.set_permissions()
self.set_column_key()
+ self.set_table_resource()
self.get_detail_columns()
selection_type = self.__class__.outer_self.selection_type
self.__class__.outer_self.meta.table.set_selection_type(selection_type)
@@ -468,6 +477,10 @@ class CRUDEndpoint:
if self.__class__.outer_self.column_key:
self.outer_self.meta.columnKey = self.__class__.outer_self.column_key
+ def set_table_resource(self):
+ if self.__class__.outer_self.resource:
+ self.outer_self.meta.resource = self.__class__.outer_self.resource
+
class_name = self.router.path.replace('/', '')
meta_class = type(f'{class_name}_CRUDClassMetadata',
(RESTController,),
@@ -478,6 +491,7 @@ class CRUDEndpoint:
'generate_forms': generate_forms,
'set_permissions': set_permissions,
'set_column_key': set_column_key,
+ 'set_table_resource': set_table_resource,
'get_detail_columns': get_detail_columns,
'outer_self': self,
})
diff --git a/src/pybind/mgr/dashboard/controllers/ceph_users.py b/src/pybind/mgr/dashboard/controllers/ceph_users.py
index e1bdc157091..022f8f36c42 100644
--- a/src/pybind/mgr/dashboard/controllers/ceph_users.py
+++ b/src/pybind/mgr/dashboard/controllers/ceph_users.py
@@ -174,7 +174,7 @@ edit_form = Form(path='/cluster/user/edit',
TableAction(name='Create', permission='create', icon=Icon.ADD.value,
routerLink='/cluster/user/create'),
TableAction(name='Edit', permission='update', icon=Icon.EDIT.value,
- click='edit'),
+ click='edit', routerLink='/cluster/user/edit'),
TableAction(name='Delete', permission='delete', icon=Icon.DESTROY.value,
click='delete', disable=True),
TableAction(name='Import', permission='create', icon=Icon.IMPORT.value,
@@ -185,6 +185,7 @@ edit_form = Form(path='/cluster/user/edit',
permissions=[Scope.CONFIG_OPT],
forms=[create_form, edit_form, import_user_form],
column_key='entity',
+ resource='user',
get_all=CRUDCollectionMethod(
func=CephUserEndpoints.user_list,
doc=EndpointDoc("Get Ceph Users")
diff --git a/src/pybind/mgr/dashboard/controllers/cephfs.py b/src/pybind/mgr/dashboard/controllers/cephfs.py
index ed83f91d0c9..61b31eb809f 100644
--- a/src/pybind/mgr/dashboard/controllers/cephfs.py
+++ b/src/pybind/mgr/dashboard/controllers/cephfs.py
@@ -335,13 +335,16 @@ class CephFS(RESTController):
standby_table = self.get_standby_table(fsmap['standbys'], mds_versions)
+ flags = mdsmap['flags_state']
+
return {
"cephfs": {
"id": fs_id,
"name": mdsmap['fs_name'],
"client_count": client_count,
"ranks": rank_table,
- "pools": pools_table
+ "pools": pools_table,
+ "flags": flags,
},
"standbys": standby_table,
"versions": mds_versions
@@ -673,7 +676,7 @@ class CephFsUi(CephFS):
@APIDoc('CephFS Subvolume Management API', 'CephFSSubvolume')
class CephFSSubvolume(RESTController):
- def get(self, vol_name: str, group_name: str = ""):
+ def get(self, vol_name: str, group_name: str = "", info=True):
params = {'vol_name': vol_name}
if group_name:
params['group_name'] = group_name
@@ -684,15 +687,17 @@ class CephFSSubvolume(RESTController):
f'Failed to list subvolumes for volume {vol_name}: {err}'
)
subvolumes = json.loads(out)
- for subvolume in subvolumes:
- params['sub_name'] = subvolume['name']
- error_code, out, err = mgr.remote('volumes', '_cmd_fs_subvolume_info', None,
- params)
- if error_code != 0:
- raise DashboardException(
- f'Failed to get info for subvolume {subvolume["name"]}: {err}'
- )
- subvolume['info'] = json.loads(out)
+
+ if info:
+ for subvolume in subvolumes:
+ params['sub_name'] = subvolume['name']
+ error_code, out, err = mgr.remote('volumes', '_cmd_fs_subvolume_info', None,
+ params)
+ if error_code != 0:
+ raise DashboardException(
+ f'Failed to get info for subvolume {subvolume["name"]}: {err}'
+ )
+ subvolume['info'] = json.loads(out)
return subvolumes
@RESTController.Resource('GET')
@@ -749,12 +754,27 @@ class CephFSSubvolume(RESTController):
component='cephfs')
return f'Subvolume {subvol_name} removed successfully'
+ @RESTController.Resource('GET')
+ def exists(self, vol_name: str, group_name=''):
+ params = {'vol_name': vol_name}
+ if group_name:
+ params['group_name'] = group_name
+ error_code, out, err = mgr.remote(
+ 'volumes', '_cmd_fs_subvolume_exist', None, params)
+ if error_code != 0:
+ raise DashboardException(
+ f'Failed to check if subvolume exists: {err}'
+ )
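+        # the exist command replies with a plain-text message rather than JSON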
+ if out == 'no subvolume exists':
+ return False
+ return True
+
@APIRouter('/cephfs/subvolume/group', Scope.CEPHFS)
@APIDoc("Cephfs Subvolume Group Management API", "CephfsSubvolumeGroup")
class CephFSSubvolumeGroups(RESTController):
- def get(self, vol_name):
+ def get(self, vol_name, info=True):
if not vol_name:
raise DashboardException(
f'Error listing subvolume groups for {vol_name}')
@@ -764,15 +784,17 @@ class CephFSSubvolumeGroups(RESTController):
raise DashboardException(
f'Error listing subvolume groups for {vol_name}')
subvolume_groups = json.loads(out)
- for group in subvolume_groups:
- error_code, out, err = mgr.remote('volumes', '_cmd_fs_subvolumegroup_info',
- None, {'vol_name': vol_name,
- 'group_name': group['name']})
- if error_code != 0:
- raise DashboardException(
- f'Failed to get info for subvolume group {group["name"]}: {err}'
- )
- group['info'] = json.loads(out)
+
+ if info:
+ for group in subvolume_groups:
+ error_code, out, err = mgr.remote('volumes', '_cmd_fs_subvolumegroup_info',
+ None, {'vol_name': vol_name,
+ 'group_name': group['name']})
+ if error_code != 0:
+ raise DashboardException(
+ f'Failed to get info for subvolume group {group["name"]}: {err}'
+ )
+ group['info'] = json.loads(out)
return subvolume_groups
@RESTController.Resource('GET')
@@ -813,3 +835,61 @@ class CephFSSubvolumeGroups(RESTController):
f'Failed to delete subvolume group {group_name}: {err}'
)
return f'Subvolume group {group_name} removed successfully'
+
+
+@APIRouter('/cephfs/subvolume/snapshot', Scope.CEPHFS)
+@APIDoc("Cephfs Subvolume Snapshot Management API", "CephfsSubvolumeSnapshot")
+class CephFSSubvolumeSnapshots(RESTController):
+ def get(self, vol_name: str, subvol_name, group_name: str = '', info=True):
+ params = {'vol_name': vol_name, 'sub_name': subvol_name}
+ if group_name:
+ params['group_name'] = group_name
+ error_code, out, err = mgr.remote('volumes', '_cmd_fs_subvolume_snapshot_ls', None,
+ params)
+ if error_code != 0:
+ raise DashboardException(
+ f'Failed to list subvolume snapshots for subvolume {subvol_name}: {err}'
+ )
+ snapshots = json.loads(out)
+
+ if info:
+ for snapshot in snapshots:
+ params['snap_name'] = snapshot['name']
+ error_code, out, err = mgr.remote('volumes', '_cmd_fs_subvolume_snapshot_info',
+ None, params)
+ if error_code != 0:
+ raise DashboardException(
+ f'Failed to get info for subvolume snapshot {snapshot["name"]}: {err}'
+ )
+ snapshot['info'] = json.loads(out)
+ return snapshots
+
+
+@APIRouter('/cephfs/snaphost/schedule', Scope.CEPHFS)
+@APIDoc("Cephfs Snapshot Scheduling API", "CephFSSnapshotSchedule")
+class CephFSSnapshotSchedule(RESTController):
+
+    def list(self, fs: str, path: str = '/', recursive: bool = True):
+        error_code, out, err = mgr.remote('snap_schedule', 'snap_schedule_list',
+                                          path, recursive, fs, 'plain')
+        if error_code != 0:
+            raise DashboardException(
+                f'Failed to get list of snapshot schedules for path {path}: {err}'
+            )
+
+        if len(out) == 0:
+            return []
+
+        output = []
+        for snap in out.split('\n'):
+            current_path = snap.strip().split(' ')[0]
+            error_code, status_out, err = mgr.remote('snap_schedule', 'snap_schedule_get',
+                                                     current_path, fs, 'plain')
+            if error_code != 0:
+                raise DashboardException(
+                    f'Failed to get snapshot schedule for path {current_path}: {err}'
+                )
+            output.append(json.loads(status_out))
+
+        return output
diff --git a/src/pybind/mgr/dashboard/controllers/rgw.py b/src/pybind/mgr/dashboard/controllers/rgw.py
index 9bc1b489ede..661dc8afb6d 100644
--- a/src/pybind/mgr/dashboard/controllers/rgw.py
+++ b/src/pybind/mgr/dashboard/controllers/rgw.py
@@ -1,5 +1,6 @@
# -*- coding: utf-8 -*-
+# pylint: disable=C0302
import json
import logging
import re
@@ -290,6 +291,10 @@ class RgwBucket(RgwRESTController):
rgw_client = RgwClient.admin_instance()
return rgw_client.get_bucket_policy(bucket)
+ def _set_tags(self, bucket_name, tags, daemon_name, owner):
+ rgw_client = RgwClient.instance(owner, daemon_name)
+ return rgw_client.set_tags(bucket_name, tags)
+
@staticmethod
def strip_tenant_from_bucket_name(bucket_name):
# type (str) -> str
@@ -355,7 +360,7 @@ class RgwBucket(RgwRESTController):
lock_enabled='false', lock_mode=None,
lock_retention_period_days=None,
lock_retention_period_years=None, encryption_state='false',
- encryption_type=None, key_id=None, daemon_name=None):
+ encryption_type=None, key_id=None, tags=None, daemon_name=None):
lock_enabled = str_to_bool(lock_enabled)
encryption_state = str_to_bool(encryption_state)
try:
@@ -371,6 +376,9 @@ class RgwBucket(RgwRESTController):
if encryption_state:
self._set_encryption(bucket, encryption_type, key_id, daemon_name, uid)
+ if tags:
+ self._set_tags(bucket, tags, daemon_name, uid)
+
return result
except RequestException as e: # pragma: no cover - handling is too obvious
raise DashboardException(e, http_status_code=500, component='rgw')
@@ -380,7 +388,7 @@ class RgwBucket(RgwRESTController):
encryption_state='false', encryption_type=None, key_id=None,
mfa_delete=None, mfa_token_serial=None, mfa_token_pin=None,
lock_mode=None, lock_retention_period_days=None,
- lock_retention_period_years=None, daemon_name=None):
+ lock_retention_period_years=None, tags=None, daemon_name=None):
encryption_state = str_to_bool(encryption_state)
# When linking a non-tenant-user owned bucket to a tenanted user, we
# need to prefix bucket name with '/'. e.g. photos -> /photos
@@ -420,6 +428,8 @@ class RgwBucket(RgwRESTController):
self._set_encryption(bucket_name, encryption_type, key_id, daemon_name, uid)
if encryption_status['Status'] == 'Enabled' and (not encryption_state):
self._delete_encryption(bucket_name, daemon_name, uid)
+ if tags:
+ self._set_tags(bucket_name, tags, daemon_name, uid)
return self._append_bid(result)
def delete(self, bucket, purge_objects='true', daemon_name=None):
@@ -717,6 +727,36 @@ class RGWRoleEndpoints:
rgw_client.create_role(role_name, role_path, role_assume_policy_doc)
return f'Role {role_name} created successfully'
+ @staticmethod
+ def role_update(_, role_name: str, max_session_duration: str):
+ assert role_name
+ assert max_session_duration
+ # convert max_session_duration which is in hours to seconds
+ max_session_duration = int(float(max_session_duration) * 3600)
+ rgw_client = RgwClient.admin_instance()
+ rgw_client.update_role(role_name, str(max_session_duration))
+ return f'Role {role_name} updated successfully'
+
+ @staticmethod
+ def role_delete(_, role_name: str):
+ assert role_name
+ rgw_client = RgwClient.admin_instance()
+ rgw_client.delete_role(role_name)
+ return f'Role {role_name} deleted successfully'
+
+ @staticmethod
+ def model(role_name: str):
+ assert role_name
+ rgw_client = RgwClient.admin_instance()
+ role = rgw_client.get_role(role_name)
+ model = {'role_name': '', 'max_session_duration': ''}
+ model['role_name'] = role['RoleName']
+
+        # convert MaxSessionDuration, which is in seconds, to hours
+ if role['MaxSessionDuration']:
+ model['max_session_duration'] = role['MaxSessionDuration'] / 3600
+ return model
+
# pylint: disable=C0301
assume_role_policy_help = (
@@ -725,6 +765,10 @@ assume_role_policy_help = (
'target="_blank">click here.</a>'
)
+max_session_duration_help = (
+    'The maximum session duration (in hours) that you want to set for the specified role. This setting can have a value from 1 hour to 12 hours.'  # noqa: E501
+)
+
create_container = VerticalContainer('Create Role', 'create_role', fields=[
FormField('Role name', 'role_name', validators=[Validator.RGW_ROLE_NAME]),
FormField('Path', 'role_path', validators=[Validator.RGW_ROLE_PATH]),
@@ -734,29 +778,58 @@ create_container = VerticalContainer('Create Role', 'create_role', fields=[
field_type='textarea',
validators=[Validator.JSON]),
])
-create_role_form = Form(path='/rgw/roles/create',
+
+edit_container = VerticalContainer('Edit Role', 'edit_role', fields=[
+ FormField('Role name', 'role_name', readonly=True),
+ FormField('Max Session Duration', 'max_session_duration',
+ help=max_session_duration_help,
+ validators=[Validator.RGW_ROLE_SESSION_DURATION])
+])
+
+create_role_form = Form(path='/create',
root_container=create_container,
task_info=FormTaskInfo("IAM RGW Role '{role_name}' created successfully",
['role_name']),
method_type=MethodType.POST.value)
+edit_role_form = Form(path='/edit',
+ root_container=edit_container,
+ task_info=FormTaskInfo("IAM RGW Role '{role_name}' edited successfully",
+ ['role_name']),
+ method_type=MethodType.PUT.value,
+ model_callback=RGWRoleEndpoints.model)
+
@CRUDEndpoint(
router=APIRouter('/rgw/roles', Scope.RGW),
doc=APIDoc("List of RGW roles", "RGW"),
actions=[
TableAction(name='Create', permission='create', icon=Icon.ADD.value,
- routerLink='/rgw/roles/create')
+ routerLink='/rgw/roles/create'),
+ TableAction(name='Edit', permission='update', icon=Icon.EDIT.value,
+ click='edit', routerLink='/rgw/roles/edit'),
+ TableAction(name='Delete', permission='delete', icon=Icon.DESTROY.value,
+ click='delete', disable=True),
],
- forms=[create_role_form],
- permissions=[Scope.CONFIG_OPT],
+ forms=[create_role_form, edit_role_form],
+ column_key='RoleName',
+ resource='Role',
+ permissions=[Scope.RGW],
get_all=CRUDCollectionMethod(
func=RGWRoleEndpoints.role_list,
doc=EndpointDoc("List RGW roles")
),
create=CRUDCollectionMethod(
func=RGWRoleEndpoints.role_create,
- doc=EndpointDoc("Create Ceph User")
+ doc=EndpointDoc("Create RGW role")
+ ),
+ edit=CRUDCollectionMethod(
+ func=RGWRoleEndpoints.role_update,
+ doc=EndpointDoc("Edit RGW role")
+ ),
+ delete=CRUDCollectionMethod(
+ func=RGWRoleEndpoints.role_delete,
+ doc=EndpointDoc("Delete RGW role")
),
set_column={
"CreateDate": {'cellTemplate': 'date'},
diff --git a/src/pybind/mgr/dashboard/frontend/cypress/e2e/common/table-helper.feature.po.ts b/src/pybind/mgr/dashboard/frontend/cypress/e2e/common/table-helper.feature.po.ts
index 82a2c7c35cd..330950acd44 100644
--- a/src/pybind/mgr/dashboard/frontend/cypress/e2e/common/table-helper.feature.po.ts
+++ b/src/pybind/mgr/dashboard/frontend/cypress/e2e/common/table-helper.feature.po.ts
@@ -54,6 +54,13 @@ Then('I should not see a row with {string}', (row: string) => {
);
});
+Then('I should see a table in the expanded row', () => {
+ cy.get('.datatable-row-detail').within(() => {
+ cy.get('cd-table').should('exist');
+ cy.get('datatable-scroller, .empty-row');
+ });
+});
+
Then('I should not see a row with {string} in the expanded row', (row: string) => {
cy.get('.datatable-row-detail').within(() => {
cy.get('cd-table .search input').first().clear().type(row);
@@ -133,3 +140,9 @@ And('I should see row {string} have {string} on this tab', (row: string, options
});
}
});
+
+Then('I should see an alert {string} in the expanded row', (alert: string) => {
+ cy.get('.datatable-row-detail').within(() => {
+ cy.get('.alert-panel-text').contains(alert);
+ });
+});
diff --git a/src/pybind/mgr/dashboard/frontend/cypress/e2e/filesystems/filesystems.e2e-spec.feature b/src/pybind/mgr/dashboard/frontend/cypress/e2e/filesystems/filesystems.e2e-spec.feature
index 2c08fb56eff..54fb1a8139e 100644
--- a/src/pybind/mgr/dashboard/frontend/cypress/e2e/filesystems/filesystems.e2e-spec.feature
+++ b/src/pybind/mgr/dashboard/frontend/cypress/e2e/filesystems/filesystems.e2e-spec.feature
@@ -12,19 +12,20 @@ Feature: CephFS Management
And I click on "Create File System" button
Then I should see a row with "test_cephfs"
- Scenario: Edit CephFS Volume
- Given I am on the "cephfs" page
- And I select a row "test_cephfs"
- And I click on "Edit" button
- And enter "name" "test_cephfs_edit"
- And I click on "Edit File System" button
- Then I should see a row with "test_cephfs_edit"
+ # Should be uncommented once the pre-requisite is fixed
+ # Scenario: Edit CephFS Volume
+ # Given I am on the "cephfs" page
+ # And I select a row "test_cephfs"
+ # And I click on "Edit" button
+ # And enter "name" "test_cephfs_edit"
+ # And I click on "Edit File System" button
+ # Then I should see a row with "test_cephfs_edit"
Scenario: Remove CephFS Volume
Given I am on the "cephfs" page
- And I select a row "test_cephfs_edit"
+ And I select a row "test_cephfs"
And I click on "Remove" button from the table actions
Then I should see the modal
And I check the tick box in modal
And I click on "Remove File System" button
- Then I should not see a row with "test_cephfs_edit"
+ Then I should not see a row with "test_cephfs"
diff --git a/src/pybind/mgr/dashboard/frontend/cypress/e2e/filesystems/snapshots.e2e-spec.feature b/src/pybind/mgr/dashboard/frontend/cypress/e2e/filesystems/snapshots.e2e-spec.feature
new file mode 100644
index 00000000000..002282172bb
--- /dev/null
+++ b/src/pybind/mgr/dashboard/frontend/cypress/e2e/filesystems/snapshots.e2e-spec.feature
@@ -0,0 +1,53 @@
+Feature: CephFS Snapshot Management
+
+ Goal: To test out the CephFS snapshot management features
+
+ Background: Login
+ Given I am logged in
+
+ Scenario: Create a CephFS Volume
+ Given I am on the "cephfs" page
+ And I click on "Create" button
+ And enter "name" "test_cephfs"
+ And I click on "Create File System" button
+ Then I should see a row with "test_cephfs"
+
+ Scenario: Snapshots tab without a subvolume
+ Given I am on the "cephfs" page
+ When I expand the row "test_cephfs"
+ And I go to the "Snapshots" tab
+ Then I should see an alert "No subvolumes are present" in the expanded row
+
+ Scenario: Create a CephFS Subvolume
+ Given I am on the "cephfs" page
+ When I expand the row "test_cephfs"
+ And I go to the "Subvolumes" tab
+ And I click on "Create" button from the expanded row
+ And enter "subvolumeName" "test_subvolume" in the modal
+ And I click on "Create Subvolume" button
+ Then I should see a row with "test_subvolume" in the expanded row
+
+ Scenario: Show the CephFS Snapshots view
+ Given I am on the "cephfs" page
+ When I expand the row "test_cephfs"
+ And I go to the "Snapshots" tab
+ Then I should see a table in the expanded row
+
+ Scenario: Remove a CephFS Subvolume
+ Given I am on the "cephfs" page
+ When I expand the row "test_cephfs"
+ And I go to the "Subvolumes" tab
+ When I select a row "test_subvolume" in the expanded row
+ And I click on "Remove" button from the table actions in the expanded row
+ And I check the tick box in modal
+ And I click on "Remove Subvolume" button
+ Then I should not see a row with "test_subvolume" in the expanded row
+
+ Scenario: Remove CephFS Volume
+ Given I am on the "cephfs" page
+ And I select a row "test_cephfs"
+ And I click on "Remove" button from the table actions
+ Then I should see the modal
+ And I check the tick box in modal
+ And I click on "Remove File System" button
+ Then I should not see a row with "test_cephfs"
diff --git a/src/pybind/mgr/dashboard/frontend/cypress/e2e/filesystems/subvolume-groups.e2e-spec.feature b/src/pybind/mgr/dashboard/frontend/cypress/e2e/filesystems/subvolume-groups.e2e-spec.feature
index 66e3f726a66..e53df64771d 100644
--- a/src/pybind/mgr/dashboard/frontend/cypress/e2e/filesystems/subvolume-groups.e2e-spec.feature
+++ b/src/pybind/mgr/dashboard/frontend/cypress/e2e/filesystems/subvolume-groups.e2e-spec.feature
@@ -21,7 +21,7 @@ Feature: CephFS Subvolume Group management
And I click on "Create Subvolume group" button
Then I should see a row with "test_subvolume_group" in the expanded row
- Scenario: Edit a CephFS Subvolume
+ Scenario: Edit a CephFS Subvolume Group
Given I am on the "cephfs" page
When I expand the row "test_cephfs"
And I go to the "Subvolume groups" tab
@@ -31,7 +31,7 @@ Feature: CephFS Subvolume Group management
And I click on "Edit Subvolume group" button
Then I should see row "test_subvolume_group" of the expanded row to have a usage bar
- Scenario: Remove a CephFS Subvolume
+ Scenario: Remove a CephFS Subvolume Group
Given I am on the "cephfs" page
When I expand the row "test_cephfs"
And I go to the "Subvolume groups" tab
diff --git a/src/pybind/mgr/dashboard/frontend/cypress/e2e/rgw/roles.e2e-spec.ts b/src/pybind/mgr/dashboard/frontend/cypress/e2e/rgw/roles.e2e-spec.ts
index 597f7d1be88..80a8b0ec902 100644
--- a/src/pybind/mgr/dashboard/frontend/cypress/e2e/rgw/roles.e2e-spec.ts
+++ b/src/pybind/mgr/dashboard/frontend/cypress/e2e/rgw/roles.e2e-spec.ts
@@ -9,11 +9,21 @@ describe('RGW roles page', () => {
});
describe('Create, Edit & Delete rgw roles', () => {
+ const roleName = 'testRole';
+
it('should create rgw roles', () => {
roles.navigateTo('create');
- roles.create('testRole', '/', '{}');
+ roles.create(roleName, '/', '{}');
roles.navigateTo();
- roles.checkExist('testRole', true);
+ roles.checkExist(roleName, true);
+ });
+
+ it('should edit rgw role', () => {
+ roles.edit(roleName, 3);
+ });
+
+ it('should delete rgw role', () => {
+ roles.delete(roleName);
});
});
});
diff --git a/src/pybind/mgr/dashboard/frontend/cypress/e2e/rgw/roles.po.ts b/src/pybind/mgr/dashboard/frontend/cypress/e2e/rgw/roles.po.ts
index b72ca5df9a7..717655b2f08 100644
--- a/src/pybind/mgr/dashboard/frontend/cypress/e2e/rgw/roles.po.ts
+++ b/src/pybind/mgr/dashboard/frontend/cypress/e2e/rgw/roles.po.ts
@@ -11,18 +11,36 @@ export class RolesPageHelper extends PageHelper {
columnIndex = {
roleName: 2,
path: 3,
- arn: 4
+ arn: 4,
+ createDate: 5,
+ maxSessionDuration: 6
};
@PageHelper.restrictTo(pages.create.url)
create(name: string, path: string, policyDocument: string) {
- cy.get('#formly_3_string_role_name_0').type(name);
- cy.get('#formly_3_textarea_role_assume_policy_doc_2').type(policyDocument);
- cy.get('#formly_3_string_role_path_1').type(path);
+ cy.get('[id$="string_role_name_0"]').type(name);
+ cy.get('[id$="role_assume_policy_doc_2"]').type(policyDocument);
+ cy.get('[id$="role_path_1"]').type(path);
cy.get("[aria-label='Create Role']").should('exist').click();
cy.get('cd-crud-table').should('exist');
}
+ edit(name: string, maxSessionDuration: number) {
+ this.navigateEdit(name);
+ cy.get('[id$="max_session_duration_1"]').clear().type(maxSessionDuration.toString());
+ cy.get("[aria-label='Edit Role']").should('exist').click();
+ cy.get('cd-crud-table').should('exist');
+
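+    // verify the updated max session duration is rendered in the roles table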
+ this.getTableCell(this.columnIndex.roleName, name)
+ .click()
+ .parent()
+ .find(`datatable-body-cell:nth-child(${this.columnIndex.maxSessionDuration})`)
+ .should(($elements) => {
+        const durations = $elements.map((_, el) => el.textContent).get();
+        expect(durations).to.include(`${maxSessionDuration} hours`);
+ });
+ }
+
@PageHelper.restrictTo(pages.index.url)
checkExist(name: string, exist: boolean) {
this.getTableCell(this.columnIndex.roleName, name).should(($elements) => {
diff --git a/src/pybind/mgr/dashboard/frontend/src/app/app-routing.module.ts b/src/pybind/mgr/dashboard/frontend/src/app/app-routing.module.ts
index 38ae3a4affd..2ba634fa25d 100644
--- a/src/pybind/mgr/dashboard/frontend/src/app/app-routing.module.ts
+++ b/src/pybind/mgr/dashboard/frontend/src/app/app-routing.module.ts
@@ -361,7 +361,7 @@ const routes: Routes = [
data: { breadcrumbs: ActionLabels.CREATE }
},
{
- path: `${URLVerbs.EDIT}/:name`,
+ path: `${URLVerbs.EDIT}/:id`,
component: CephfsVolumeFormComponent,
data: { breadcrumbs: ActionLabels.EDIT }
}
diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cephfs/cephfs-form/cephfs-form.component.html b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cephfs/cephfs-form/cephfs-form.component.html
index 05235d16ccd..f8d0fa80320 100644
--- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cephfs/cephfs-form/cephfs-form.component.html
+++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cephfs/cephfs-form/cephfs-form.component.html
@@ -15,6 +15,18 @@
i18n
*ngIf="!editing">Orchestrator is not configured. Deploy MDS daemons manually after creating the volume.</cd-alert-panel>
</ng-container>
+
+ <cd-alert-panel type="info"
+ class="m-3"
+ spacingClass="mt-3"
+ i18n
+ *ngIf="editing && disableRename">
+    <p>The File System can only be renamed if it is shut down and `refuse_client_session` is set to true.
+ Follow the steps below in the command line and refresh the page:</p>
+ <cd-code-block [codes]="[fsFailCmd]"></cd-code-block>
+ <cd-code-block [codes]="[fsSetCmd]"></cd-code-block>
+ </cd-alert-panel>
+
<div class="card-body">
<!-- Name -->
<div class="form-group row">
@@ -98,6 +110,7 @@
<cd-form-button-panel (submitActionEvent)="submit()"
[form]="form"
[submitText]="(action | titlecase) + ' ' + (resource | upperFirst)"
+ [disabled]="editing ? disableRename: false"
wrappingClass="text-right"></cd-form-button-panel>
</div>
</div>
diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cephfs/cephfs-form/cephfs-form.component.spec.ts b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cephfs/cephfs-form/cephfs-form.component.spec.ts
index 461f4bca052..520f726d555 100644
--- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cephfs/cephfs-form/cephfs-form.component.spec.ts
+++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cephfs/cephfs-form/cephfs-form.component.spec.ts
@@ -78,5 +78,42 @@ describe('CephfsVolumeFormComponent', () => {
expect(label).toBeNull();
expect(hosts).toBeNull();
});
+
+ it('should disable renaming and show info alert if disableRename is true', () => {
+ component.disableRename = true;
+ component.ngOnInit();
+ fixture.detectChanges();
+ const alertPanel = fixture.debugElement.query(By.css('cd-alert-panel'));
+ expect(alertPanel).not.toBeNull();
+ });
+
+ it('should not show the alert if disableRename is false', () => {
+ component.disableRename = false;
+ component.ngOnInit();
+ fixture.detectChanges();
+ const alertPanel = fixture.debugElement.query(By.css('cd-alert-panel'));
+ expect(alertPanel).toBeNull();
+ });
+
+ it('should disable the submit button only if disableRename is true', () => {
+ component.disableRename = true;
+ component.ngOnInit();
+ fixture.detectChanges();
+ const submitButton = fixture.debugElement.query(By.css('button[type=submit]'));
+ expect(submitButton.nativeElement.disabled).toBeTruthy();
+
+ // the submit button should only be disabled when the form is in edit mode
+ component.editing = false;
+ component.ngOnInit();
+ fixture.detectChanges();
+ expect(submitButton.nativeElement.disabled).toBeFalsy();
+
+ // submit button should be enabled if disableRename is false
+ component.editing = true;
+ component.disableRename = false;
+ component.ngOnInit();
+ fixture.detectChanges();
+ expect(submitButton.nativeElement.disabled).toBeFalsy();
+ });
});
});
diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cephfs/cephfs-form/cephfs-form.component.ts b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cephfs/cephfs-form/cephfs-form.component.ts
index 6d84e33c7b6..b0f90979c25 100644
--- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cephfs/cephfs-form/cephfs-form.component.ts
+++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cephfs/cephfs-form/cephfs-form.component.ts
@@ -51,6 +51,11 @@ export class CephfsVolumeFormComponent extends CdForm implements OnInit {
labels: string[];
hasOrchestrator: boolean;
currentVolumeName: string;
+ fsId: number;
+ disableRename: boolean = true;
+
+ fsFailCmd: string;
+ fsSetCmd: string;
constructor(
private router: Router,
@@ -101,9 +106,22 @@ export class CephfsVolumeFormComponent extends CdForm implements OnInit {
ngOnInit() {
if (this.editing) {
- this.route.params.subscribe((params: { name: string }) => {
- this.currentVolumeName = params.name;
+ this.route.params.subscribe((params: { id: string }) => {
+ this.fsId = Number(params.id);
+ });
+
+ this.cephfsService.getCephfs(this.fsId).subscribe((resp: object) => {
+ this.currentVolumeName = resp['cephfs']['name'];
this.form.get('name').setValue(this.currentVolumeName);
+
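+          // renaming is only permitted when the filesystem has been failed
+          // (not joinable) and refuse_client_session is set; otherwise disable it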
+ this.disableRename = !(
+ !resp['cephfs']['flags']['joinable'] && resp['cephfs']['flags']['refuse_client_session']
+ );
+ if (this.disableRename) {
+ this.form.get('name').disable();
+ this.fsFailCmd = `ceph fs fail ${this.currentVolumeName}`;
+ this.fsSetCmd = `ceph fs set ${this.currentVolumeName} refuse_client_session true`;
+ }
});
} else {
const hostContext = new CdTableFetchDataContext(() => undefined);
diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cephfs/cephfs-list/cephfs-list.component.ts b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cephfs/cephfs-list/cephfs-list.component.ts
index 0d55845ab59..26e79727c12 100644
--- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cephfs/cephfs-list/cephfs-list.component.ts
+++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cephfs/cephfs-list/cephfs-list.component.ts
@@ -87,7 +87,7 @@ export class CephfsListComponent extends ListWithDetails implements OnInit {
permission: 'update',
icon: Icons.edit,
click: () =>
- this.router.navigate([this.urlBuilder.getEdit(this.selection.first().mdsmap.fs_name)])
+ this.router.navigate([this.urlBuilder.getEdit(String(this.selection.first().id))])
},
{
permission: 'delete',
diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cephfs/cephfs-snapshotschedule-list/cephfs-snapshotschedule-list.component.html b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cephfs/cephfs-snapshotschedule-list/cephfs-snapshotschedule-list.component.html
new file mode 100644
index 00000000000..2e270057d57
--- /dev/null
+++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cephfs/cephfs-snapshotschedule-list/cephfs-snapshotschedule-list.component.html
@@ -0,0 +1,58 @@
+<ng-container *ngIf="isLoading$ | async">
+ <cd-loading-panel>
+ <span i18n>Loading snapshot schedules...</span>
+ </cd-loading-panel>
+</ng-container>
+
+<ng-template #pathTpl
+ let-row="row">
+ <span
+ class="fw-bold"
+ [ngbTooltip]="fullpathTpl"
+ triggers="click:blur">{{row.path | path}}</span>
+
+ <span *ngIf="row.active; else inactiveStatusTpl">
+ <i [ngClass]="[icons.success, icons.large]"
+ ngbTooltip="{{row.path}} is active"
+ class="text-success"></i>
+ </span>
+
+ <ng-template #inactiveStatusTpl>
+ <i [ngClass]="[icons.warning, icons.large]"
+ class="text-warning"
+ ngbTooltip="{{row.path}} has been deactivated"></i>
+ </ng-template>
+
+ <ng-template #fullpathTpl>
+ <span data-toggle="tooltip"
+ [title]="row.path"
+ class="font-monospace">{{ row.path }}
+ <cd-copy-2-clipboard-button *ngIf="row.path"
+ [source]="row.path"
+ [byId]="false"
+ [showIconOnly]="true">
+ </cd-copy-2-clipboard-button>
+ </span>
+</ng-template>
+
+</ng-template>
+
+<cd-table
+ [data]="snapshotSchedules$ | async"
+ columnMode="flex"
+ [columns]="columns"
+ selectionType="single"
+ [hasDetails]="false"
+ (fetchData)="fetchData()"
+ (updateSelection)="updateSelection($event)"
+>
+ <div class="table-actions btn-toolbar">
+ <cd-table-actions
+ [permission]="permissions.cephfs"
+ [selection]="selection"
+ class="btn-group"
+ [tableActions]="tableActions"
+ >
+ </cd-table-actions>
+ </div>
+</cd-table>
diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cephfs/cephfs-snapshotschedule-list/cephfs-snapshotschedule-list.component.scss b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cephfs/cephfs-snapshotschedule-list/cephfs-snapshotschedule-list.component.scss
new file mode 100644
index 00000000000..e69de29bb2d
--- /dev/null
+++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cephfs/cephfs-snapshotschedule-list/cephfs-snapshotschedule-list.component.scss
diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cephfs/cephfs-snapshotschedule-list/cephfs-snapshotschedule-list.component.spec.ts b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cephfs/cephfs-snapshotschedule-list/cephfs-snapshotschedule-list.component.spec.ts
new file mode 100644
index 00000000000..a20972f1cf8
--- /dev/null
+++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cephfs/cephfs-snapshotschedule-list/cephfs-snapshotschedule-list.component.spec.ts
@@ -0,0 +1,30 @@
+import { ComponentFixture, TestBed } from '@angular/core/testing';
+
+import { CephfsSnapshotscheduleListComponent } from './cephfs-snapshotschedule-list.component';
+import { HttpClientTestingModule } from '@angular/common/http/testing';
+import { SharedModule } from '~/app/shared/shared.module';
+import { ToastrModule } from 'ngx-toastr';
+import { RouterTestingModule } from '@angular/router/testing';
+import { NgbActiveModal } from '@ng-bootstrap/ng-bootstrap';
+import { configureTestBed } from '~/testing/unit-test-helper';
+
+describe('CephfsSnapshotscheduleListComponent', () => {
+ let component: CephfsSnapshotscheduleListComponent;
+ let fixture: ComponentFixture<CephfsSnapshotscheduleListComponent>;
+
+ configureTestBed({
+ declarations: [CephfsSnapshotscheduleListComponent],
+ imports: [HttpClientTestingModule, SharedModule, ToastrModule.forRoot(), RouterTestingModule],
+ providers: [NgbActiveModal]
+ });
+
+ beforeEach(() => {
+ fixture = TestBed.createComponent(CephfsSnapshotscheduleListComponent);
+ component = fixture.componentInstance;
+ fixture.detectChanges();
+ });
+
+ it('should create', () => {
+ expect(component).toBeTruthy();
+ });
+});
diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cephfs/cephfs-snapshotschedule-list/cephfs-snapshotschedule-list.component.ts b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cephfs/cephfs-snapshotschedule-list/cephfs-snapshotschedule-list.component.ts
new file mode 100644
index 00000000000..d5f24e89728
--- /dev/null
+++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cephfs/cephfs-snapshotschedule-list/cephfs-snapshotschedule-list.component.ts
@@ -0,0 +1,99 @@
+import { Component, Input, OnChanges, OnInit, SimpleChanges, ViewChild } from '@angular/core';
+import { NgbModalRef } from '@ng-bootstrap/ng-bootstrap';
+import { BehaviorSubject, Observable } from 'rxjs';
+import { finalize, shareReplay, switchMap } from 'rxjs/operators';
+import { CephfsSnapshotScheduleService } from '~/app/shared/api/cephfs-snapshot-schedule.service';
+import { CdForm } from '~/app/shared/forms/cd-form';
+import { CdTableAction } from '~/app/shared/models/cd-table-action';
+import { CdTableColumn } from '~/app/shared/models/cd-table-column';
+import { CdTableFetchDataContext } from '~/app/shared/models/cd-table-fetch-data-context';
+import { CdTableSelection } from '~/app/shared/models/cd-table-selection';
+import { Permissions } from '~/app/shared/models/permissions';
+import { SnapshotSchedule } from '~/app/shared/models/snapshot-schedule';
+import { AuthStorageService } from '~/app/shared/services/auth-storage.service';
+import { ModalService } from '~/app/shared/services/modal.service';
+import { Icons } from '~/app/shared/enum/icons.enum';
+import { CellTemplate } from '~/app/shared/enum/cell-template.enum';
+
+@Component({
+ selector: 'cd-cephfs-snapshotschedule-list',
+ templateUrl: './cephfs-snapshotschedule-list.component.html',
+ styleUrls: ['./cephfs-snapshotschedule-list.component.scss']
+})
+export class CephfsSnapshotscheduleListComponent extends CdForm implements OnInit, OnChanges {
+ @Input() fsName!: string;
+
+ @ViewChild('pathTpl', { static: true })
+ pathTpl: any;
+
+ snapshotSchedules$!: Observable<SnapshotSchedule[]>;
+ subject$ = new BehaviorSubject<SnapshotSchedule[]>([]);
+ isLoading$ = new BehaviorSubject<boolean>(true);
+ columns: CdTableColumn[] = [];
+ tableActions: CdTableAction[] = [];
+ context!: CdTableFetchDataContext;
+ selection = new CdTableSelection();
+ permissions!: Permissions;
+ modalRef!: NgbModalRef;
+ errorMessage: string = '';
+ selectedName: string = '';
+ icons = Icons;
+
+ constructor(
+ private snapshotScheduleService: CephfsSnapshotScheduleService,
+ private authStorageService: AuthStorageService,
+ private modalService: ModalService
+ ) {
+ super();
+ this.permissions = this.authStorageService.getPermissions();
+ }
+
+ ngOnChanges(changes: SimpleChanges): void {
+ if (changes.fsName) {
+ this.subject$.next([]);
+ }
+ }
+
+ ngOnInit(): void {
+ this.snapshotSchedules$ = this.subject$.pipe(
+ switchMap(() =>
+ this.snapshotScheduleService
+ .getSnapshotScheduleList('/', this.fsName)
+ .pipe(finalize(() => this.isLoading$.next(false)))
+ ),
+ shareReplay(1)
+ );
+
+ this.columns = [
+ { prop: 'path', name: $localize`Path`, flexGrow: 3, cellTemplate: this.pathTpl },
+ { prop: 'subvol', name: $localize`Subvolume` },
+ { prop: 'schedule', name: $localize`Repeat interval` },
+ { prop: 'retention', name: $localize`Retention policy` },
+ { prop: 'created_count', name: $localize`Created Count` },
+ { prop: 'pruned_count', name: $localize`Deleted Count` },
+ { prop: 'start', name: $localize`Start time`, cellTransformation: CellTemplate.timeAgo },
+ { prop: 'created', name: $localize`Created`, cellTransformation: CellTemplate.timeAgo }
+ ];
+
+ this.tableActions = [];
+ }
+
+ fetchData() {
+ this.subject$.next([]);
+ }
+
+ updateSelection(selection: CdTableSelection) {
+ this.selection = selection;
+ }
+
+ openModal(edit = false) {
+ this.modalService.show(
+ {},
+ {
+ fsName: 'fs1',
+ isEdit: edit
+ },
+ { size: 'lg' }
+ );
+ }
+}
diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cephfs/cephfs-subvolume-group/cephfs-subvolume-group.component.ts b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cephfs/cephfs-subvolume-group/cephfs-subvolume-group.component.ts
index 3807ae61b67..a91daf8cb93 100644
--- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cephfs/cephfs-subvolume-group/cephfs-subvolume-group.component.ts
+++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cephfs/cephfs-subvolume-group/cephfs-subvolume-group.component.ts
@@ -1,5 +1,5 @@
-import { Component, Input, OnChanges, OnInit, ViewChild } from '@angular/core';
-import { Observable, ReplaySubject, of } from 'rxjs';
+import { Component, Input, OnChanges, OnInit, SimpleChanges, ViewChild } from '@angular/core';
+import { BehaviorSubject, Observable, of } from 'rxjs';
import { catchError, shareReplay, switchMap } from 'rxjs/operators';
import { CephfsSubvolumeGroupService } from '~/app/shared/api/cephfs-subvolume-group.service';
@@ -9,7 +9,6 @@ import { CdTableAction } from '~/app/shared/models/cd-table-action';
import { CdTableColumn } from '~/app/shared/models/cd-table-column';
import { CdTableFetchDataContext } from '~/app/shared/models/cd-table-fetch-data-context';
import { CdTableSelection } from '~/app/shared/models/cd-table-selection';
-import { CephfsSubvolumeGroup } from '~/app/shared/models/cephfs-subvolumegroup.model';
import { CephfsSubvolumegroupFormComponent } from '../cephfs-subvolumegroup-form/cephfs-subvolumegroup-form.component';
import { ActionLabelsI18n } from '~/app/shared/constants/app.constants';
import { AuthStorageService } from '~/app/shared/services/auth-storage.service';
@@ -18,6 +17,7 @@ import { Permissions } from '~/app/shared/models/permissions';
import { CriticalConfirmationModalComponent } from '~/app/shared/components/critical-confirmation-modal/critical-confirmation-modal.component';
import { FinishedTask } from '~/app/shared/models/finished-task';
import { TaskWrapperService } from '~/app/shared/services/task-wrapper.service';
+import { CephfsSubvolumeGroup } from '~/app/shared/models/cephfs-subvolume-group.model';
@Component({
selector: 'cd-cephfs-subvolume-group',
@@ -52,7 +52,7 @@ export class CephfsSubvolumeGroupComponent implements OnInit, OnChanges {
permissions: Permissions;
subvolumeGroup$: Observable<CephfsSubvolumeGroup[]>;
- subject = new ReplaySubject<CephfsSubvolumeGroup[]>();
+ subject = new BehaviorSubject<CephfsSubvolumeGroup[]>([]);
constructor(
private cephfsSubvolumeGroup: CephfsSubvolumeGroupService,
@@ -138,11 +138,13 @@ export class CephfsSubvolumeGroupComponent implements OnInit, OnChanges {
}
fetchData() {
- this.subject.next();
+ this.subject.next([]);
}
- ngOnChanges() {
- this.subject.next();
+ ngOnChanges(changes: SimpleChanges) {
+ if (changes.fsName) {
+ this.subject.next([]);
+ }
}
updateSelection(selection: CdTableSelection) {
diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cephfs/cephfs-subvolume-list/cephfs-subvolume-list.component.html b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cephfs/cephfs-subvolume-list/cephfs-subvolume-list.component.html
index 29731bbbd1b..f840c8dab11 100644
--- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cephfs/cephfs-subvolume-list/cephfs-subvolume-list.component.html
+++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cephfs/cephfs-subvolume-list/cephfs-subvolume-list.component.html
@@ -1,21 +1,10 @@
<div class="row">
- <div class="col-sm-1">
- <h3 i18n>Groups</h3>
- <ng-container *ngIf="subVolumeGroups$ | async as subVolumeGroups">
- <ul class="nav flex-column nav-pills">
- <li class="nav-item">
- <a class="nav-link"
- [class.active]="!activeGroupName"
- (click)="selectSubVolumeGroup()">Default</a>
- </li>
- <li class="nav-item"
- *ngFor="let subVolumeGroup of subVolumeGroups">
- <a class="nav-link text-decoration-none text-break"
- [class.active]="subVolumeGroup.name === activeGroupName"
- (click)="selectSubVolumeGroup(subVolumeGroup.name)">{{subVolumeGroup.name}}</a>
- </li>
- </ul>
- </ng-container>
+ <div class="col-sm-1"
+ *ngIf="subVolumeGroups$ | async as subVolumeGroups">
+ <cd-vertical-navigation title="Groups"
+ [items]="subvolumeGroupList"
+ inputIdentifier="group-filter"
+ (emitActiveItem)="selectSubVolumeGroup($event)"></cd-vertical-navigation>
</div>
<div class="col-11 vertical-line">
<cd-table [data]="subVolumes$ | async"
diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cephfs/cephfs-subvolume-list/cephfs-subvolume-list.component.ts b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cephfs/cephfs-subvolume-list/cephfs-subvolume-list.component.ts
index 3f679d27b96..4f9cf27db0f 100644
--- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cephfs/cephfs-subvolume-list/cephfs-subvolume-list.component.ts
+++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cephfs/cephfs-subvolume-list/cephfs-subvolume-list.component.ts
@@ -1,6 +1,14 @@
-import { Component, Input, OnChanges, OnInit, TemplateRef, ViewChild } from '@angular/core';
-import { Observable, ReplaySubject, of } from 'rxjs';
-import { catchError, shareReplay, switchMap } from 'rxjs/operators';
+import {
+ Component,
+ Input,
+ OnChanges,
+ OnInit,
+ SimpleChanges,
+ TemplateRef,
+ ViewChild
+} from '@angular/core';
+import { BehaviorSubject, Observable, of } from 'rxjs';
+import { catchError, switchMap, tap } from 'rxjs/operators';
import { CephfsSubvolumeService } from '~/app/shared/api/cephfs-subvolume.service';
import { ActionLabelsI18n } from '~/app/shared/constants/app.constants';
import { CellTemplate } from '~/app/shared/enum/cell-template.enum';
@@ -22,7 +30,7 @@ import { CdFormGroup } from '~/app/shared/forms/cd-form-group';
import { CdForm } from '~/app/shared/forms/cd-form';
import { CriticalConfirmationModalComponent } from '~/app/shared/components/critical-confirmation-modal/critical-confirmation-modal.component';
import { CephfsSubvolumeGroupService } from '~/app/shared/api/cephfs-subvolume-group.service';
-import { CephfsSubvolumeGroup } from '~/app/shared/models/cephfs-subvolumegroup.model';
+import { CephfsSubvolumeGroup } from '~/app/shared/models/cephfs-subvolume-group.model';
@Component({
selector: 'cd-cephfs-subvolume-list',
@@ -64,13 +72,16 @@ export class CephfsSubvolumeListComponent extends CdForm implements OnInit, OnCh
subVolumes$: Observable<CephfsSubvolume[]>;
subVolumeGroups$: Observable<CephfsSubvolumeGroup[]>;
- subject = new ReplaySubject<CephfsSubvolume[]>();
- groupsSubject = new ReplaySubject<CephfsSubvolume[]>();
+ subject = new BehaviorSubject<CephfsSubvolume[]>([]);
+ groupsSubject = new BehaviorSubject<CephfsSubvolume[]>([]);
+
+ subvolumeGroupList: string[] = [];
+ subVolumesList: CephfsSubvolume[] = [];
activeGroupName: string = '';
constructor(
- private cephfsSubVolume: CephfsSubvolumeService,
+ private cephfsSubVolumeService: CephfsSubvolumeService,
private actionLabels: ActionLabelsI18n,
private modalService: ModalService,
private authStorageService: AuthStorageService,
@@ -146,11 +157,13 @@ export class CephfsSubvolumeListComponent extends CdForm implements OnInit, OnCh
}
];
- this.getSubVolumes();
-
this.subVolumeGroups$ = this.groupsSubject.pipe(
switchMap(() =>
- this.cephfsSubvolumeGroupService.get(this.fsName).pipe(
+ this.cephfsSubvolumeGroupService.get(this.fsName, false).pipe(
+ tap((groups) => {
+ this.subvolumeGroupList = groups.map((group) => group.name);
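+            // an empty name is prepended to represent the default (ungrouped) entry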
+ this.subvolumeGroupList.unshift('');
+ }),
catchError(() => {
this.context.error();
return of(null);
@@ -161,12 +174,14 @@ export class CephfsSubvolumeListComponent extends CdForm implements OnInit, OnCh
}
fetchData() {
- this.subject.next();
+ this.subject.next([]);
}
- ngOnChanges() {
- this.subject.next();
- this.groupsSubject.next();
+ ngOnChanges(changes: SimpleChanges) {
+ if (changes.fsName) {
+ this.subject.next([]);
+ this.groupsSubject.next([]);
+ }
}
updateSelection(selection: CdTableSelection) {
@@ -203,7 +218,7 @@ export class CephfsSubvolumeListComponent extends CdForm implements OnInit, OnCh
this.taskWrapper
.wrapTaskAroundCall({
task: new FinishedTask('cephfs/subvolume/remove', { subVolumeName: this.selectedName }),
- call: this.cephfsSubVolume.remove(
+ call: this.cephfsSubVolumeService.remove(
this.fsName,
this.selectedName,
this.activeGroupName,
@@ -222,20 +237,19 @@ export class CephfsSubvolumeListComponent extends CdForm implements OnInit, OnCh
selectSubVolumeGroup(subVolumeGroupName: string) {
this.activeGroupName = subVolumeGroupName;
- this.getSubVolumes(subVolumeGroupName);
+ this.getSubVolumes();
}
- getSubVolumes(subVolumeGroupName = '') {
+ getSubVolumes() {
this.subVolumes$ = this.subject.pipe(
switchMap(() =>
- this.cephfsSubVolume.get(this.fsName, subVolumeGroupName).pipe(
+ this.cephfsSubVolumeService.get(this.fsName, this.activeGroupName).pipe(
catchError(() => {
this.context.error();
return of(null);
})
)
- ),
- shareReplay(1)
+ )
);
}
}
diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cephfs/cephfs-subvolume-snapshots-list/cephfs-subvolume-snapshots-list.component.html b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cephfs/cephfs-subvolume-snapshots-list/cephfs-subvolume-snapshots-list.component.html
new file mode 100644
index 00000000000..de311723652
--- /dev/null
+++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cephfs/cephfs-subvolume-snapshots-list/cephfs-subvolume-snapshots-list.component.html
@@ -0,0 +1,36 @@
+<ng-container *ngIf="isLoading">
+ <cd-loading-panel>
+ <span i18n>Loading snapshots...</span>
+ </cd-loading-panel>
+</ng-container>
+
+<div class="row"
+ *ngIf="isSubVolumesAvailable; else noGroupsTpl">
+ <div class="col-sm-2">
+ <cd-vertical-navigation title="Groups"
+ [items]="subvolumeGroupList"
+ inputIdentifier="group-filter"
+ (emitActiveItem)="selectSubVolumeGroup($event)"></cd-vertical-navigation>
+ </div>
+ <div class="col-sm-2 vertical-line"
+ *ngIf="subVolumes$ | async">
+ <cd-vertical-navigation title="Subvolumes"
+ [items]="subVolumesList"
+ (emitActiveItem)="selectSubVolume($event)"
+ inputIdentifier="subvol-filter"></cd-vertical-navigation>
+ </div>
+ <div class="col-8 vertical-line"
+ *ngIf="isSubVolumesAvailable">
+ <cd-table [data]="snapshots$ | async"
+ columnMode="flex"
+ [columns]="columns"
+ selectionType="single"
+ [hasDetails]="false"
+ (fetchData)="fetchData()"></cd-table>
+ </div>
+</div>
+<ng-template #noGroupsTpl>
+ <cd-alert-panel type="info"
+ i18n
+ *ngIf="!isLoading">No subvolumes are present. Please create subvolumes to manage snapshots.</cd-alert-panel>
+</ng-template>
diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cephfs/cephfs-subvolume-snapshots-list/cephfs-subvolume-snapshots-list.component.scss b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cephfs/cephfs-subvolume-snapshots-list/cephfs-subvolume-snapshots-list.component.scss
new file mode 100644
index 00000000000..e69de29bb2d
--- /dev/null
+++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cephfs/cephfs-subvolume-snapshots-list/cephfs-subvolume-snapshots-list.component.scss
diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cephfs/cephfs-subvolume-snapshots-list/cephfs-subvolume-snapshots-list.component.spec.ts b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cephfs/cephfs-subvolume-snapshots-list/cephfs-subvolume-snapshots-list.component.spec.ts
new file mode 100644
index 00000000000..1d03cf2a8bc
--- /dev/null
+++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cephfs/cephfs-subvolume-snapshots-list/cephfs-subvolume-snapshots-list.component.spec.ts
@@ -0,0 +1,38 @@
+import { ComponentFixture, TestBed } from '@angular/core/testing';
+
+import { CephfsSubvolumeSnapshotsListComponent } from './cephfs-subvolume-snapshots-list.component';
+import { HttpClientTestingModule } from '@angular/common/http/testing';
+import { SharedModule } from '~/app/shared/shared.module';
+
+describe('CephfsSubvolumeSnapshotsListComponent', () => {
+ let component: CephfsSubvolumeSnapshotsListComponent;
+ let fixture: ComponentFixture<CephfsSubvolumeSnapshotsListComponent>;
+
+ beforeEach(async () => {
+ await TestBed.configureTestingModule({
+ declarations: [CephfsSubvolumeSnapshotsListComponent],
+ imports: [HttpClientTestingModule, SharedModule]
+ }).compileComponents();
+
+ fixture = TestBed.createComponent(CephfsSubvolumeSnapshotsListComponent);
+ component = fixture.componentInstance;
+ fixture.detectChanges();
+ });
+
+ it('should create', () => {
+ expect(component).toBeTruthy();
+ });
+
+ it('should show loading when the items are loading', () => {
+ component.isLoading = true;
+ fixture.detectChanges();
+ expect(fixture.nativeElement.querySelector('cd-loading-panel')).toBeTruthy();
+ });
+
+ it('should show the alert panel when there are no subvolumes', () => {
+ component.isLoading = false;
+ component.subvolumeGroupList = [];
+ fixture.detectChanges();
+ expect(fixture.nativeElement.querySelector('cd-alert-panel')).toBeTruthy();
+ });
+});
diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cephfs/cephfs-subvolume-snapshots-list/cephfs-subvolume-snapshots-list.component.ts b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cephfs/cephfs-subvolume-snapshots-list/cephfs-subvolume-snapshots-list.component.ts
new file mode 100644
index 00000000000..251314c3e86
--- /dev/null
+++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cephfs/cephfs-subvolume-snapshots-list/cephfs-subvolume-snapshots-list.component.ts
@@ -0,0 +1,148 @@
+import { Component, Input, OnChanges, OnInit, SimpleChanges } from '@angular/core';
+import { BehaviorSubject, Observable, forkJoin, of } from 'rxjs';
+import { catchError, shareReplay, switchMap, tap } from 'rxjs/operators';
+import { CephfsSubvolumeGroupService } from '~/app/shared/api/cephfs-subvolume-group.service';
+import { CephfsSubvolumeService } from '~/app/shared/api/cephfs-subvolume.service';
+import { CellTemplate } from '~/app/shared/enum/cell-template.enum';
+import { CdTableColumn } from '~/app/shared/models/cd-table-column';
+import { CdTableFetchDataContext } from '~/app/shared/models/cd-table-fetch-data-context';
+import { CephfsSubvolume, SubvolumeSnapshot } from '~/app/shared/models/cephfs-subvolume.model';
+
+@Component({
+ selector: 'cd-cephfs-subvolume-snapshots-list',
+ templateUrl: './cephfs-subvolume-snapshots-list.component.html',
+ styleUrls: ['./cephfs-subvolume-snapshots-list.component.scss']
+})
+export class CephfsSubvolumeSnapshotsListComponent implements OnInit, OnChanges {
+ @Input() fsName: string;
+
+ context: CdTableFetchDataContext;
+ columns: CdTableColumn[] = [];
+
+ subVolumes$: Observable<CephfsSubvolume[]>;
+ snapshots$: Observable<any[]>;
+ snapshotSubject = new BehaviorSubject<SubvolumeSnapshot[]>([]);
+ subVolumeSubject = new BehaviorSubject<CephfsSubvolume[]>([]);
+
+ subvolumeGroupList: string[] = [];
+ subVolumesList: string[];
+
+ activeGroupName = '';
+ activeSubVolumeName = '';
+
+ isSubVolumesAvailable = false;
+ isLoading = true;
+
+ observables: any = [];
+
+ constructor(
+ private cephfsSubvolumeGroupService: CephfsSubvolumeGroupService,
+ private cephfsSubvolumeService: CephfsSubvolumeService
+ ) {}
+
+ ngOnInit(): void {
+ this.columns = [
+ {
+ name: $localize`Name`,
+ prop: 'name',
+ flexGrow: 1
+ },
+ {
+ name: $localize`Created`,
+ prop: 'info.created_at',
+ flexGrow: 1,
+ cellTransformation: CellTemplate.timeAgo
+ },
+ {
+ name: $localize`Pending Clones`,
+ prop: 'info.has_pending_clones',
+ flexGrow: 0.5,
+ cellTransformation: CellTemplate.badge,
+ customTemplateConfig: {
+ map: {
+ no: { class: 'badge-success' },
+ yes: { class: 'badge-info' }
+ }
+ }
+ }
+ ];
+
+ this.cephfsSubvolumeGroupService
+ .get(this.fsName)
+ .pipe(
+ switchMap((groups) => {
+ // manually adding the group '_nogroup' to the list.
+          // manually adding the default group (empty name, i.e. '_nogroup') to the list.
+
+ const observables = groups.map((group) =>
+ this.cephfsSubvolumeService.existsInFs(this.fsName, group.name).pipe(
+ switchMap((resp) => {
+ if (resp) {
+ this.subvolumeGroupList.push(group.name);
+ }
+ return of(resp); // Emit the response
+ })
+ )
+ );
+
+ return forkJoin(observables);
+ })
+ )
+ .subscribe(() => {
+ if (this.subvolumeGroupList.length) {
+ this.isSubVolumesAvailable = true;
+ }
+ this.isLoading = false;
+ });
+ }
+
+ ngOnChanges(changes: SimpleChanges): void {
+ if (changes.fsName) {
+ this.subVolumeSubject.next([]);
+ }
+ }
+
+ selectSubVolumeGroup(subVolumeGroupName: string) {
+ this.activeGroupName = subVolumeGroupName;
+ this.getSubVolumes();
+ }
+
+ selectSubVolume(subVolumeName: string) {
+ this.activeSubVolumeName = subVolumeName;
+ this.getSubVolumesSnapshot();
+ }
+
+ getSubVolumes() {
+ this.subVolumes$ = this.subVolumeSubject.pipe(
+ switchMap(() =>
+ this.cephfsSubvolumeService.get(this.fsName, this.activeGroupName, false).pipe(
+ tap((resp) => {
+ this.subVolumesList = resp.map((subVolume) => subVolume.name);
+ this.activeSubVolumeName = resp[0].name;
+ this.getSubVolumesSnapshot();
+ })
+ )
+ )
+ );
+ }
+
+ getSubVolumesSnapshot() {
+ this.snapshots$ = this.snapshotSubject.pipe(
+ switchMap(() =>
+ this.cephfsSubvolumeService
+ .getSnapshots(this.fsName, this.activeSubVolumeName, this.activeGroupName)
+ .pipe(
+ catchError(() => {
+ this.context.error();
+ return of(null);
+ })
+ )
+ ),
+ shareReplay(1)
+ );
+ }
+
+ fetchData() {
+ this.snapshotSubject.next([]);
+ }
+}
diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cephfs/cephfs-tabs/cephfs-tabs.component.html b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cephfs/cephfs-tabs/cephfs-tabs.component.html
index 0ad69ccf50a..87268858be5 100644
--- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cephfs/cephfs-tabs/cephfs-tabs.component.html
+++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cephfs/cephfs-tabs/cephfs-tabs.component.html
@@ -1,63 +1,94 @@
<ng-container *ngIf="selection">
- <nav ngbNav
- #nav="ngbNav"
- (navChange)="softRefresh()"
- class="nav-tabs"
- cdStatefulTab="cephfs-tabs">
+ <nav
+ ngbNav
+ #nav="ngbNav"
+ (navChange)="softRefresh()"
+ class="nav-tabs"
+ cdStatefulTab="cephfs-tabs"
+ >
<ng-container ngbNavItem="details">
- <a ngbNavLink
- i18n>Details</a>
+ <a
+ ngbNavLink
+ i18n>Details</a>
<ng-template ngbNavContent>
- <cd-cephfs-detail [data]="details">
- </cd-cephfs-detail>
+ <cd-cephfs-detail [data]="details"> </cd-cephfs-detail>
</ng-template>
</ng-container>
<ng-container ngbNavItem="subvolumes">
- <a ngbNavLink
- i18n>Subvolumes</a>
+ <a
+ ngbNavLink
+ i18n>Subvolumes</a>
<ng-template ngbNavContent>
- <cd-cephfs-subvolume-list [fsName]="selection.mdsmap.fs_name"
- [pools]="details.pools"></cd-cephfs-subvolume-list>
+ <cd-cephfs-subvolume-list
+ [fsName]="selection.mdsmap.fs_name"
+ [pools]="details.pools"
+ ></cd-cephfs-subvolume-list>
</ng-template>
</ng-container>
<ng-container ngbNavItem="subvolume-groups">
- <a ngbNavLink
- i18n>Subvolume groups</a>
+ <a
+ ngbNavLink
+ i18n>Subvolume groups</a>
<ng-template ngbNavContent>
- <cd-cephfs-subvolume-group [fsName]="selection.mdsmap.fs_name"
- [pools]="details.pools">
+ <cd-cephfs-subvolume-group
+ [fsName]="selection.mdsmap.fs_name"
+ [pools]="details.pools">
</cd-cephfs-subvolume-group>
</ng-template>
</ng-container>
+ <ng-container ngbNavItem="snapshots">
+ <a
+ ngbNavLink
+ i18n>Snapshots</a>
+ <ng-template ngbNavContent>
+ <cd-cephfs-subvolume-snapshots-list [fsName]="selection.mdsmap.fs_name">
+ </cd-cephfs-subvolume-snapshots-list>
+ </ng-template>
+ </ng-container>
+ <ng-container ngbNavItem="snapshot-schedules">
+ <a
+ ngbNavLink
+ i18n>Snapshot schedules</a>
+ <ng-template ngbNavContent>
+ <cd-cephfs-snapshotschedule-list
+ [fsName]="selection.mdsmap.fs_name"
+ ></cd-cephfs-snapshotschedule-list>
+ </ng-template>
+ </ng-container>
<ng-container ngbNavItem="clients">
<a ngbNavLink>
<ng-container i18n>Clients</ng-container>
<span class="badge badge-pill badge-tab ms-1">{{ clients.data.length }}</span>
</a>
<ng-template ngbNavContent>
- <cd-cephfs-clients [id]="id"
- [clients]="clients"
- (triggerApiUpdate)="refresh()">
+ <cd-cephfs-clients
+ [id]="id"
+ [clients]="clients"
+ (triggerApiUpdate)="refresh()">
</cd-cephfs-clients>
</ng-template>
</ng-container>
<ng-container ngbNavItem="directories">
- <a ngbNavLink
- i18n>Directories</a>
+ <a
+ ngbNavLink
+ i18n>Directories</a>
<ng-template ngbNavContent>
<cd-cephfs-directories [id]="id"></cd-cephfs-directories>
</ng-template>
</ng-container>
<ng-container ngbNavItem="performance-details">
- <a ngbNavLink
- i18n>Performance Details</a>
+ <a
+ ngbNavLink
+ i18n>Performance Details</a>
<ng-template ngbNavContent>
- <cd-grafana i18n-title
- title="CephFS MDS performance"
- [grafanaPath]="'mds-performance?var-mds_servers=mds.' + grafanaId"
- [type]="'metrics'"
- uid="tbO9LAiZz"
- grafanaStyle="one">
+ <cd-grafana
+ i18n-title
+ title="CephFS MDS performance"
+ [grafanaPath]="'mds-performance?var-mds_servers=mds.' + grafanaId"
+ [type]="'metrics'"
+ uid="tbO9LAiZz"
+ grafanaStyle="one"
+ >
</cd-grafana>
</ng-template>
</ng-container>
diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cephfs/cephfs.module.ts b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cephfs/cephfs.module.ts
index a83e0f16870..53544ccd9ed 100644
--- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cephfs/cephfs.module.ts
+++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cephfs/cephfs.module.ts
@@ -19,6 +19,9 @@ import { CephfsSubvolumeListComponent } from './cephfs-subvolume-list/cephfs-sub
import { CephfsSubvolumeFormComponent } from './cephfs-subvolume-form/cephfs-subvolume-form.component';
import { CephfsSubvolumeGroupComponent } from './cephfs-subvolume-group/cephfs-subvolume-group.component';
import { CephfsSubvolumegroupFormComponent } from './cephfs-subvolumegroup-form/cephfs-subvolumegroup-form.component';
+import { CephfsSubvolumeSnapshotsListComponent } from './cephfs-subvolume-snapshots-list/cephfs-subvolume-snapshots-list.component';
+import { CephfsSnapshotscheduleListComponent } from './cephfs-snapshotschedule-list/cephfs-snapshotschedule-list.component';
+import { DataTableModule } from '../../shared/datatable/datatable.module';
@NgModule({
imports: [
@@ -31,7 +34,8 @@ import { CephfsSubvolumegroupFormComponent } from './cephfs-subvolumegroup-form/
FormsModule,
ReactiveFormsModule,
NgbTypeaheadModule,
- NgbTooltipModule
+ NgbTooltipModule,
+ DataTableModule
],
declarations: [
CephfsDetailComponent,
@@ -45,7 +49,9 @@ import { CephfsSubvolumegroupFormComponent } from './cephfs-subvolumegroup-form/
CephfsSubvolumeFormComponent,
CephfsDirectoriesComponent,
CephfsSubvolumeGroupComponent,
- CephfsSubvolumegroupFormComponent
+ CephfsSubvolumegroupFormComponent,
+ CephfsSubvolumeSnapshotsListComponent,
+ CephfsSnapshotscheduleListComponent
]
})
export class CephfsModule {}
diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/bucket-tag-modal/bucket-tag-modal.component.html b/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/bucket-tag-modal/bucket-tag-modal.component.html
new file mode 100644
index 00000000000..3e732e35568
--- /dev/null
+++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/bucket-tag-modal/bucket-tag-modal.component.html
@@ -0,0 +1,59 @@
+<cd-modal [modalRef]="activeModal">
+ <span class="modal-title"
+ i18n>{{ getMode() }} Tag</span>
+
+ <ng-container class="modal-content">
+ <form class="form"
+ #formDir="ngForm"
+ [formGroup]="form">
+ <div class="modal-body">
+ <!-- Key -->
+ <div class="form-group row">
+ <label class="cd-col-form-label required"
+ for="key"
+ i18n>Key</label>
+ <div class="cd-col-form-input">
+ <input type="text"
+ class="form-control"
+ formControlName="key"
+ id="key">
+ <span class="invalid-feedback"
+ *ngIf="form.showError('key', formDir, 'required')"
+ i18n>This field is required.</span>
+ <span class="invalid-feedback"
+ *ngIf="form.showError('key', formDir, 'unique')"
+ i18n>This key must be unique.</span>
+ <span class="invalid-feedback"
+ *ngIf="form.showError('key', formDir, 'maxLength')"
+ i18n>Length of the key must be a maximum of 128 characters</span>
+ </div>
+ </div>
+
+ <!-- Value -->
+ <div class="form-group row">
+ <label class="cd-col-form-label required"
+ for="value"
+ i18n>Value</label>
+ <div class="cd-col-form-input">
+ <input id="value"
+ class="form-control"
+ type="text"
+ formControlName="value">
+ <span *ngIf="form.showError('value', formDir, 'required')"
+ class="invalid-feedback"
+ i18n>This field is required.</span>
+ <span class="invalid-feedback"
+ *ngIf="form.showError('value', formDir, 'maxLength')"
+ i18n>Length of the value must be a maximum of 128 characters</span>
+ </div>
+ </div>
+ </div>
+
+ <div class="modal-footer">
+ <cd-form-button-panel (submitActionEvent)="onSubmit()"
+ [form]="form"
+ [submitText]="getMode()"></cd-form-button-panel>
+ </div>
+ </form>
+ </ng-container>
+ </cd-modal>
diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/bucket-tag-modal/bucket-tag-modal.component.scss b/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/bucket-tag-modal/bucket-tag-modal.component.scss
new file mode 100644
index 00000000000..e69de29bb2d
--- /dev/null
+++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/bucket-tag-modal/bucket-tag-modal.component.scss
diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/bucket-tag-modal/bucket-tag-modal.component.spec.ts b/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/bucket-tag-modal/bucket-tag-modal.component.spec.ts
new file mode 100644
index 00000000000..a54e7eeee08
--- /dev/null
+++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/bucket-tag-modal/bucket-tag-modal.component.spec.ts
@@ -0,0 +1,27 @@
+import { ComponentFixture, TestBed } from '@angular/core/testing';
+
+import { BucketTagModalComponent } from './bucket-tag-modal.component';
+import { HttpClientTestingModule } from '@angular/common/http/testing';
+import { ReactiveFormsModule } from '@angular/forms';
+import { NgbActiveModal } from '@ng-bootstrap/ng-bootstrap';
+
+describe('BucketTagModalComponent', () => {
+ let component: BucketTagModalComponent;
+ let fixture: ComponentFixture<BucketTagModalComponent>;
+
+ beforeEach(async () => {
+ await TestBed.configureTestingModule({
+ declarations: [BucketTagModalComponent],
+ imports: [HttpClientTestingModule, ReactiveFormsModule],
+ providers: [NgbActiveModal]
+ }).compileComponents();
+
+ fixture = TestBed.createComponent(BucketTagModalComponent);
+ component = fixture.componentInstance;
+ fixture.detectChanges();
+ });
+
+ it('should create', () => {
+ expect(component).toBeTruthy();
+ });
+});
diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/bucket-tag-modal/bucket-tag-modal.component.ts b/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/bucket-tag-modal/bucket-tag-modal.component.ts
new file mode 100644
index 00000000000..5135539e5d4
--- /dev/null
+++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/bucket-tag-modal/bucket-tag-modal.component.ts
@@ -0,0 +1,75 @@
+import { Component, EventEmitter, Output } from '@angular/core';
+import { Validators } from '@angular/forms';
+import { NgbActiveModal } from '@ng-bootstrap/ng-bootstrap';
+import _ from 'lodash';
+import { ActionLabelsI18n } from '~/app/shared/constants/app.constants';
+import { CdFormBuilder } from '~/app/shared/forms/cd-form-builder';
+import { CdFormGroup } from '~/app/shared/forms/cd-form-group';
+import { CdValidators } from '~/app/shared/forms/cd-validators';
+
+@Component({
+ selector: 'cd-bucket-tag-modal',
+ templateUrl: './bucket-tag-modal.component.html',
+ styleUrls: ['./bucket-tag-modal.component.scss']
+})
+export class BucketTagModalComponent {
+ @Output()
+ submitAction = new EventEmitter();
+
+ form: CdFormGroup;
+ editMode = false;
+ currentKeyTags: string[];
+ storedKey: string;
+
+ constructor(
+ private formBuilder: CdFormBuilder,
+ public activeModal: NgbActiveModal,
+ public actionLabels: ActionLabelsI18n
+ ) {
+ this.createForm();
+ }
+
+ private createForm() {
+ this.form = this.formBuilder.group({
+ key: [
+ null,
+ [
+ Validators.required,
+ CdValidators.custom('unique', (value: string) => {
+ if (_.isEmpty(value) && !this.currentKeyTags) {
+ return false;
+ }
+ return this.storedKey !== value && this.currentKeyTags.includes(value);
+ }),
+ CdValidators.custom('maxLength', (value: string) => {
+ if (_.isEmpty(value)) return false;
+ return value.length > 128;
+ })
+ ]
+ ],
+ value: [
+ null,
+ [
+ Validators.required,
+ CdValidators.custom('maxLength', (value: string) => {
+ if (_.isEmpty(value)) return false;
+ return value.length > 128;
+ })
+ ]
+ ]
+ });
+ }
+
+ onSubmit() {
+ this.submitAction.emit(this.form.value);
+ this.activeModal.close();
+ }
+
+ getMode() {
+ return this.editMode ? this.actionLabels.EDIT : this.actionLabels.ADD;
+ }
+
+ fillForm(tag: Record<string, string>) {
+ this.form.setValue(tag);
+ }
+}
diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-bucket-details/rgw-bucket-details.component.html b/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-bucket-details/rgw-bucket-details.component.html
index f2447feab26..e96a89b234f 100644
--- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-bucket-details/rgw-bucket-details.component.html
+++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-bucket-details/rgw-bucket-details.component.html
@@ -100,6 +100,21 @@
</ng-container>
</tbody>
</table>
+
+ <!-- Tags -->
+ <ng-container *ngIf="selection.tagset">
+ <legend i18n>Tags</legend>
+ <table class="table table-striped table-bordered">
+ <tbody>
+ <tr *ngFor="let tag of selection.tagset | keyvalue">
+ <td i18n
+ class="bold w-25">{{tag.key}}</td>
+ <td class="w-75">{{ tag.value }}</td>
+ </tr>
+ </tbody>
+ </table>
+ </ng-container>
+
</ng-template>
</ng-container>
diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-bucket-form/rgw-bucket-form.component.html b/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-bucket-form/rgw-bucket-form.component.html
index 761081c3744..a9704c0bdc8 100644
--- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-bucket-form/rgw-bucket-form.component.html
+++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-bucket-form/rgw-bucket-form.component.html
@@ -385,6 +385,32 @@
</div>
</fieldset>
+ <!-- Tags -->
+ <legend class="cd-header"
+ i18n>Tags
+ <cd-helper>Tagging gives you a way to categorize storage</cd-helper>
+ </legend>
+ <span *ngFor="let tag of tags; let i=index;">
+ <ng-container *ngTemplateOutlet="tagTpl; context:{index: i, tag: tag}"></ng-container>
+ </span>
+
+ <div class="row">
+ <div class="col-12">
+ <strong *ngIf="tags.length > 19"
+ class="text-warning"
+ i18n>Maximum of 20 tags reached</strong>
+ <button type="button"
+ id="add-tag"
+ class="btn btn-light float-end my-3"
+ [disabled]="tags.length > 19"
+ (click)="showTagModal()">
+ <i [ngClass]="[icons.add]"></i>
+ <ng-container i18n>Add tag</ng-container>
+ </button>
+ </div>
+ </div>
+
+
</div>
<div class="card-footer">
<cd-form-button-panel (submitActionEvent)="submit()"
@@ -395,3 +421,37 @@
</div>
</form>
</div>
+
+<ng-template #tagTpl
+ let-tag="tag"
+ let-index="index">
+ <div class="input-group my-2">
+ <ng-container *ngFor="let config of tagConfig">
+ <input type="text"
+ id="tag-{{config.attribute}}-{{index}}"
+ class="form-control"
+ [ngbTooltip]="config.attribute"
+ [value]="tag[config.attribute]"
+ disabled
+ readonly>
+ </ng-container>
+
+ <!-- Tag actions -->
+ <button type="button"
+ class="btn btn-light"
+ id="tag-edit-{{index}}"
+ i18n-ngbTooltip
+ ngbTooltip="Edit"
+ (click)="showTagModal(index)">
+ <i [ngClass]="[icons.edit]"></i>
+ </button>
+ <button type="button"
+ class="btn btn-light"
+ id="tag-delete-{{index}}"
+ i18n-ngbTooltip
+ ngbTooltip="Delete"
+ (click)="deleteTag(index)">
+ <i [ngClass]="[icons.trash]"></i>
+ </button>
+ </div>
+</ng-template>
diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-bucket-form/rgw-bucket-form.component.ts b/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-bucket-form/rgw-bucket-form.component.ts
index de8e0383ac0..6b90b45e16a 100644
--- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-bucket-form/rgw-bucket-form.component.ts
+++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-bucket-form/rgw-bucket-form.component.ts
@@ -21,6 +21,7 @@ import { RgwBucketEncryptionModel } from '../models/rgw-bucket-encryption';
import { RgwBucketMfaDelete } from '../models/rgw-bucket-mfa-delete';
import { RgwBucketVersioning } from '../models/rgw-bucket-versioning';
import { RgwConfigModalComponent } from '../rgw-config-modal/rgw-config-modal.component';
+import { BucketTagModalComponent } from '../bucket-tag-modal/bucket-tag-modal.component';
@Component({
selector: 'cd-rgw-bucket-form',
@@ -42,6 +43,15 @@ export class RgwBucketFormComponent extends CdForm implements OnInit, AfterViewC
icons = Icons;
kmsVaultConfig = false;
s3VaultConfig = false;
+ tags: Record<string, string>[] = [];
+ tagConfig = [
+ {
+ attribute: 'key'
+ },
+ {
+ attribute: 'value'
+ }
+ ];
get isVersioningEnabled(): boolean {
return this.bucketForm.getValue('versioning');
@@ -191,6 +201,11 @@ export class RgwBucketFormComponent extends CdForm implements OnInit, AfterViewC
value['versioning'] = bidResp['versioning'] === RgwBucketVersioning.ENABLED;
value['mfa-delete'] = bidResp['mfa_delete'] === RgwBucketMfaDelete.ENABLED;
value['encryption_enabled'] = bidResp['encryption'] === 'Enabled';
+ if (bidResp['tagset']) {
+ for (const [key, value] of Object.entries(bidResp['tagset'])) {
+ this.tags.push({ key: key, value: value.toString() });
+ }
+ }
// Append default values.
value = _.merge(defaults, value);
// Update the form.
@@ -224,6 +239,7 @@ export class RgwBucketFormComponent extends CdForm implements OnInit, AfterViewC
return;
}
const values = this.bucketForm.value;
+ const xmlStrTags = this.tagsToXML(this.tags);
if (this.editing) {
// Edit
const versioning = this.getVersioningStatus();
@@ -241,7 +257,8 @@ export class RgwBucketFormComponent extends CdForm implements OnInit, AfterViewC
values['mfa-token-serial'],
values['mfa-token-pin'],
values['lock_mode'],
- values['lock_retention_period_days']
+ values['lock_retention_period_days'],
+ xmlStrTags
)
.subscribe(
() => {
@@ -269,7 +286,8 @@ export class RgwBucketFormComponent extends CdForm implements OnInit, AfterViewC
values['lock_retention_period_days'],
values['encryption_enabled'],
values['encryption_type'],
- values['keyId']
+ values['keyId'],
+ xmlStrTags
)
.subscribe(
() => {
@@ -337,4 +355,51 @@ export class RgwBucketFormComponent extends CdForm implements OnInit, AfterViewC
.get('encryptionType')
.setValue(this.bucketForm.getValue('encryption_type') || 'AES256');
}
+
+ showTagModal(index?: number) {
+ const modalRef = this.modalService.show(BucketTagModalComponent);
+ const modalComponent = modalRef.componentInstance as BucketTagModalComponent;
+ modalComponent.currentKeyTags = this.tags.map((item) => item.key);
+
+ if (_.isNumber(index)) {
+ modalComponent.editMode = true;
+ modalComponent.fillForm(this.tags[index]);
+ modalComponent.storedKey = this.tags[index]['key'];
+ }
+
+ modalComponent.submitAction.subscribe((tag: Record<string, string>) => {
+ this.setTag(tag, index);
+ });
+ }
+
+ deleteTag(index: number) {
+ this.tags.splice(index, 1);
+ }
+
+ private setTag(tag: Record<string, string>, index?: number) {
+ if (_.isNumber(index)) {
+ this.tags[index] = tag;
+ } else {
+ this.tags.push(tag);
+ }
+ this.bucketForm.markAsDirty();
+ this.bucketForm.updateValueAndValidity();
+ }
+
+ private tagsToXML(tags: Record<string, string>[]): string {
+ let xml = '<Tagging><TagSet>';
+ for (const tag of tags) {
+ xml += '<Tag>';
+ for (const key in tag) {
+ if (key === 'key') {
+ xml += `<Key>${tag[key]}</Key>`;
+ } else if (key === 'value') {
+ xml += `<Value>${tag[key]}</Value>`;
+ }
+ }
+ xml += '</Tag>';
+ }
+ xml += '</TagSet></Tagging>';
+ return xml;
+ }
}
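For reference, the tags string that the form above sends to the REST API follows the S3 bucket-tagging XML shape assembled by tagsToXML(). A minimal standalone TypeScript sketch of that output, using hypothetical tag keys and values:

// Standalone illustration of the XML body built from the tag list;
// the sample keys/values below are hypothetical.
interface BucketTag {
  key: string;
  value: string;
}

function buildTaggingXml(tags: BucketTag[]): string {
  // Each key/value pair becomes a <Tag> element inside <Tagging><TagSet>.
  const body = tags
    .map((t) => `<Tag><Key>${t.key}</Key><Value>${t.value}</Value></Tag>`)
    .join('');
  return `<Tagging><TagSet>${body}</TagSet></Tagging>`;
}

// Prints a single line:
// <Tagging><TagSet><Tag><Key>env</Key><Value>prod</Value></Tag><Tag><Key>team</Key><Value>storage</Value></Tag></TagSet></Tagging>
console.log(buildTaggingXml([
  { key: 'env', value: 'prod' },
  { key: 'team', value: 'storage' }
]));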
diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw.module.ts b/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw.module.ts
index c16c13a81bd..8fcf3c6c664 100644
--- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw.module.ts
+++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw.module.ts
@@ -44,6 +44,7 @@ import { DashboardV3Module } from '../dashboard-v3/dashboard-v3.module';
import { RgwSyncPrimaryZoneComponent } from './rgw-sync-primary-zone/rgw-sync-primary-zone.component';
import { RgwSyncMetadataInfoComponent } from './rgw-sync-metadata-info/rgw-sync-metadata-info.component';
import { RgwSyncDataInfoComponent } from './rgw-sync-data-info/rgw-sync-data-info.component';
+import { BucketTagModalComponent } from './bucket-tag-modal/bucket-tag-modal.component';
@NgModule({
imports: [
@@ -100,7 +101,8 @@ import { RgwSyncDataInfoComponent } from './rgw-sync-data-info/rgw-sync-data-inf
RgwOverviewDashboardComponent,
RgwSyncPrimaryZoneComponent,
RgwSyncMetadataInfoComponent,
- RgwSyncDataInfoComponent
+ RgwSyncDataInfoComponent,
+ BucketTagModalComponent
]
})
export class RgwModule {}
@@ -156,6 +158,13 @@ const routes: Routes = [
data: {
breadcrumbs: ActionLabels.CREATE
}
+ },
+ {
+ path: URLVerbs.EDIT,
+ component: CrudFormComponent,
+ data: {
+ breadcrumbs: ActionLabels.EDIT
+ }
}
]
},
diff --git a/src/pybind/mgr/dashboard/frontend/src/app/core/context/context.component.ts b/src/pybind/mgr/dashboard/frontend/src/app/core/context/context.component.ts
index e036b754438..178f230c931 100644
--- a/src/pybind/mgr/dashboard/frontend/src/app/core/context/context.component.ts
+++ b/src/pybind/mgr/dashboard/frontend/src/app/core/context/context.component.ts
@@ -24,12 +24,14 @@ export class ContextComponent implements OnInit, OnDestroy {
private subs = new Subscription();
private rgwUrlPrefix = '/rgw';
private rgwUserUrlPrefix = '/rgw/user';
+ private rgwRoleUrlPrefix = '/rgw/roles';
private rgwBuckerUrlPrefix = '/rgw/bucket';
permissions: Permissions;
featureToggleMap$: FeatureTogglesMap$;
isRgwRoute =
document.location.href.includes(this.rgwUserUrlPrefix) ||
- document.location.href.includes(this.rgwBuckerUrlPrefix);
+ document.location.href.includes(this.rgwBuckerUrlPrefix) ||
+ document.location.href.includes(this.rgwRoleUrlPrefix);
constructor(
private authStorageService: AuthStorageService,
@@ -48,9 +50,11 @@ export class ContextComponent implements OnInit, OnDestroy {
.pipe(filter((event: Event) => event instanceof NavigationEnd))
.subscribe(
() =>
- (this.isRgwRoute = [this.rgwBuckerUrlPrefix, this.rgwUserUrlPrefix].some((urlPrefix) =>
- this.router.url.startsWith(urlPrefix)
- ))
+ (this.isRgwRoute = [
+ this.rgwBuckerUrlPrefix,
+ this.rgwUserUrlPrefix,
+ this.rgwRoleUrlPrefix
+ ].some((urlPrefix) => this.router.url.startsWith(urlPrefix)))
)
);
// Set daemon list polling only when in RGW route:
diff --git a/src/pybind/mgr/dashboard/frontend/src/app/shared/api/cephfs-snapshot-schedule.service.spec.ts b/src/pybind/mgr/dashboard/frontend/src/app/shared/api/cephfs-snapshot-schedule.service.spec.ts
new file mode 100644
index 00000000000..766b8f385b2
--- /dev/null
+++ b/src/pybind/mgr/dashboard/frontend/src/app/shared/api/cephfs-snapshot-schedule.service.spec.ts
@@ -0,0 +1,22 @@
+import { TestBed } from '@angular/core/testing';
+import { HttpClientTestingModule } from '@angular/common/http/testing';
+
+import { configureTestBed } from '~/testing/unit-test-helper';
+import { CephfsSnapshotScheduleService } from './cephfs-snapshot-schedule.service';
+
+describe('CephfsSnapshotScheduleService', () => {
+ let service: CephfsSnapshotScheduleService;
+
+ configureTestBed({
+ providers: [CephfsSnapshotScheduleService],
+ imports: [HttpClientTestingModule]
+ });
+
+ beforeEach(() => {
+ service = TestBed.inject(CephfsSnapshotScheduleService);
+ });
+
+ it('should be created', () => {
+ expect(service).toBeTruthy();
+ });
+});
diff --git a/src/pybind/mgr/dashboard/frontend/src/app/shared/api/cephfs-snapshot-schedule.service.ts b/src/pybind/mgr/dashboard/frontend/src/app/shared/api/cephfs-snapshot-schedule.service.ts
new file mode 100644
index 00000000000..ec9f58c0fee
--- /dev/null
+++ b/src/pybind/mgr/dashboard/frontend/src/app/shared/api/cephfs-snapshot-schedule.service.ts
@@ -0,0 +1,39 @@
+import { HttpClient } from '@angular/common/http';
+import { Injectable } from '@angular/core';
+import { Observable } from 'rxjs/internal/Observable';
+import { SnapshotSchedule } from '../models/snapshot-schedule';
+import { map } from 'rxjs/operators';
+
+@Injectable({
+ providedIn: 'root'
+})
+export class CephfsSnapshotScheduleService {
+ baseURL = 'api/cephfs';
+
+ constructor(private http: HttpClient) {}
+
+ getSnapshotScheduleList(
+ path: string,
+ fs: string,
+ recursive = true
+ ): Observable<SnapshotSchedule[]> {
+ return this.http
+ .get<SnapshotSchedule[]>(
+ `${this.baseURL}/snaphost/schedule?path=${path}&fs=${fs}&recursive=${recursive}`
+ )
+ .pipe(
+ map((snapList: SnapshotSchedule[]) =>
+ snapList.map((snapItem: SnapshotSchedule) => ({
+ ...snapItem,
+ status: snapItem.active ? 'Active' : 'Inactive',
+ subvol: snapItem?.subvol || ' - ',
+ retention: Object.values(snapItem.retention)?.length
+ ? Object.entries(snapItem.retention)
+ ?.map?.(([frequency, interval]) => `${interval}${frequency.toLocaleUpperCase()}`)
+ .join(' ')
+ : '-'
+ }))
+ )
+ );
+ }
+}
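getSnapshotScheduleList() above flattens the retention map returned by the snapshot-schedule API into a short display string. A standalone sketch of that mapping, with hypothetical retention values:

// Illustration of the retention formatting: each {frequency: interval}
// entry is rendered as "<interval><FREQUENCY>", joined with spaces.
function formatRetention(retention: Record<string, number>): string {
  const entries = Object.entries(retention);
  return entries.length
    ? entries
        .map(([frequency, interval]) => `${interval}${frequency.toLocaleUpperCase()}`)
        .join(' ')
    : '-';
}

// Hypothetical example: keep 24 hourly and 7 daily snapshots.
console.log(formatRetention({ h: 24, d: 7 })); // "24H 7D"
console.log(formatRetention({}));              // "-"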
diff --git a/src/pybind/mgr/dashboard/frontend/src/app/shared/api/cephfs-subvolume-group.service.ts b/src/pybind/mgr/dashboard/frontend/src/app/shared/api/cephfs-subvolume-group.service.ts
index db7fcfacd59..49d001f04f0 100644
--- a/src/pybind/mgr/dashboard/frontend/src/app/shared/api/cephfs-subvolume-group.service.ts
+++ b/src/pybind/mgr/dashboard/frontend/src/app/shared/api/cephfs-subvolume-group.service.ts
@@ -1,9 +1,9 @@
import { HttpClient } from '@angular/common/http';
import { Injectable } from '@angular/core';
import { Observable, of } from 'rxjs';
-import { CephfsSubvolumeGroup } from '../models/cephfs-subvolumegroup.model';
import _ from 'lodash';
import { mapTo, catchError } from 'rxjs/operators';
+import { CephfsSubvolumeGroup } from '../models/cephfs-subvolume-group.model';
@Injectable({
providedIn: 'root'
@@ -13,8 +13,12 @@ export class CephfsSubvolumeGroupService {
constructor(private http: HttpClient) {}
- get(volName: string): Observable<CephfsSubvolumeGroup[]> {
- return this.http.get<CephfsSubvolumeGroup[]>(`${this.baseURL}/${volName}`);
+ get(volName: string, info = true): Observable<CephfsSubvolumeGroup[]> {
+ return this.http.get<CephfsSubvolumeGroup[]>(`${this.baseURL}/${volName}`, {
+ params: {
+ info: info
+ }
+ });
}
create(
diff --git a/src/pybind/mgr/dashboard/frontend/src/app/shared/api/cephfs-subvolume.service.spec.ts b/src/pybind/mgr/dashboard/frontend/src/app/shared/api/cephfs-subvolume.service.spec.ts
index e40e9a52f3f..2e8448ff1a2 100644
--- a/src/pybind/mgr/dashboard/frontend/src/app/shared/api/cephfs-subvolume.service.spec.ts
+++ b/src/pybind/mgr/dashboard/frontend/src/app/shared/api/cephfs-subvolume.service.spec.ts
@@ -29,7 +29,7 @@ describe('CephfsSubvolumeService', () => {
it('should call get', () => {
service.get('testFS').subscribe();
- const req = httpTesting.expectOne('api/cephfs/subvolume/testFS?group_name=');
+ const req = httpTesting.expectOne('api/cephfs/subvolume/testFS?group_name=&info=true');
expect(req.request.method).toBe('GET');
});
@@ -40,4 +40,12 @@ describe('CephfsSubvolumeService', () => {
);
expect(req.request.method).toBe('DELETE');
});
+
+ it('should call getSnapshots', () => {
+ service.getSnapshots('testFS', 'testSubvol').subscribe();
+ const req = httpTesting.expectOne(
+ 'api/cephfs/subvolume/snapshot/testFS/testSubvol?group_name='
+ );
+ expect(req.request.method).toBe('GET');
+ });
});
diff --git a/src/pybind/mgr/dashboard/frontend/src/app/shared/api/cephfs-subvolume.service.ts b/src/pybind/mgr/dashboard/frontend/src/app/shared/api/cephfs-subvolume.service.ts
index 4c167725007..d76523aafd2 100644
--- a/src/pybind/mgr/dashboard/frontend/src/app/shared/api/cephfs-subvolume.service.ts
+++ b/src/pybind/mgr/dashboard/frontend/src/app/shared/api/cephfs-subvolume.service.ts
@@ -1,6 +1,6 @@
import { HttpClient } from '@angular/common/http';
import { Injectable } from '@angular/core';
-import { CephfsSubvolume } from '../models/cephfs-subvolume.model';
+import { CephfsSubvolume, SubvolumeSnapshot } from '../models/cephfs-subvolume.model';
import { Observable, of } from 'rxjs';
import { catchError, mapTo } from 'rxjs/operators';
import _ from 'lodash';
@@ -13,10 +13,11 @@ export class CephfsSubvolumeService {
constructor(private http: HttpClient) {}
- get(fsName: string, subVolumeGroupName: string = ''): Observable<CephfsSubvolume[]> {
+ get(fsName: string, subVolumeGroupName: string = '', info = true): Observable<CephfsSubvolume[]> {
return this.http.get<CephfsSubvolume[]>(`${this.baseURL}/${fsName}`, {
params: {
- group_name: subVolumeGroupName
+ group_name: subVolumeGroupName,
+ info: info
}
});
}
@@ -86,6 +87,14 @@ export class CephfsSubvolumeService {
);
}
+ existsInFs(fsName: string, groupName = ''): Observable<boolean> {
+ return this.http.get<boolean>(`${this.baseURL}/${fsName}/exists`, {
+ params: {
+ group_name: groupName
+ }
+ });
+ }
+
update(fsName: string, subVolumeName: string, size: string, subVolumeGroupName: string = '') {
return this.http.put(`${this.baseURL}/${fsName}`, {
subvol_name: subVolumeName,
@@ -93,4 +102,19 @@ export class CephfsSubvolumeService {
group_name: subVolumeGroupName
});
}
+
+ getSnapshots(
+ fsName: string,
+ subVolumeName: string,
+ groupName = ''
+ ): Observable<SubvolumeSnapshot[]> {
+ return this.http.get<SubvolumeSnapshot[]>(
+ `${this.baseURL}/snapshot/${fsName}/${subVolumeName}`,
+ {
+ params: {
+ group_name: groupName
+ }
+ }
+ );
+ }
}
diff --git a/src/pybind/mgr/dashboard/frontend/src/app/shared/api/rgw-bucket.service.spec.ts b/src/pybind/mgr/dashboard/frontend/src/app/shared/api/rgw-bucket.service.spec.ts
index 2c42d8b427c..15821c3b626 100644
--- a/src/pybind/mgr/dashboard/frontend/src/app/shared/api/rgw-bucket.service.spec.ts
+++ b/src/pybind/mgr/dashboard/frontend/src/app/shared/api/rgw-bucket.service.spec.ts
@@ -59,11 +59,12 @@ describe('RgwBucketService', () => {
'5',
true,
'aws:kms',
- 'qwerty1'
+ 'qwerty1',
+ null
)
.subscribe();
const req = httpTesting.expectOne(
- `api/rgw/bucket?bucket=foo&uid=bar&zonegroup=default&placement_target=default-placement&lock_enabled=false&lock_mode=COMPLIANCE&lock_retention_period_days=5&encryption_state=true&encryption_type=aws%253Akms&key_id=qwerty1&${RgwHelper.DAEMON_QUERY_PARAM}`
+ `api/rgw/bucket?bucket=foo&uid=bar&zonegroup=default&placement_target=default-placement&lock_enabled=false&lock_mode=COMPLIANCE&lock_retention_period_days=5&encryption_state=true&encryption_type=aws%253Akms&key_id=qwerty1&tags=null&${RgwHelper.DAEMON_QUERY_PARAM}`
);
expect(req.request.method).toBe('POST');
});
@@ -82,11 +83,12 @@ describe('RgwBucketService', () => {
'1',
'223344',
'GOVERNANCE',
- '10'
+ '10',
+ null
)
.subscribe();
const req = httpTesting.expectOne(
- `api/rgw/bucket/foo?${RgwHelper.DAEMON_QUERY_PARAM}&bucket_id=bar&uid=baz&versioning_state=Enabled&encryption_state=true&encryption_type=aws%253Akms&key_id=qwerty1&mfa_delete=Enabled&mfa_token_serial=1&mfa_token_pin=223344&lock_mode=GOVERNANCE&lock_retention_period_days=10`
+ `api/rgw/bucket/foo?${RgwHelper.DAEMON_QUERY_PARAM}&bucket_id=bar&uid=baz&versioning_state=Enabled&encryption_state=true&encryption_type=aws%253Akms&key_id=qwerty1&mfa_delete=Enabled&mfa_token_serial=1&mfa_token_pin=223344&lock_mode=GOVERNANCE&lock_retention_period_days=10&tags=null`
);
expect(req.request.method).toBe('PUT');
});
diff --git a/src/pybind/mgr/dashboard/frontend/src/app/shared/api/rgw-bucket.service.ts b/src/pybind/mgr/dashboard/frontend/src/app/shared/api/rgw-bucket.service.ts
index 7207d0b5ca7..87561d92d89 100644
--- a/src/pybind/mgr/dashboard/frontend/src/app/shared/api/rgw-bucket.service.ts
+++ b/src/pybind/mgr/dashboard/frontend/src/app/shared/api/rgw-bucket.service.ts
@@ -59,7 +59,8 @@ export class RgwBucketService extends ApiClient {
lock_retention_period_days: string,
encryption_state: boolean,
encryption_type: string,
- key_id: string
+ key_id: string,
+ tags: string
) {
return this.rgwDaemonService.request((params: HttpParams) => {
return this.http.post(this.url, null, {
@@ -75,6 +76,7 @@ export class RgwBucketService extends ApiClient {
encryption_state: String(encryption_state),
encryption_type,
key_id,
+ tags: tags,
daemon_name: params.get('daemon_name')
}
})
@@ -94,7 +96,8 @@ export class RgwBucketService extends ApiClient {
mfaTokenSerial: string,
mfaTokenPin: string,
lockMode: 'GOVERNANCE' | 'COMPLIANCE',
- lockRetentionPeriodDays: string
+ lockRetentionPeriodDays: string,
+ tags: string
) {
return this.rgwDaemonService.request((params: HttpParams) => {
params = params.appendAll({
@@ -108,7 +111,8 @@ export class RgwBucketService extends ApiClient {
mfa_token_serial: mfaTokenSerial,
mfa_token_pin: mfaTokenPin,
lock_mode: lockMode,
- lock_retention_period_days: lockRetentionPeriodDays
+ lock_retention_period_days: lockRetentionPeriodDays,
+ tags: tags
});
return this.http.put(`${this.url}/${bucket}`, null, { params: params });
});
diff --git a/src/pybind/mgr/dashboard/frontend/src/app/shared/components/code-block/code-block.component.html b/src/pybind/mgr/dashboard/frontend/src/app/shared/components/code-block/code-block.component.html
new file mode 100644
index 00000000000..7cf78b8d1fb
--- /dev/null
+++ b/src/pybind/mgr/dashboard/frontend/src/app/shared/components/code-block/code-block.component.html
@@ -0,0 +1,21 @@
+<ng-container *ngIf="codes.length > 1; else singleCodeBlock">
+ <pre id="bigCodeBlock">
+ <span *ngFor="let code of codes"
+ class="d-flex p-2 align-items-center justify-content-between text-dark">
+ <span>{{code}}</span>
+ <cd-copy-2-clipboard-button
+ [source]="code"
+ [byId]="false"></cd-copy-2-clipboard-button>
+ </span>
+ </pre>
+</ng-container>
+
+<ng-template #singleCodeBlock>
+ <pre class="d-flex p-2 align-items-center justify-content-between text-dark"
+ id="singleCodeBlock">
+ <span>{{codes}}</span>
+ <cd-copy-2-clipboard-button
+ [source]="codes"
+ [byId]="false"></cd-copy-2-clipboard-button>
+ </pre>
+</ng-template>
diff --git a/src/pybind/mgr/dashboard/frontend/src/app/shared/components/code-block/code-block.component.scss b/src/pybind/mgr/dashboard/frontend/src/app/shared/components/code-block/code-block.component.scss
new file mode 100644
index 00000000000..f601dfe6609
--- /dev/null
+++ b/src/pybind/mgr/dashboard/frontend/src/app/shared/components/code-block/code-block.component.scss
@@ -0,0 +1,6 @@
+@use './src/styles/vendor/variables' as vv;
+
+pre {
+ background-color: vv.$code-block-bg;
+ border-radius: 0.5rem;
+}
diff --git a/src/pybind/mgr/dashboard/frontend/src/app/shared/components/code-block/code-block.component.spec.ts b/src/pybind/mgr/dashboard/frontend/src/app/shared/components/code-block/code-block.component.spec.ts
new file mode 100644
index 00000000000..bc5ad428fd8
--- /dev/null
+++ b/src/pybind/mgr/dashboard/frontend/src/app/shared/components/code-block/code-block.component.spec.ts
@@ -0,0 +1,38 @@
+import { ComponentFixture, TestBed } from '@angular/core/testing';
+
+import { CodeBlockComponent } from './code-block.component';
+import { configureTestBed } from '~/testing/unit-test-helper';
+
+describe('CodeBlockComponent', () => {
+ let component: CodeBlockComponent;
+ let fixture: ComponentFixture<CodeBlockComponent>;
+
+ configureTestBed({
+ declarations: [CodeBlockComponent]
+ });
+
+ beforeEach(() => {
+ fixture = TestBed.createComponent(CodeBlockComponent);
+ component = fixture.componentInstance;
+ component.codes = [];
+ fixture.detectChanges();
+ });
+
+ it('should create', () => {
+ expect(component).toBeTruthy();
+ });
+
+ it('should show single code block if there is only one code', () => {
+ component.codes = ['code'];
+ fixture.detectChanges();
+ expect(fixture.nativeElement.querySelector('#singleCodeBlock')).not.toBeNull();
+ expect(fixture.nativeElement.querySelector('#bigCodeBlock')).toBeNull();
+ });
+
+ it('should show big code block if there are multiple codes', () => {
+ component.codes = ['code1', 'code2'];
+ fixture.detectChanges();
+ expect(fixture.nativeElement.querySelector('#bigCodeBlock')).not.toBeNull();
+ expect(fixture.nativeElement.querySelector('#singleCodeBlock')).toBeNull();
+ });
+});
diff --git a/src/pybind/mgr/dashboard/frontend/src/app/shared/components/code-block/code-block.component.ts b/src/pybind/mgr/dashboard/frontend/src/app/shared/components/code-block/code-block.component.ts
new file mode 100644
index 00000000000..91d2d991f37
--- /dev/null
+++ b/src/pybind/mgr/dashboard/frontend/src/app/shared/components/code-block/code-block.component.ts
@@ -0,0 +1,11 @@
+import { Component, Input } from '@angular/core';
+
+@Component({
+ selector: 'cd-code-block',
+ templateUrl: './code-block.component.html',
+ styleUrls: ['./code-block.component.scss']
+})
+export class CodeBlockComponent {
+ @Input()
+ codes: string[];
+}
diff --git a/src/pybind/mgr/dashboard/frontend/src/app/shared/components/components.module.ts b/src/pybind/mgr/dashboard/frontend/src/app/shared/components/components.module.ts
index 17f418d1e14..1f31374c18b 100644
--- a/src/pybind/mgr/dashboard/frontend/src/app/shared/components/components.module.ts
+++ b/src/pybind/mgr/dashboard/frontend/src/app/shared/components/components.module.ts
@@ -51,6 +51,8 @@ import { UsageBarComponent } from './usage-bar/usage-bar.component';
import { WizardComponent } from './wizard/wizard.component';
import { CardComponent } from './card/card.component';
import { CardRowComponent } from './card-row/card-row.component';
+import { CodeBlockComponent } from './code-block/code-block.component';
+import { VerticalNavigationComponent } from './vertical-navigation/vertical-navigation.component';
@NgModule({
imports: [
@@ -105,7 +107,9 @@ import { CardRowComponent } from './card-row/card-row.component';
CdLabelComponent,
ColorClassFromTextPipe,
CardComponent,
- CardRowComponent
+ CardRowComponent,
+ CodeBlockComponent,
+ VerticalNavigationComponent
],
providers: [],
exports: [
@@ -137,7 +141,9 @@ import { CardRowComponent } from './card-row/card-row.component';
CustomLoginBannerComponent,
CdLabelComponent,
CardComponent,
- CardRowComponent
+ CardRowComponent,
+ CodeBlockComponent,
+ VerticalNavigationComponent
]
})
export class ComponentsModule {}
diff --git a/src/pybind/mgr/dashboard/frontend/src/app/shared/components/vertical-navigation/vertical-navigation.component.html b/src/pybind/mgr/dashboard/frontend/src/app/shared/components/vertical-navigation/vertical-navigation.component.html
new file mode 100644
index 00000000000..ae48d7fd7fc
--- /dev/null
+++ b/src/pybind/mgr/dashboard/frontend/src/app/shared/components/vertical-navigation/vertical-navigation.component.html
@@ -0,0 +1,24 @@
+<ng-container *ngIf="items.length">
+ <h3 i18n
+ *ngIf="title">{{title}}</h3>
+ <input type="text"
+ placeholder="Filter by name..."
+ (keyup)="updateFilter()"
+ [id]="inputIdentifier"
+ class="form-control text-center mb-2">
+ <div class="overflow-auto">
+ <ul class="nav flex-column nav-pills">
+ <li class="nav-item"
+ *ngFor="let item of filteredItems; trackBy: trackByFn">
+ <a class="nav-link"
+ [class.active]="!activeItem"
+ (click)="selectItem()"
+ *ngIf="item === ''">_nogroup</a>
+ <a class="nav-link text-decoration-none text-break"
+ [class.active]="item === activeItem"
+ (click)="selectItem(item)"
+ *ngIf="item !== ''">{{item}}</a>
+ </li>
+ </ul>
+ </div>
+</ng-container>
diff --git a/src/pybind/mgr/dashboard/frontend/src/app/shared/components/vertical-navigation/vertical-navigation.component.scss b/src/pybind/mgr/dashboard/frontend/src/app/shared/components/vertical-navigation/vertical-navigation.component.scss
new file mode 100644
index 00000000000..569e2d68708
--- /dev/null
+++ b/src/pybind/mgr/dashboard/frontend/src/app/shared/components/vertical-navigation/vertical-navigation.component.scss
@@ -0,0 +1,3 @@
+.overflow-auto {
+ max-height: 50vh;
+}
diff --git a/src/pybind/mgr/dashboard/frontend/src/app/shared/components/vertical-navigation/vertical-navigation.component.spec.ts b/src/pybind/mgr/dashboard/frontend/src/app/shared/components/vertical-navigation/vertical-navigation.component.spec.ts
new file mode 100644
index 00000000000..0d45b339a20
--- /dev/null
+++ b/src/pybind/mgr/dashboard/frontend/src/app/shared/components/vertical-navigation/vertical-navigation.component.spec.ts
@@ -0,0 +1,60 @@
+import { ComponentFixture, TestBed } from '@angular/core/testing';
+
+import { VerticalNavigationComponent } from './vertical-navigation.component';
+import { By } from '@angular/platform-browser';
+
+describe('VerticalNavigationComponent', () => {
+ let component: VerticalNavigationComponent;
+ let fixture: ComponentFixture<VerticalNavigationComponent>;
+
+ beforeEach(async () => {
+ await TestBed.configureTestingModule({
+ declarations: [VerticalNavigationComponent]
+ }).compileComponents();
+
+ fixture = TestBed.createComponent(VerticalNavigationComponent);
+ component = fixture.componentInstance;
+ component.items = ['item1', 'item2', 'item3'];
+ component.inputIdentifier = 'filter';
+ fixture.detectChanges();
+ });
+
+ it('should create', () => {
+ expect(component).toBeTruthy();
+ });
+
+ it('should have a title', () => {
+ component.title = 'testTitle';
+ fixture.detectChanges();
+ const title = fixture.debugElement.query(By.css('h3'));
+ expect(title.nativeElement.textContent).toEqual('testTitle');
+ });
+
+ it('should select the first item as active if no item is selected', () => {
+ expect(component.activeItem).toEqual('item1');
+ });
+
+ it('should filter the items by the keyword in the filter input', () => {
+ const event = new KeyboardEvent('keyup');
+ const filterInput = fixture.debugElement.query(By.css('#filter'));
+ filterInput.nativeElement.value = 'item1';
+ filterInput.nativeElement.dispatchEvent(event);
+ fixture.detectChanges();
+ expect(component.filteredItems).toEqual(['item1']);
+
+ filterInput.nativeElement.value = 'item2';
+ filterInput.nativeElement.dispatchEvent(event);
+ fixture.detectChanges();
+ expect(component.filteredItems).toEqual(['item2']);
+ });
+
+ it('should select the item when clicked', () => {
+ component.activeItem = '';
+
+ // click on the first item in the nav list
+ const item = fixture.debugElement.query(By.css('.nav-link'));
+ item.nativeElement.click();
+ fixture.detectChanges();
+ expect(component.activeItem).toEqual('item1');
+ });
+});
diff --git a/src/pybind/mgr/dashboard/frontend/src/app/shared/components/vertical-navigation/vertical-navigation.component.ts b/src/pybind/mgr/dashboard/frontend/src/app/shared/components/vertical-navigation/vertical-navigation.component.ts
new file mode 100644
index 00000000000..a46cc4f6c43
--- /dev/null
+++ b/src/pybind/mgr/dashboard/frontend/src/app/shared/components/vertical-navigation/vertical-navigation.component.ts
@@ -0,0 +1,37 @@
+import { Component, EventEmitter, Input, OnInit, Output } from '@angular/core';
+
+@Component({
+ selector: 'cd-vertical-navigation',
+ templateUrl: './vertical-navigation.component.html',
+ styleUrls: ['./vertical-navigation.component.scss']
+})
+export class VerticalNavigationComponent implements OnInit {
+ @Input() items: string[];
+ @Input() title: string;
+ @Input() inputIdentifier: string;
+
+ @Output() emitFilteredItems: EventEmitter<string[]> = new EventEmitter();
+ @Output() emitActiveItem: EventEmitter<string> = new EventEmitter();
+
+ activeItem = '';
+ filteredItems: string[];
+
+ ngOnInit(): void {
+ this.filteredItems = this.items;
+ if (!this.activeItem && this.items.length) this.selectItem(this.items[0]);
+ }
+
+ updateFilter() {
+ const filterInput = document.getElementById(this.inputIdentifier) as HTMLInputElement;
+ this.filteredItems = this.items.filter((item) => item.includes(filterInput.value));
+ }
+
+ selectItem(item = '') {
+ this.activeItem = item;
+ this.emitActiveItem.emit(item);
+ }
+
+ trackByFn(item: number) {
+ return item;
+ }
+}
diff --git a/src/pybind/mgr/dashboard/frontend/src/app/shared/datatable/crud-table/crud-table.component.ts b/src/pybind/mgr/dashboard/frontend/src/app/shared/datatable/crud-table/crud-table.component.ts
index 750152161c2..6881e373b58 100644
--- a/src/pybind/mgr/dashboard/frontend/src/app/shared/datatable/crud-table/crud-table.component.ts
+++ b/src/pybind/mgr/dashboard/frontend/src/app/shared/datatable/crud-table/crud-table.component.ts
@@ -120,7 +120,7 @@ export class CRUDTableComponent implements OnInit {
delete() {
const selectedKey = this.selection.first()[this.meta.columnKey];
this.modalRef = this.modalService.show(CriticalConfirmationModalComponent, {
- itemDescription: $localize`${this.meta.columnKey}`,
+ itemDescription: $localize`${this.meta.resource}`,
itemNames: [selectedKey],
submitAction: () => {
this.taskWrapper
@@ -153,7 +153,9 @@ export class CRUDTableComponent implements OnInit {
if (this.selection.hasSelection) {
key = this.selection.first()[this.meta.columnKey];
}
- this.router.navigate(['/cluster/user/edit'], { queryParams: { key: key } });
+
+ const editAction = this.meta.actions.find((action) => action.name === 'Edit');
+ this.router.navigate([editAction.routerLink], { queryParams: { key: key } });
}
authExport() {
diff --git a/src/pybind/mgr/dashboard/frontend/src/app/shared/datatable/datatable.module.ts b/src/pybind/mgr/dashboard/frontend/src/app/shared/datatable/datatable.module.ts
index 37e94f236be..76cbbcfb3a2 100644
--- a/src/pybind/mgr/dashboard/frontend/src/app/shared/datatable/datatable.module.ts
+++ b/src/pybind/mgr/dashboard/frontend/src/app/shared/datatable/datatable.module.ts
@@ -61,7 +61,11 @@ import { CheckedTableFormComponent } from './checked-table-form/checked-table-fo
'Role path must start and finish with a slash "/".' +
' (pattern: (\u002F)|(\u002F[\u0021-\u007E]+\u002F))'
},
- { name: 'file_size', message: 'File size must not exceed 4KiB' }
+ { name: 'file_size', message: 'File size must not exceed 4KiB' },
+ {
+ name: 'rgwRoleSessionDuration',
+ message: 'This field must be a number and should be a value from 1 hour to 12 hours'
+ }
],
wrappers: [{ name: 'input-wrapper', component: FormlyInputWrapperComponent }]
}),
diff --git a/src/pybind/mgr/dashboard/frontend/src/app/shared/forms/crud-form/helpers.ts b/src/pybind/mgr/dashboard/frontend/src/app/shared/forms/crud-form/helpers.ts
index 1ea21b71081..aca9a20af09 100644
--- a/src/pybind/mgr/dashboard/frontend/src/app/shared/forms/crud-form/helpers.ts
+++ b/src/pybind/mgr/dashboard/frontend/src/app/shared/forms/crud-form/helpers.ts
@@ -3,7 +3,11 @@ import { FormlyFieldConfig } from '@ngx-formly/core';
import { forEach } from 'lodash';
import { formlyAsyncFileValidator } from './validators/file-validator';
import { formlyAsyncJsonValidator } from './validators/json-validator';
-import { formlyRgwRoleNameValidator, formlyRgwRolePath } from './validators/rgw-role-validator';
+import {
+ formlyFormNumberValidator,
+ formlyRgwRoleNameValidator,
+ formlyRgwRolePath
+} from './validators/rgw-role-validator';
export function getFieldState(field: FormlyFieldConfig, uiSchema: any[] = undefined) {
const formState: any[] = uiSchema || field.options?.formState;
@@ -34,6 +38,10 @@ export function setupValidators(field: FormlyFieldConfig, uiSchema: any[]) {
validators.push(formlyAsyncFileValidator);
break;
}
+ case 'rgwRoleSessionDuration': {
+ validators.push(formlyFormNumberValidator);
+ break;
+ }
}
});
field.asyncValidators = { validation: validators };
diff --git a/src/pybind/mgr/dashboard/frontend/src/app/shared/forms/crud-form/validators/rgw-role-validator.ts b/src/pybind/mgr/dashboard/frontend/src/app/shared/forms/crud-form/validators/rgw-role-validator.ts
index a100f278bea..c994dc96407 100644
--- a/src/pybind/mgr/dashboard/frontend/src/app/shared/forms/crud-form/validators/rgw-role-validator.ts
+++ b/src/pybind/mgr/dashboard/frontend/src/app/shared/forms/crud-form/validators/rgw-role-validator.ts
@@ -17,3 +17,12 @@ export function formlyRgwRoleNameValidator(control: AbstractControl): Promise<an
resolve({ rgwRoleName: true });
});
}
+
+export function formlyFormNumberValidator(control: AbstractControl): Promise<any> {
+ return new Promise((resolve, _reject) => {
+ if (control.value.match('^[0-9.]+$')) {
+ if (control.value <= 12 && control.value >= 1) resolve(null);
+ }
+ resolve({ rgwRoleSessionDuration: true });
+ });
+}
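formlyFormNumberValidator() above resolves without an error key only when the control value is a plain number between 1 and 12, which is what the new rgwRoleSessionDuration message describes. A standalone re-implementation of the same check, for illustration only:

// Sketch of the session-duration rule: accept plain numbers
// (digits and an optional decimal point) in the 1..12 hour range.
function isValidSessionDuration(value: string): boolean {
  if (!/^[0-9.]+$/.test(value)) {
    return false;
  }
  const hours = Number(value);
  return hours >= 1 && hours <= 12;
}

console.log(isValidSessionDuration('8'));    // true
console.log(isValidSessionDuration('12.5')); // false (above the 12 hour limit)
console.log(isValidSessionDuration('2h'));   // false (not a plain number)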
diff --git a/src/pybind/mgr/dashboard/frontend/src/app/shared/models/cephfs-subvolume-group.model.ts b/src/pybind/mgr/dashboard/frontend/src/app/shared/models/cephfs-subvolume-group.model.ts
index fc087ab53d0..246e4543eb9 100644
--- a/src/pybind/mgr/dashboard/frontend/src/app/shared/models/cephfs-subvolume-group.model.ts
+++ b/src/pybind/mgr/dashboard/frontend/src/app/shared/models/cephfs-subvolume-group.model.ts
@@ -1,6 +1,6 @@
export interface CephfsSubvolumeGroup {
name: string;
- info: CephfsSubvolumeGroupInfo;
+ info?: CephfsSubvolumeGroupInfo;
}
export interface CephfsSubvolumeGroupInfo {
diff --git a/src/pybind/mgr/dashboard/frontend/src/app/shared/models/cephfs-subvolume.model.ts b/src/pybind/mgr/dashboard/frontend/src/app/shared/models/cephfs-subvolume.model.ts
index 41858be6130..25a2a5acc7f 100644
--- a/src/pybind/mgr/dashboard/frontend/src/app/shared/models/cephfs-subvolume.model.ts
+++ b/src/pybind/mgr/dashboard/frontend/src/app/shared/models/cephfs-subvolume.model.ts
@@ -16,3 +16,13 @@ export interface CephfsSubvolumeInfo {
gid: number;
pool_namespace: string;
}
+
+export interface SubvolumeSnapshot {
+ name: string;
+ info: SubvolumeSnapshotInfo;
+}
+
+export interface SubvolumeSnapshotInfo {
+ created_at: string;
+ has_pending_clones: string;
+}
diff --git a/src/pybind/mgr/dashboard/frontend/src/app/shared/models/cephfs-subvolumegroup.model.ts b/src/pybind/mgr/dashboard/frontend/src/app/shared/models/cephfs-subvolumegroup.model.ts
deleted file mode 100644
index fc087ab53d0..00000000000
--- a/src/pybind/mgr/dashboard/frontend/src/app/shared/models/cephfs-subvolumegroup.model.ts
+++ /dev/null
@@ -1,13 +0,0 @@
-export interface CephfsSubvolumeGroup {
- name: string;
- info: CephfsSubvolumeGroupInfo;
-}
-
-export interface CephfsSubvolumeGroupInfo {
- mode: number;
- bytes_pcent: number;
- bytes_quota: number;
- data_pool: string;
- state: string;
- created_at: string;
-}
diff --git a/src/pybind/mgr/dashboard/frontend/src/app/shared/models/crud-table-metadata.ts b/src/pybind/mgr/dashboard/frontend/src/app/shared/models/crud-table-metadata.ts
index 140fa5b5f8e..dc33e6236ae 100644
--- a/src/pybind/mgr/dashboard/frontend/src/app/shared/models/crud-table-metadata.ts
+++ b/src/pybind/mgr/dashboard/frontend/src/app/shared/models/crud-table-metadata.ts
@@ -14,4 +14,5 @@ export class CrudMetadata {
actions: CdTableAction[];
forms: any;
columnKey: string;
+ resource: string;
}
diff --git a/src/pybind/mgr/dashboard/frontend/src/app/shared/models/snapshot-schedule.ts b/src/pybind/mgr/dashboard/frontend/src/app/shared/models/snapshot-schedule.ts
new file mode 100644
index 00000000000..b1cea7466f6
--- /dev/null
+++ b/src/pybind/mgr/dashboard/frontend/src/app/shared/models/snapshot-schedule.ts
@@ -0,0 +1,17 @@
+export interface SnapshotSchedule {
+ fs?: string;
+ subvol?: string;
+ path: string;
+ rel_path?: string;
+ schedule: string;
+ retention?: Record<string, number> | string;
+ start: Date;
+ created: Date;
+ first?: string;
+ last?: string;
+ last_pruned?: string;
+ created_count?: number;
+ pruned_count?: number;
+ active: boolean;
+ status: 'Active' | 'Inactive';
+}
diff --git a/src/pybind/mgr/dashboard/frontend/src/styles/defaults/_bootstrap-defaults.scss b/src/pybind/mgr/dashboard/frontend/src/styles/defaults/_bootstrap-defaults.scss
index e9c8a595620..d69abf12bc8 100644
--- a/src/pybind/mgr/dashboard/frontend/src/styles/defaults/_bootstrap-defaults.scss
+++ b/src/pybind/mgr/dashboard/frontend/src/styles/defaults/_bootstrap-defaults.scss
@@ -96,6 +96,8 @@ $chart-color-translucent-blue: #0096dc80 !default;
$chart-color-border: #00000020 !default;
$chart-color-translucent-yellow: #ef923472 !default;
+$code-block-bg: #f7f7f9 !default;
+
// Typography
$font-family-sans-serif: 'Helvetica Neue', Helvetica, Arial, 'Noto Sans', sans-serif,
diff --git a/src/pybind/mgr/dashboard/openapi.yaml b/src/pybind/mgr/dashboard/openapi.yaml
index d35ea87e15a..0ed83aab6c8 100644
--- a/src/pybind/mgr/dashboard/openapi.yaml
+++ b/src/pybind/mgr/dashboard/openapi.yaml
@@ -1758,6 +1758,43 @@ paths:
summary: Rename CephFS Volume
tags:
- Cephfs
+ /api/cephfs/snaphost/schedule:
+ get:
+ parameters:
+ - in: query
+ name: fs
+ required: true
+ schema:
+ type: string
+ - default: /
+ in: query
+ name: path
+ schema:
+ type: string
+ - default: true
+ in: query
+ name: recursive
+ schema:
+ type: boolean
+ responses:
+ '200':
+ content:
+ application/vnd.ceph.api.v1.0+json:
+ type: object
+ description: OK
+ '400':
+ description: Operation exception. Please check the response body for details.
+ '401':
+ description: Unauthenticated access. Please login first.
+ '403':
+ description: Unauthorized access. Please check your permissions.
+ '500':
+ description: Unexpected error. Please check the response body for the stack
+ trace.
+ security:
+ - jwt: []
+ tags:
+ - CephFSSnapshotSchedule
/api/cephfs/subvolume:
post:
parameters: []
@@ -1882,6 +1919,11 @@ paths:
required: true
schema:
type: string
+ - default: true
+ in: query
+ name: info
+ schema:
+ type: boolean
responses:
'200':
content:
@@ -1977,6 +2019,48 @@ paths:
- jwt: []
tags:
- CephfsSubvolumeGroup
+ /api/cephfs/subvolume/snapshot/{vol_name}/{subvol_name}:
+ get:
+ parameters:
+ - in: path
+ name: vol_name
+ required: true
+ schema:
+ type: string
+ - in: path
+ name: subvol_name
+ required: true
+ schema:
+ type: string
+ - default: ''
+ in: query
+ name: group_name
+ schema:
+ type: string
+ - default: true
+ in: query
+ name: info
+ schema:
+ type: boolean
+ responses:
+ '200':
+ content:
+ application/vnd.ceph.api.v1.0+json:
+ type: object
+ description: OK
+ '400':
+ description: Operation exception. Please check the response body for details.
+ '401':
+ description: Unauthenticated access. Please login first.
+ '403':
+ description: Unauthorized access. Please check your permissions.
+ '500':
+ description: Unexpected error. Please check the response body for the stack
+ trace.
+ security:
+ - jwt: []
+ tags:
+ - CephfsSubvolumeSnapshot
/api/cephfs/subvolume/{vol_name}:
delete:
parameters:
@@ -2036,6 +2120,11 @@ paths:
name: group_name
schema:
type: string
+ - default: true
+ in: query
+ name: info
+ schema:
+ type: boolean
responses:
'200':
content:
@@ -2102,6 +2191,38 @@ paths:
- jwt: []
tags:
- CephFSSubvolume
+ /api/cephfs/subvolume/{vol_name}/exists:
+ get:
+ parameters:
+ - in: path
+ name: vol_name
+ required: true
+ schema:
+ type: string
+ - default: ''
+ in: query
+ name: group_name
+ schema:
+ type: string
+ responses:
+ '200':
+ content:
+ application/vnd.ceph.api.v1.0+json:
+ type: object
+ description: OK
+ '400':
+ description: Operation exception. Please check the response body for details.
+ '401':
+ description: Unauthenticated access. Please login first.
+ '403':
+ description: Unauthorized access. Please check your permissions.
+ '500':
+ description: Unexpected error. Please check the response body for the stack
+ trace.
+ security:
+ - jwt: []
+ tags:
+ - CephFSSubvolume
/api/cephfs/subvolume/{vol_name}/info:
get:
parameters:
@@ -8817,6 +8938,8 @@ paths:
type: string
placement_target:
type: string
+ tags:
+ type: string
uid:
type: string
zonegroup:
@@ -9129,6 +9252,8 @@ paths:
type: string
mfa_token_serial:
type: string
+ tags:
+ type: string
uid:
type: string
versioning_state:
@@ -9596,7 +9721,80 @@ paths:
trace.
security:
- jwt: []
- summary: Create Ceph User
+ summary: Create RGW role
+ tags:
+ - RGW
+ put:
+ parameters: []
+ requestBody:
+ content:
+ application/json:
+ schema:
+ properties:
+ max_session_duration:
+ type: string
+ role_name:
+ type: string
+ required:
+ - role_name
+ - max_session_duration
+ type: object
+ responses:
+ '200':
+ content:
+ application/vnd.ceph.api.v1.0+json:
+ type: object
+ description: Resource updated.
+ '202':
+ content:
+ application/vnd.ceph.api.v1.0+json:
+ type: object
+ description: Operation is still executing. Please check the task queue.
+ '400':
+ description: Operation exception. Please check the response body for details.
+ '401':
+ description: Unauthenticated access. Please login first.
+ '403':
+ description: Unauthorized access. Please check your permissions.
+ '500':
+ description: Unexpected error. Please check the response body for the stack
+ trace.
+ security:
+ - jwt: []
+ summary: Edit RGW role
+ tags:
+ - RGW
+ /api/rgw/roles/{role_name}:
+ delete:
+ parameters:
+ - in: path
+ name: role_name
+ required: true
+ schema:
+ type: string
+ responses:
+ '202':
+ content:
+ application/vnd.ceph.api.v1.0+json:
+ type: object
+ description: Operation is still executing. Please check the task queue.
+ '204':
+ content:
+ application/vnd.ceph.api.v1.0+json:
+ type: object
+ description: Resource deleted.
+ '400':
+ description: Operation exception. Please check the response body for details.
+ '401':
+ description: Unauthenticated access. Please login first.
+ '403':
+ description: Unauthorized access. Please check your permissions.
+ '500':
+ description: Unexpected error. Please check the response body for the stack
+ trace.
+ security:
+ - jwt: []
+ summary: Delete RGW role
tags:
- RGW
/api/rgw/site:
@@ -12771,12 +12969,16 @@ servers:
tags:
- description: Initiate a session with Ceph
name: Auth
+- description: Cephfs Snapshot Scheduling API
+ name: CephFSSnapshotSchedule
- description: CephFS Subvolume Management API
name: CephFSSubvolume
- description: Cephfs Management API
name: Cephfs
- description: Cephfs Subvolume Group Management API
name: CephfsSubvolumeGroup
+- description: Cephfs Subvolume Snapshot Management API
+ name: CephfsSubvolumeSnapshot
- description: Get Cluster Details
name: Cluster
- description: Manage Cluster Configurations
diff --git a/src/pybind/mgr/dashboard/services/rgw_client.py b/src/pybind/mgr/dashboard/services/rgw_client.py
index 2d3226bab3e..aec3b89b64c 100644
--- a/src/pybind/mgr/dashboard/services/rgw_client.py
+++ b/src/pybind/mgr/dashboard/services/rgw_client.py
@@ -702,6 +702,19 @@ class RgwClient(RestClient):
except RequestException as e:
raise DashboardException(msg=str(e), component='rgw')
+ @RestClient.api_put('/{bucket_name}?tagging')
+ def set_tags(self, bucket_name, tags, request=None):
+ # pylint: disable=unused-argument
+ try:
+ ET.fromstring(tags)
+ except ET.ParseError:
+ return "Data must be properly formatted"
+ try:
+ result = request(data=tags) # type: ignore
+ except RequestException as e:
+ raise DashboardException(msg=str(e), component='rgw')
+ return result
+
@RestClient.api_get('/{bucket_name}?object-lock')
def get_bucket_locking(self, bucket_name, request=None):
# type: (str, Optional[object]) -> dict
@@ -852,6 +865,31 @@ class RgwClient(RestClient):
f' For more information about the format look at {link}')
raise DashboardException(msg=msg, component='rgw')
+ def get_role(self, role_name: str):
+ rgw_get_role_command = ['role', 'get', '--role-name', role_name]
+ code, role, _err = mgr.send_rgwadmin_command(rgw_get_role_command)
+ if code != 0:
+ raise DashboardException(msg=f'Error getting role with code {code}: {_err}',
+ component='rgw')
+ return role
+
+ def update_role(self, role_name: str, max_session_duration: str):
+ rgw_update_role_command = ['role', 'update', '--role-name',
+ role_name, '--max_session_duration', max_session_duration]
+ code, _, _err = mgr.send_rgwadmin_command(rgw_update_role_command,
+ stdout_as_json=False)
+ if code != 0:
+ raise DashboardException(msg=f'Error updating role with code {code}: {_err}',
+ component='rgw')
+
+ def delete_role(self, role_name: str) -> None:
+ rgw_delete_role_command = ['role', 'delete', '--role-name', role_name]
+ code, _, _err = mgr.send_rgwadmin_command(rgw_delete_role_command,
+ stdout_as_json=False)
+ if code != 0:
+ raise DashboardException(msg=f'Error deleting role with code {code}: {_err}',
+ component='rgw')
+
@RestClient.api_get('/{bucket_name}?policy')
def get_bucket_policy(self, bucket_name: str, request=None):
"""
diff --git a/src/pybind/mgr/devicehealth/module.py b/src/pybind/mgr/devicehealth/module.py
index 07768db7565..86ea1d28072 100644
--- a/src/pybind/mgr/devicehealth/module.py
+++ b/src/pybind/mgr/devicehealth/module.py
@@ -502,8 +502,8 @@ CREATE TABLE DeviceHealthMetrics (
def put_device_metrics(self, devid: str, data: Any) -> None:
SQL = """
- INSERT INTO DeviceHealthMetrics (devid, raw_smart)
- VALUES (?, ?);
+ INSERT OR REPLACE INTO DeviceHealthMetrics (devid, raw_smart, time)
+ VALUES (?, ?, strftime('%s', 'now'));
"""
with self._db_lock, self.db:
diff --git a/src/pybind/mgr/nfs/export.py b/src/pybind/mgr/nfs/export.py
index 5887c898fef..aaa93c34f6c 100644
--- a/src/pybind/mgr/nfs/export.py
+++ b/src/pybind/mgr/nfs/export.py
@@ -167,9 +167,22 @@ class AppliedExportResults:
def __init__(self) -> None:
self.changes: List[Dict[str, str]] = []
self.has_error = False
+ self.exceptions: List[Exception] = []
+ self.faulty_export_block_indices = ""
+ self.num_errors = 0
+ self.status = ""
- def append(self, value: Dict[str, str]) -> None:
+ def append(self, value: Dict[str, Any]) -> None:
if value.get("state", "") == "error":
+ self.num_errors += 1
+ # If there is an error then there must be an exception in the dict.
+ self.exceptions.append(value.pop("exception"))
+ # The index indicates which export block in the conf/json
+ # file failed to be created or updated.
+ if len(self.faulty_export_block_indices) == 0:
+ self.faulty_export_block_indices = str(value.pop("index"))
+ else:
+ self.faulty_export_block_indices += f", {value.pop('index')}"
self.has_error = True
self.changes.append(value)
@@ -177,7 +190,29 @@ class AppliedExportResults:
return self.changes
def mgr_return_value(self) -> int:
- return -errno.EIO if self.has_error else 0
+ if self.has_error:
+ if len(self.exceptions) == 1:
+ ex = self.exceptions[0]
+ if isinstance(ex, NFSException):
+ return ex.errno
+ # Some non-NFS exception occurred; this can be anything,
+ # therefore return EAGAIN as a generalised errno.
+ return -errno.EAGAIN
+ # There are multiple failures so returning EIO as a generalised
+ # errno.
+ return -errno.EIO
+ return 0
+
+ def mgr_status_value(self) -> str:
+ if self.has_error:
+ if len(self.faulty_export_block_indices) == 1:
+ self.status = f"{str(self.exceptions[0])} for export block" \
+ f" at index {self.faulty_export_block_indices}"
+            elif self.num_errors > 1:
+ self.status = f"{self.num_errors} export blocks (at index" \
+ f" {self.faulty_export_block_indices}) failed" \
+ " to be created/updated"
+ return self.status
class ExportMgr:
@@ -501,7 +536,12 @@ class ExportMgr:
aeresults = AppliedExportResults()
for export in exports:
- aeresults.append(self._change_export(cluster_id, export))
+ changed_export = self._change_export(cluster_id, export)
+            # This will help figure out which export blocks in the conf/json
+            # file are problematic.
+ if changed_export.get("state", "") == "error":
+ changed_export.update({"index": exports.index(export) + 1})
+ aeresults.append(changed_export)
return aeresults
def _read_export_config(self, cluster_id: str, export_config: str) -> List[Dict]:
@@ -525,7 +565,7 @@ class ExportMgr:
return j # j is already a list object
return [j] # return a single object list, with j as the only item
- def _change_export(self, cluster_id: str, export: Dict) -> Dict[str, str]:
+ def _change_export(self, cluster_id: str, export: Dict) -> Dict[str, Any]:
try:
return self._apply_export(cluster_id, export)
except NotImplementedError:
@@ -543,7 +583,7 @@ class ExportMgr:
except Exception as ex:
msg = f'Failed to apply export: {ex}'
log.exception(msg)
- return {"state": "error", "msg": msg}
+ return {"state": "error", "msg": msg, "exception": ex}
def _update_user_id(
self,
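The net effect of these hunks is that apply_export now records, per failing export block, both the exception and the 1-based position of the block in the submitted conf/json. A toy stand-in for the bookkeeping, assuming the same dict shape produced by _change_export (the class and variable names below are made up):

    import errno
    from typing import Any, Dict, List

    class MiniResults:
        def __init__(self) -> None:
            self.changes: List[Dict[str, str]] = []
            self.exceptions: List[Exception] = []
            self.indices: List[int] = []

        def append(self, value: Dict[str, Any]) -> None:
            if value.get("state") == "error":
                # the error dict carries the exception and the block index
                self.exceptions.append(value.pop("exception"))
                self.indices.append(value.pop("index"))
            self.changes.append(value)

        def mgr_return_value(self) -> int:
            if not self.exceptions:
                return 0
            # one failure: a specific errno; several: a generic EIO
            return -errno.EAGAIN if len(self.exceptions) == 1 else -errno.EIO

    res = MiniResults()
    res.append({"state": "applied"})
    res.append({"state": "error", "msg": "bad FSAL",
                "exception": ValueError("bad FSAL"), "index": 2})
    print(res.mgr_return_value(), res.indices)   # e.g. -11 [2]
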
diff --git a/src/pybind/mgr/object_format.py b/src/pybind/mgr/object_format.py
index b53bc3eb060..4a2b6fa8b69 100644
--- a/src/pybind/mgr/object_format.py
+++ b/src/pybind/mgr/object_format.py
@@ -228,13 +228,22 @@ class YAMLFormatter(Protocol):
class ReturnValueProvider(Protocol):
def mgr_return_value(self) -> int:
- """Return an integer value to provide the Ceph MGR with a error code
- for the MGR's response tuple. Zero means success. Return an negative
+ """Return an integer value to provide the Ceph MGR with an error code
+ for the MGR's response tuple. Zero means success. Return a negative
errno otherwise.
"""
... # pragma: no cover
+class StatusValueProvider(Protocol):
+ def mgr_status_value(self) -> str:
+ """Return a string value to provide the Ceph MGR with an error status
+ for the MGR's response tuple. Empty string means success. Return a string
+ containing error info otherwise.
+ """
+ ... # pragma: no cover
+
+
class CommonFormatter(Protocol):
"""A protocol that indicates the type is a formatter for multiple
possible formats.
@@ -272,10 +281,15 @@ def _is_yaml_data_provider(obj: YAMLDataProvider) -> bool:
def _is_return_value_provider(obj: ReturnValueProvider) -> bool:
- """Return true if obj is usable as a YAMLDataProvider."""
+ """Return true if obj is usable as a ReturnValueProvider."""
return callable(getattr(obj, 'mgr_return_value', None))
+def _is_status_value_provider(obj: StatusValueProvider) -> bool:
+    """Return true if obj is usable as a StatusValueProvider."""
+ return callable(getattr(obj, 'mgr_status_value', None))
+
+
class ObjectFormatAdapter:
"""A format adapater for a single object.
Given an input object, this type will adapt the object, or a simplified
@@ -366,6 +380,27 @@ class ReturnValueAdapter:
return self.default_return_value
+class StatusValueAdapter:
+ """A status-value adapter for an object.
+ Given an input object, this type will attempt to get a mgr status value
+    from the object if it provides a `mgr_status_value` function.
+    If not, it returns a default status value, typically an empty string.
+ """
+
+ def __init__(
+ self,
+ obj: Any,
+ default: str = "",
+ ) -> None:
+ self.obj = obj
+ self.default_status = default
+
+ def mgr_status_value(self) -> str:
+ if _is_status_value_provider(self.obj):
+ return str(self.obj.mgr_status_value())
+ return self.default_status
+
+
class ErrorResponseBase(Exception):
"""An exception that can directly be converted to a mgr reponse."""
@@ -448,6 +483,7 @@ ObjectResponseFuncType = Union[
Callable[..., JSONDataProvider],
Callable[..., YAMLDataProvider],
Callable[..., ReturnValueProvider],
+ Callable[..., StatusValueProvider],
]
@@ -487,6 +523,10 @@ class Responder:
"""Return a ReturnValueProvider for the given object."""
return ReturnValueAdapter(obj)
+ def _statusval_provider(self, obj: Any) -> StatusValueProvider:
+ """Return a StatusValueProvider for the given object."""
+ return StatusValueAdapter(obj)
+
def _get_format_func(
self, obj: Any, format_req: Optional[str] = None
) -> Callable:
@@ -515,6 +555,12 @@ class Responder:
"""Return a mgr return-value for the given object (usually zero)."""
return self._retval_provider(obj).mgr_return_value()
+ def _return_status(self, obj: Any) -> str:
+ """Return a mgr status-value for the given object (usually empty
+ string).
+ """
+ return self._statusval_provider(obj).mgr_status_value()
+
def __call__(self, f: ObjectResponseFuncType) -> HandlerFuncType:
"""Wrap a python function so that the original function's return value
becomes the source for an automatically formatted mgr response.
@@ -528,9 +574,10 @@ class Responder:
robj = f(*args, **kwargs)
body = self._formatted(robj, format_req)
retval = self._return_value(robj)
+ statusval = self._return_status(robj)
except ErrorResponseBase as e:
return e.format_response()
- return retval, body, ""
+ return retval, body, statusval
# set the extra args on our wrapper function. this will be consumed by
# the CLICommand decorator and added to the set of optional arguments
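With the status plumbing above, any object returned from a Responder-wrapped handler can carry an error string as the third element of the mgr response tuple. A minimal sketch of a result type that opts in; the class, its data, and the choice of EIO are illustrative assumptions, only the mgr_return_value/mgr_status_value/to_json hooks come from the module:

    import errno
    from typing import Dict

    class SyncStatus:
        """Hypothetical result object for a Responder-wrapped handler."""
        def __init__(self, data: Dict[str, str], err: str = "") -> None:
            self.data = data
            self.err = err

        def to_json(self) -> Dict[str, str]:
            # JSONDataProvider hook: becomes the formatted response body
            return self.data

        def mgr_return_value(self) -> int:
            # ReturnValueProvider hook: negative errno, 0 on success
            return -errno.EIO if self.err else 0

        def mgr_status_value(self) -> str:
            # StatusValueProvider hook (new): third element of the mgr tuple
            return self.err

    s = SyncStatus({"synced": "no"}, err="peer unreachable")
    print(s.mgr_return_value(), s.mgr_status_value())   # e.g. -5 peer unreachable
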
diff --git a/src/pybind/mgr/tests/test_object_format.py b/src/pybind/mgr/tests/test_object_format.py
index d2fd20870e7..2e674c69838 100644
--- a/src/pybind/mgr/tests/test_object_format.py
+++ b/src/pybind/mgr/tests/test_object_format.py
@@ -115,12 +115,18 @@ def test_format_yaml(obj: Any, compatible: bool, yaml_val: str):
class Retty:
- def __init__(self, v) -> None:
+ def __init__(self, v, status="") -> None:
self.value = v
+ self.status = status
def mgr_return_value(self) -> int:
return self.value
+ def mgr_status_value(self) -> str:
+ if self.status:
+ return self.status
+ return "NOPE"
+
@pytest.mark.parametrize(
"obj, ret",
@@ -139,6 +145,24 @@ def test_return_value(obj: Any, ret: int):
assert rva.mgr_return_value() == ret
+@pytest.mark.parametrize(
+ "obj, ret",
+ [
+ ({}, ""),
+ ({"fish": "sticks"}, ""),
+ (-55, ""),
+ (Retty(0), "NOPE"),
+ (Retty(-55, "cake"), "cake"),
+ (Retty(-50, "pie"), "pie"),
+ ],
+)
+def test_return_status(obj: Any, ret: str):
+ rva = object_format.StatusValueAdapter(obj)
+ # a StatusValueAdapter instance meets the StatusValueProvider protocol.
+ assert object_format._is_status_value_provider(rva)
+ assert rva.mgr_status_value() == ret
+
+
def test_valid_formats():
ofa = object_format.ObjectFormatAdapter({"fred": "wilma"})
vf = ofa.valid_formats()
diff --git a/src/pybind/mgr/volumes/fs/async_cloner.py b/src/pybind/mgr/volumes/fs/async_cloner.py
index 95f7d64e1b3..146d2e75590 100644
--- a/src/pybind/mgr/volumes/fs/async_cloner.py
+++ b/src/pybind/mgr/volumes/fs/async_cloner.py
@@ -191,7 +191,7 @@ def bulk_copy(fs_handle, source_path, dst_path, should_cancel):
def set_quota_on_clone(fs_handle, clone_volumes_pair):
src_path = clone_volumes_pair[1].snapshot_data_path(clone_volumes_pair[2])
dst_path = clone_volumes_pair[0].path
- quota = None # type: Optional[int]
+ quota: Optional[int] = None
try:
quota = int(fs_handle.getxattr(src_path, 'ceph.quota.max_bytes').decode('utf-8'))
except cephfs.NoData:
@@ -205,7 +205,7 @@ def set_quota_on_clone(fs_handle, clone_volumes_pair):
except cephfs.Error as e:
raise VolumeException(-e.args[0], e.args[1])
- quota_files = None # type: Optional[int]
+ quota_files: Optional[int] = None
try:
quota_files = int(fs_handle.getxattr(src_path, 'ceph.quota.max_files').decode('utf-8'))
except cephfs.NoData:
@@ -221,19 +221,25 @@ def set_quota_on_clone(fs_handle, clone_volumes_pair):
def do_clone(fs_client, volspec, volname, groupname, subvolname, should_cancel):
with open_volume_lockless(fs_client, volname) as fs_handle:
- with open_clone_subvolume_pair(fs_client, fs_handle, volspec, volname, groupname, subvolname) as clone_volumes:
- src_path = clone_volumes[1].snapshot_data_path(clone_volumes[2])
- dst_path = clone_volumes[0].path
+ with open_clone_subvolume_pair(fs_client, fs_handle, volspec, volname,
+ groupname, subvolname) \
+ as (subvol0, subvol1, subvol2):
+ src_path = subvol1.snapshot_data_path(subvol2)
+ dst_path = subvol0.path
+ # XXX: this is where cloning (of subvolume's snapshots) actually
+ # happens.
bulk_copy(fs_handle, src_path, dst_path, should_cancel)
- set_quota_on_clone(fs_handle, clone_volumes)
+ set_quota_on_clone(fs_handle, (subvol0, subvol1, subvol2))
def update_clone_failure_status(fs_client, volspec, volname, groupname, subvolname, ve):
with open_volume_lockless(fs_client, volname) as fs_handle:
- with open_clone_subvolume_pair(fs_client, fs_handle, volspec, volname, groupname, subvolname) as clone_volumes:
+ with open_clone_subvolume_pair(fs_client, fs_handle, volspec, volname,
+ groupname, subvolname) \
+                as (subvol0, subvol1, subvol2):
if ve.errno == -errno.EINTR:
- clone_volumes[0].add_clone_failure(-ve.errno, "user interrupted clone operation")
+ subvol0.add_clone_failure(-ve.errno, "user interrupted clone operation")
else:
- clone_volumes[0].add_clone_failure(-ve.errno, ve.error_str)
+ subvol0.add_clone_failure(-ve.errno, ve.error_str)
def log_clone_failure(volname, groupname, subvolname, ve):
if ve.errno == -errno.EINTR:
@@ -261,8 +267,10 @@ def handle_clone_failed(fs_client, volspec, volname, index, groupname, subvolnam
try:
with open_volume(fs_client, volname) as fs_handle:
# detach source but leave the clone section intact for later inspection
- with open_clone_subvolume_pair(fs_client, fs_handle, volspec, volname, groupname, subvolname) as clone_volumes:
- clone_volumes[1].detach_snapshot(clone_volumes[2], index)
+ with open_clone_subvolume_pair(fs_client, fs_handle, volspec,
+ volname, groupname, subvolname) \
+ as (subvol0, subvol1, subvol2):
+ subvol1.detach_snapshot(subvol2, index)
except (MetadataMgrException, VolumeException) as e:
log.error("failed to detach clone from snapshot: {0}".format(e))
return (None, True)
@@ -270,9 +278,11 @@ def handle_clone_failed(fs_client, volspec, volname, index, groupname, subvolnam
def handle_clone_complete(fs_client, volspec, volname, index, groupname, subvolname, should_cancel):
try:
with open_volume(fs_client, volname) as fs_handle:
- with open_clone_subvolume_pair(fs_client, fs_handle, volspec, volname, groupname, subvolname) as clone_volumes:
- clone_volumes[1].detach_snapshot(clone_volumes[2], index)
- clone_volumes[0].remove_clone_source(flush=True)
+ with open_clone_subvolume_pair(fs_client, fs_handle, volspec,
+ volname, groupname, subvolname) \
+ as (subvol0, subvol1, subvol2):
+ subvol1.detach_snapshot(subvol2, index)
+ subvol0.remove_clone_source(flush=True)
except (MetadataMgrException, VolumeException) as e:
log.error("failed to detach clone from snapshot: {0}".format(e))
return (None, True)
@@ -287,9 +297,14 @@ def start_clone_sm(fs_client, volspec, volname, index, groupname, subvolname, st
time.sleep(snapshot_clone_delay)
log.info("Delayed cloning ({0}, {1}, {2}) -- by {3} seconds".format(volname, groupname, subvolname, snapshot_clone_delay))
while not finished:
+            # XXX: this is where the requested operation is mapped to the
+            # relevant handler function.
handler = state_table.get(current_state, None)
if not handler:
raise VolumeException(-errno.EINVAL, "invalid clone state: \"{0}\"".format(current_state))
+            # XXX: this is where the requested operation on the subvolume's
+            # snapshot clone is performed; the function for the requested
+            # operation is invoked through "handler".
(next_state, finished) = handler(fs_client, volspec, volname, index, groupname, subvolname, should_cancel)
if next_state:
log.debug("({0}, {1}, {2}) transition state [\"{3}\" => \"{4}\"]".format(volname, groupname, subvolname,\
diff --git a/src/pybind/mgr/volumes/fs/operations/access.py b/src/pybind/mgr/volumes/fs/operations/access.py
index 9b7b2431605..7e916e95539 100644
--- a/src/pybind/mgr/volumes/fs/operations/access.py
+++ b/src/pybind/mgr/volumes/fs/operations/access.py
@@ -4,7 +4,7 @@ from typing import List
def prepare_updated_caps_list(existing_caps, mds_cap_str, osd_cap_str, authorize=True):
- caps_list = [] # type: List[str]
+ caps_list: List[str] = []
for k, v in existing_caps['caps'].items():
if k == 'mds' or k == 'osd':
continue
diff --git a/src/pybind/mgr/volumes/fs/operations/lock.py b/src/pybind/mgr/volumes/fs/operations/lock.py
index 7ef6923e115..9588ddec164 100644
--- a/src/pybind/mgr/volumes/fs/operations/lock.py
+++ b/src/pybind/mgr/volumes/fs/operations/lock.py
@@ -22,10 +22,10 @@ class GlobalLock(object):
See: https://people.eecs.berkeley.edu/~kubitron/courses/cs262a-F14/projects/reports/project6_report.pdf
"""
- _shared_state = {
+ _shared_state: Dict = {
'lock' : Lock(),
'init' : False
- } # type: Dict
+ }
def __init__(self):
with self._shared_state['lock']:
diff --git a/src/pybind/mgr/volumes/fs/operations/pin_util.py b/src/pybind/mgr/volumes/fs/operations/pin_util.py
index 9ea79e546e2..a12ab5b4d4b 100644
--- a/src/pybind/mgr/volumes/fs/operations/pin_util.py
+++ b/src/pybind/mgr/volumes/fs/operations/pin_util.py
@@ -1,4 +1,3 @@
-import os
import errno
import cephfs
@@ -25,7 +24,7 @@ def pin(fs, path, pin_type, pin_setting):
try:
pin_setting = _pin_value[pin_type](pin_setting)
- except ValueError as e:
+ except ValueError:
raise VolumeException(-errno.EINVAL, f"pin value wrong type: {pin_setting}")
try:
diff --git a/src/pybind/mgr/volumes/fs/operations/trash.py b/src/pybind/mgr/volumes/fs/operations/trash.py
index 66f1d71cf89..d76d43a43d1 100644
--- a/src/pybind/mgr/volumes/fs/operations/trash.py
+++ b/src/pybind/mgr/volumes/fs/operations/trash.py
@@ -6,7 +6,6 @@ from contextlib import contextmanager
import cephfs
from .template import GroupTemplate
-from ..fs_util import listdir
from ..exception import VolumeException
log = logging.getLogger(__name__)
diff --git a/src/pybind/mgr/volumes/fs/operations/versions/__init__.py b/src/pybind/mgr/volumes/fs/operations/versions/__init__.py
index 544afa165f9..6c96c457019 100644
--- a/src/pybind/mgr/volumes/fs/operations/versions/__init__.py
+++ b/src/pybind/mgr/volumes/fs/operations/versions/__init__.py
@@ -83,7 +83,7 @@ class SubvolumeLoader(object):
subvolume_type = SubvolumeTypes.TYPE_NORMAL
try:
initial_state = SubvolumeOpSm.get_init_state(subvolume_type)
- except OpSmException as oe:
+ except OpSmException:
raise VolumeException(-errno.EINVAL, "subvolume creation failed: internal error")
qpath = subvolume.base_path.decode('utf-8')
# legacy is only upgradable to v1
diff --git a/src/pybind/mgr/volumes/fs/operations/versions/metadata_manager.py b/src/pybind/mgr/volumes/fs/operations/versions/metadata_manager.py
index 718735d91b1..b33a2b48b77 100644
--- a/src/pybind/mgr/volumes/fs/operations/versions/metadata_manager.py
+++ b/src/pybind/mgr/volumes/fs/operations/versions/metadata_manager.py
@@ -1,7 +1,6 @@
import os
import errno
import logging
-import sys
import threading
import configparser
import re
diff --git a/src/pybind/mgr/volumes/fs/operations/versions/op_sm.py b/src/pybind/mgr/volumes/fs/operations/versions/op_sm.py
index 1142600cbb2..93eafb2bde4 100644
--- a/src/pybind/mgr/volumes/fs/operations/versions/op_sm.py
+++ b/src/pybind/mgr/volumes/fs/operations/versions/op_sm.py
@@ -19,7 +19,7 @@ class TransitionKey(object):
return not(self == other)
class SubvolumeOpSm(object):
- transition_table = {} # type: Dict
+ transition_table: Dict = {}
@staticmethod
def is_complete_state(state):
diff --git a/src/pybind/mgr/volumes/fs/operations/versions/subvolume_base.py b/src/pybind/mgr/volumes/fs/operations/versions/subvolume_base.py
index 3bae0707a6a..8fbe177e5f4 100644
--- a/src/pybind/mgr/volumes/fs/operations/versions/subvolume_base.py
+++ b/src/pybind/mgr/volumes/fs/operations/versions/subvolume_base.py
@@ -144,7 +144,7 @@ class SubvolumeBase(object):
try:
self.fs.stat(self.legacy_config_path)
self.legacy_mode = True
- except cephfs.Error as e:
+ except cephfs.Error:
pass
log.debug("loading config "
@@ -160,7 +160,7 @@ class SubvolumeBase(object):
def get_attrs(self, pathname):
# get subvolume attributes
- attrs = {} # type: Dict[str, Union[int, str, None]]
+ attrs: Dict[str, Union[int, str, None]] = {}
stx = self.fs.statx(pathname,
cephfs.CEPH_STATX_UID | cephfs.CEPH_STATX_GID
| cephfs.CEPH_STATX_MODE,
diff --git a/src/pybind/mgr/volumes/fs/operations/versions/subvolume_v1.py b/src/pybind/mgr/volumes/fs/operations/versions/subvolume_v1.py
index b5a10dd6c7f..88d186f3f15 100644
--- a/src/pybind/mgr/volumes/fs/operations/versions/subvolume_v1.py
+++ b/src/pybind/mgr/volumes/fs/operations/versions/subvolume_v1.py
@@ -55,7 +55,7 @@ class SubvolumeV1(SubvolumeBase, SubvolumeTemplate):
try:
# no need to stat the path -- open() does that
return self.metadata_mgr.get_global_option(MetadataManager.GLOBAL_META_KEY_PATH).encode('utf-8')
- except MetadataMgrException as me:
+ except MetadataMgrException:
raise VolumeException(-errno.EINVAL, "error fetching subvolume metadata")
@property
@@ -68,7 +68,7 @@ class SubvolumeV1(SubvolumeBase, SubvolumeTemplate):
try:
# MDS treats this as a noop for already marked subvolume
self.fs.setxattr(self.path, 'ceph.dir.subvolume', b'1', 0)
- except cephfs.InvalidValue as e:
+ except cephfs.InvalidValue:
raise VolumeException(-errno.EINVAL, "invalid value specified for ceph.dir.subvolume")
except cephfs.Error as e:
raise VolumeException(-e.args[0], e.args[1])
@@ -89,7 +89,7 @@ class SubvolumeV1(SubvolumeBase, SubvolumeTemplate):
subvolume_type = SubvolumeTypes.TYPE_NORMAL
try:
initial_state = SubvolumeOpSm.get_init_state(subvolume_type)
- except OpSmException as oe:
+ except OpSmException:
raise VolumeException(-errno.EINVAL, "subvolume creation failed: internal error")
subvol_path = os.path.join(self.base_path, str(uuid.uuid4()).encode('utf-8'))
@@ -156,7 +156,7 @@ class SubvolumeV1(SubvolumeBase, SubvolumeTemplate):
subvolume_type = SubvolumeTypes.TYPE_CLONE
try:
initial_state = SubvolumeOpSm.get_init_state(subvolume_type)
- except OpSmException as oe:
+ except OpSmException:
raise VolumeException(-errno.EINVAL, "clone failed: internal error")
subvol_path = os.path.join(self.base_path, str(uuid.uuid4()).encode('utf-8'))
@@ -596,7 +596,7 @@ class SubvolumeV1(SubvolumeBase, SubvolumeTemplate):
"""
with self.auth_mdata_mgr.subvol_metadata_lock(self.group.groupname, self.subvolname):
meta = self.auth_mdata_mgr.subvol_metadata_get(self.group.groupname, self.subvolname)
- auths = [] # type: List[Dict[str,str]]
+ auths: List[Dict[str,str]] = []
if not meta or not meta['auths']:
return auths
@@ -669,7 +669,7 @@ class SubvolumeV1(SubvolumeBase, SubvolumeTemplate):
pass
else:
raise
- except MetadataMgrException as me:
+ except MetadataMgrException:
raise VolumeException(-errno.EINVAL, "error fetching subvolume metadata")
return clone_source
@@ -684,16 +684,16 @@ class SubvolumeV1(SubvolumeBase, SubvolumeTemplate):
def status(self):
state = SubvolumeStates.from_value(self.metadata_mgr.get_global_option(MetadataManager.GLOBAL_META_KEY_STATE))
subvolume_type = self.subvol_type
- subvolume_status = {
- 'state' : state.value
- }
- if not SubvolumeOpSm.is_complete_state(state) and subvolume_type == SubvolumeTypes.TYPE_CLONE:
- subvolume_status["source"] = self._get_clone_source()
- if SubvolumeOpSm.is_failed_state(state) and subvolume_type == SubvolumeTypes.TYPE_CLONE:
- try:
- subvolume_status["failure"] = self._get_clone_failure()
- except MetadataMgrException:
- pass
+ subvolume_status = {'state' : state.value}
+
+ if subvolume_type == SubvolumeTypes.TYPE_CLONE:
+ if not SubvolumeOpSm.is_complete_state(state):
+ subvolume_status["source"] = self._get_clone_source()
+ if SubvolumeOpSm.is_failed_state(state):
+ try:
+ subvolume_status["failure"] = self._get_clone_failure()
+ except MetadataMgrException:
+ pass
return subvolume_status
@@ -744,7 +744,7 @@ class SubvolumeV1(SubvolumeBase, SubvolumeTemplate):
raise
def get_pending_clones(self, snapname):
- pending_clones_info = {"has_pending_clones": "no"} # type: Dict[str, Any]
+ pending_clones_info: Dict[str, Any] = {"has_pending_clones": "no"}
pending_track_id_list = []
pending_clone_list = []
index_path = ""
@@ -777,7 +777,6 @@ class SubvolumeV1(SubvolumeBase, SubvolumeTemplate):
# If clone is completed between 'list_all_keys_with_specified_values_from_section'
# and readlink(track_id_path) call then readlink will fail with error ENOENT (2)
# Hence we double check whether track_id is exist in .meta file or not.
- value = self.metadata_mgr.get_option('clone snaps', track_id)
# Edge case scenario.
# If track_id for clone exist but path /volumes/_index/clone/{track_id} not found
# then clone is orphan.
@@ -790,7 +789,7 @@ class SubvolumeV1(SubvolumeBase, SubvolumeTemplate):
path = Path(link_path.decode('utf-8'))
clone_name = os.path.basename(link_path).decode('utf-8')
group_name = os.path.basename(path.parent.absolute())
- details = {"name": clone_name} # type: Dict[str, str]
+ details = {"name": clone_name}
if group_name != Group.NO_GROUP_NAME:
details["target_group"] = group_name
pending_clone_list.append(details)
@@ -839,7 +838,7 @@ class SubvolumeV1(SubvolumeBase, SubvolumeTemplate):
snap_info[key] = self.fs.getxattr(snappath, val)
pending_clones_info = self.get_pending_clones(snapname)
info_dict = {'created_at': str(datetime.fromtimestamp(float(snap_info['created_at']))),
- 'data_pool': snap_info['data_pool'].decode('utf-8')} # type: Dict[str, Any]
+ 'data_pool': snap_info['data_pool'].decode('utf-8')}
info_dict.update(pending_clones_info);
return info_dict
except cephfs.Error as e:
diff --git a/src/pybind/mgr/volumes/fs/operations/versions/subvolume_v2.py b/src/pybind/mgr/volumes/fs/operations/versions/subvolume_v2.py
index 03085d04971..55d7f945b77 100644
--- a/src/pybind/mgr/volumes/fs/operations/versions/subvolume_v2.py
+++ b/src/pybind/mgr/volumes/fs/operations/versions/subvolume_v2.py
@@ -10,7 +10,6 @@ from .metadata_manager import MetadataManager
from .subvolume_attrs import SubvolumeTypes, SubvolumeStates, SubvolumeFeatures
from .op_sm import SubvolumeOpSm
from .subvolume_v1 import SubvolumeV1
-from ..template import SubvolumeTemplate
from ...exception import OpSmException, VolumeException, MetadataMgrException
from ...fs_util import listdir, create_base_dir
from ..template import SubvolumeOpType
@@ -99,7 +98,7 @@ class SubvolumeV2(SubvolumeV1):
try:
# MDS treats this as a noop for already marked subvolume
self.fs.setxattr(self.base_path, 'ceph.dir.subvolume', b'1', 0)
- except cephfs.InvalidValue as e:
+ except cephfs.InvalidValue:
raise VolumeException(-errno.EINVAL, "invalid value specified for ceph.dir.subvolume")
except cephfs.Error as e:
raise VolumeException(-e.args[0], e.args[1])
@@ -159,7 +158,7 @@ class SubvolumeV2(SubvolumeV1):
subvolume_type = SubvolumeTypes.TYPE_NORMAL
try:
initial_state = SubvolumeOpSm.get_init_state(subvolume_type)
- except OpSmException as oe:
+ except OpSmException:
raise VolumeException(-errno.EINVAL, "subvolume creation failed: internal error")
retained = self.retained
@@ -207,7 +206,7 @@ class SubvolumeV2(SubvolumeV1):
subvolume_type = SubvolumeTypes.TYPE_CLONE
try:
initial_state = SubvolumeOpSm.get_init_state(subvolume_type)
- except OpSmException as oe:
+ except OpSmException:
raise VolumeException(-errno.EINVAL, "clone failed: internal error")
retained = self.retained
@@ -308,13 +307,17 @@ class SubvolumeV2(SubvolumeV1):
op_type.value, self.subvolname, etype.value))
estate = self.state
- if op_type not in self.allowed_ops_by_state(estate) and estate == SubvolumeStates.STATE_RETAINED:
- raise VolumeException(-errno.ENOENT, "subvolume '{0}' is removed and has only snapshots retained".format(
- self.subvolname))
-
- if op_type not in self.allowed_ops_by_state(estate) and estate != SubvolumeStates.STATE_RETAINED:
- raise VolumeException(-errno.EAGAIN, "subvolume '{0}' is not ready for operation {1}".format(
- self.subvolname, op_type.value))
+ if op_type not in self.allowed_ops_by_state(estate):
+ if estate == SubvolumeStates.STATE_RETAINED:
+ raise VolumeException(
+ -errno.ENOENT,
+ f'subvolume "{self.subvolname}" is removed and has '
+ 'only snapshots retained')
+ else:
+ raise VolumeException(
+ -errno.EAGAIN,
+ f'subvolume "{self.subvolname}" is not ready for '
+ f'operation "{op_type.value}"')
if estate != SubvolumeStates.STATE_RETAINED:
subvol_path = self.path
diff --git a/src/pybind/mgr/volumes/fs/operations/volume.py b/src/pybind/mgr/volumes/fs/operations/volume.py
index 395a3fb4ea0..67fbb891cef 100644
--- a/src/pybind/mgr/volumes/fs/operations/volume.py
+++ b/src/pybind/mgr/volumes/fs/operations/volume.py
@@ -40,7 +40,7 @@ def get_pool_names(mgr, volname):
"""
fs_map = mgr.get("fs_map")
metadata_pool_id = None
- data_pool_ids = [] # type: List[int]
+ data_pool_ids: List[int] = []
for f in fs_map['filesystems']:
if volname == f['mdsmap']['fs_name']:
metadata_pool_id = f['mdsmap']['metadata_pool']
@@ -61,7 +61,7 @@ def get_pool_ids(mgr, volname):
"""
fs_map = mgr.get("fs_map")
metadata_pool_id = None
- data_pool_ids = [] # type: List[int]
+ data_pool_ids: List[int] = []
for f in fs_map['filesystems']:
if volname == f['mdsmap']['fs_name']:
metadata_pool_id = f['mdsmap']['metadata_pool']
diff --git a/src/pybind/mgr/volumes/fs/volume.py b/src/pybind/mgr/volumes/fs/volume.py
index 5c6642444b1..e19d1eafb2a 100644
--- a/src/pybind/mgr/volumes/fs/volume.py
+++ b/src/pybind/mgr/volumes/fs/volume.py
@@ -1,7 +1,6 @@
import json
import errno
import logging
-import os
import mgr_util
from typing import TYPE_CHECKING
@@ -17,7 +16,6 @@ from .operations.volume import create_volume, delete_volume, rename_volume, \
list_volumes, open_volume, get_pool_names, get_pool_ids, get_pending_subvol_deletions_count
from .operations.subvolume import open_subvol, create_subvol, remove_subvol, \
create_clone
-from .operations.trash import Trash
from .vol_spec import VolSpec
from .exception import VolumeException, ClusterError, ClusterTimeout, EvictionError
@@ -338,7 +336,7 @@ class VolumeClient(CephfsClient["Module"]):
with open_volume(self, volname) as fs_handle:
with open_group(fs_handle, self.volspec, groupname) as group:
with open_subvol(self.mgr, fs_handle, self.volspec, group, subvolname, SubvolumeOpType.EVICT) as subvolume:
- key = subvolume.evict(volname, authid)
+ subvolume.evict(volname, authid)
ret = 0, "", ""
except (VolumeException, ClusterTimeout, ClusterError, EvictionError) as e:
if isinstance(e, VolumeException):
@@ -695,7 +693,7 @@ class VolumeClient(CephfsClient["Module"]):
try:
with open_volume(self, volname) as fs_handle:
with open_group(fs_handle, self.volspec, groupname) as group:
- with open_subvol(self.mgr, fs_handle, self.volspec, group, subvolname, SubvolumeOpType.SNAP_PROTECT) as subvolume:
+ with open_subvol(self.mgr, fs_handle, self.volspec, group, subvolname, SubvolumeOpType.SNAP_PROTECT):
log.warning("snapshot protect call is deprecated and will be removed in a future release")
except VolumeException as ve:
ret = self.volume_exception_to_retval(ve)
@@ -710,7 +708,7 @@ class VolumeClient(CephfsClient["Module"]):
try:
with open_volume(self, volname) as fs_handle:
with open_group(fs_handle, self.volspec, groupname) as group:
- with open_subvol(self.mgr, fs_handle, self.volspec, group, subvolname, SubvolumeOpType.SNAP_UNPROTECT) as subvolume:
+ with open_subvol(self.mgr, fs_handle, self.volspec, group, subvolname, SubvolumeOpType.SNAP_UNPROTECT):
log.warning("snapshot unprotect call is deprecated and will be removed in a future release")
except VolumeException as ve:
ret = self.volume_exception_to_retval(ve)
@@ -962,7 +960,7 @@ class VolumeClient(CephfsClient["Module"]):
try:
with open_volume(self, volname) as fs_handle:
- with open_group(fs_handle, self.volspec, groupname) as group:
+ with open_group(fs_handle, self.volspec, groupname):
# as subvolumes are marked with the vxattr ceph.dir.subvolume deny snapshots
# at the subvolume group (see: https://tracker.ceph.com/issues/46074)
# group.create_snapshot(snapname)
diff --git a/src/pybind/rbd/setup.py b/src/pybind/rbd/setup.py
index 1f20c3ed42f..eeb33c73d49 100755
--- a/src/pybind/rbd/setup.py
+++ b/src/pybind/rbd/setup.py
@@ -14,6 +14,7 @@ else:
from distutils.ccompiler import new_compiler
from distutils.errors import CompileError, LinkError
from itertools import filterfalse, takewhile
+from packaging import version
import distutils.sysconfig
@@ -148,11 +149,22 @@ else:
sys.exit(1)
cmdclass = {}
+compiler_directives = {'language_level': sys.version_info.major}
try:
from Cython.Build import cythonize
from Cython.Distutils import build_ext
+ from Cython import __version__ as cython_version
cmdclass = {'build_ext': build_ext}
+
+ # Needed for building with Cython 0.x and Cython 3 from the same file,
+ # preserving the same behavior.
+ # When Cython 0.x builds go away, replace this compiler directive with
+ # noexcept on rbd_callback_t and librbd_progress_fn_t (or consider doing
+ # something similar to except? -9000 on rbd_diff_iterate2() callback for
+ # progress callbacks to propagate exceptions).
+ if version.parse(cython_version) >= version.parse('3'):
+ compiler_directives['legacy_implicit_noexcept'] = True
except ImportError:
print("WARNING: Cython is not installed.")
@@ -197,7 +209,7 @@ setup(
**ext_args
)
],
- compiler_directives={'language_level': sys.version_info.major},
+ compiler_directives=compiler_directives,
build_dir=os.environ.get("CYTHON_BUILD_DIR", None),
**cythonize_args
),
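The setup.py hunk gates a Cython-3-only compiler directive on the installed Cython version. The same pattern in isolation; the directive name and version check come from the hunk, the rest is illustrative (and, like the hunk, it assumes the third-party packaging module is available):

    import sys
    from packaging import version

    compiler_directives = {'language_level': sys.version_info.major}
    try:
        from Cython import __version__ as cython_version
    except ImportError:
        cython_version = None

    # legacy_implicit_noexcept only exists in Cython 3; passing it to a 0.x
    # cythonize() would be rejected as an unknown directive.
    if cython_version and version.parse(cython_version) >= version.parse('3'):
        compiler_directives['legacy_implicit_noexcept'] = True

    print(compiler_directives)
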
diff --git a/src/python-common/ceph/rgw/types.py b/src/python-common/ceph/rgw/types.py
index 3f65f9da00e..ddff7e15ea3 100644
--- a/src/python-common/ceph/rgw/types.py
+++ b/src/python-common/ceph/rgw/types.py
@@ -117,7 +117,6 @@ class RGWPeriod(JSONObj):
self.epoch = period_dict['epoch']
self.master_zone = period_dict['master_zone']
self.master_zonegroup = period_dict['master_zonegroup']
- self.realm_name = period_dict['realm_name']
self.realm_id = period_dict['realm_id']
pm = period_dict['period_map']
self.zonegroups_by_id = {}
diff --git a/src/rgw/CMakeLists.txt b/src/rgw/CMakeLists.txt
index d54c5b6afa9..00a9e4127e1 100644
--- a/src/rgw/CMakeLists.txt
+++ b/src/rgw/CMakeLists.txt
@@ -48,7 +48,6 @@ set(librgw_common_srcs
services/svc_otp.cc
services/svc_quota.cc
services/svc_sync_modules.cc
- services/svc_rados.cc
services/svc_role_rados.cc
services/svc_sys_obj.cc
services/svc_sys_obj_cache.cc
diff --git a/src/rgw/driver/daos/rgw_sal_daos.cc b/src/rgw/driver/daos/rgw_sal_daos.cc
index 3687f57b8c8..69432056579 100644
--- a/src/rgw/driver/daos/rgw_sal_daos.cc
+++ b/src/rgw/driver/daos/rgw_sal_daos.cc
@@ -179,7 +179,7 @@ int DaosUser::read_stats(const DoutPrefixProvider* dpp, optional_yield y,
/* stats - Not for first pass */
int DaosUser::read_stats_async(const DoutPrefixProvider* dpp,
- RGWGetUserStats_CB* cb) {
+ boost::intrusive_ptr<ReadStatsCB> cb) {
return DAOS_NOT_IMPLEMENTED_LOG(dpp);
}
@@ -495,7 +495,7 @@ int DaosBucket::read_stats(const DoutPrefixProvider* dpp,
int DaosBucket::read_stats_async(
const DoutPrefixProvider* dpp,
const bucket_index_layout_generation& idx_layout, int shard_id,
- RGWGetBucketStats_CB* ctx) {
+ boost::intrusive_ptr<ReadStatsCB> ctx) {
return DAOS_NOT_IMPLEMENTED_LOG(dpp);
}
diff --git a/src/rgw/driver/daos/rgw_sal_daos.h b/src/rgw/driver/daos/rgw_sal_daos.h
index 66f57006463..146ba7077a4 100644
--- a/src/rgw/driver/daos/rgw_sal_daos.h
+++ b/src/rgw/driver/daos/rgw_sal_daos.h
@@ -187,7 +187,7 @@ class DaosUser : public StoreUser {
ceph::real_time* last_stats_sync = nullptr,
ceph::real_time* last_stats_update = nullptr) override;
virtual int read_stats_async(const DoutPrefixProvider* dpp,
- RGWGetUserStats_CB* cb) override;
+ boost::intrusive_ptr<ReadStatsCB> cb) override;
virtual int complete_flush_stats(const DoutPrefixProvider* dpp,
optional_yield y) override;
virtual int read_usage(
@@ -309,7 +309,7 @@ class DaosBucket : public StoreBucket {
virtual int read_stats_async(const DoutPrefixProvider* dpp,
const bucket_index_layout_generation& idx_layout,
int shard_id,
- RGWGetBucketStats_CB* ctx) override;
+ boost::intrusive_ptr<ReadStatsCB> ctx) override;
virtual int sync_user_stats(const DoutPrefixProvider* dpp,
optional_yield y) override;
virtual int check_bucket_shards(const DoutPrefixProvider* dpp) override;
diff --git a/src/rgw/driver/dbstore/common/dbstore.h b/src/rgw/driver/dbstore/common/dbstore.h
index fc436d55df8..8cf6f70f751 100644
--- a/src/rgw/driver/dbstore/common/dbstore.h
+++ b/src/rgw/driver/dbstore/common/dbstore.h
@@ -1771,14 +1771,13 @@ class DB {
rgw_obj_key end_marker;
std::string ns;
bool enforce_ns;
- RGWAccessListFilter* access_list_filter;
+ rgw::AccessListFilter access_list_filter;
RGWBucketListNameFilter force_check_filter;
bool list_versions;
bool allow_unordered;
Params() :
enforce_ns(true),
- access_list_filter(nullptr),
list_versions(false),
allow_unordered(false)
{}
diff --git a/src/rgw/driver/motr/rgw_sal_motr.cc b/src/rgw/driver/motr/rgw_sal_motr.cc
index 3f5a46e4bc9..08053b9b90c 100644
--- a/src/rgw/driver/motr/rgw_sal_motr.cc
+++ b/src/rgw/driver/motr/rgw_sal_motr.cc
@@ -289,7 +289,7 @@ int MotrUser::read_stats(const DoutPrefixProvider *dpp,
}
/* stats - Not for first pass */
-int MotrUser::read_stats_async(const DoutPrefixProvider *dpp, RGWGetUserStats_CB *cb)
+int MotrUser::read_stats_async(const DoutPrefixProvider *dpp, boost::intrusive_ptr<ReadStatsCB> cb)
{
return 0;
}
@@ -803,7 +803,7 @@ int MotrBucket::create_multipart_indices()
int MotrBucket::read_stats_async(const DoutPrefixProvider *dpp,
const bucket_index_layout_generation& idx_layout,
- int shard_id, RGWGetBucketStats_CB *ctx)
+ int shard_id, boost::intrusive_ptr<ReadStatsCB> ctx)
{
return 0;
}
diff --git a/src/rgw/driver/motr/rgw_sal_motr.h b/src/rgw/driver/motr/rgw_sal_motr.h
index ecfabfc1518..ab0fc1d5283 100644
--- a/src/rgw/driver/motr/rgw_sal_motr.h
+++ b/src/rgw/driver/motr/rgw_sal_motr.h
@@ -241,7 +241,7 @@ class MotrUser : public StoreUser {
optional_yield y, RGWStorageStats* stats,
ceph::real_time *last_stats_sync = nullptr,
ceph::real_time *last_stats_update = nullptr) override;
- virtual int read_stats_async(const DoutPrefixProvider *dpp, RGWGetUserStats_CB* cb) override;
+ virtual int read_stats_async(const DoutPrefixProvider *dpp, boost::intrusive_ptr<ReadStatsCB> cb) override;
virtual int complete_flush_stats(const DoutPrefixProvider *dpp, optional_yield y) override;
virtual int read_usage(const DoutPrefixProvider *dpp, uint64_t start_epoch, uint64_t end_epoch, uint32_t max_entries,
bool* is_truncated, RGWUsageIter& usage_iter,
@@ -372,7 +372,7 @@ class MotrBucket : public StoreBucket {
bool *syncstopped = nullptr) override;
virtual int read_stats_async(const DoutPrefixProvider *dpp,
const bucket_index_layout_generation& idx_layout,
- int shard_id, RGWGetBucketStats_CB* ctx) override;
+ int shard_id, boost::intrusive_ptr<ReadStatsCB> ctx) override;
int sync_user_stats(const DoutPrefixProvider *dpp, optional_yield y,
RGWBucketEnt* ent) override;
int check_bucket_shards(const DoutPrefixProvider *dpp,
diff --git a/src/rgw/driver/posix/rgw_sal_posix.cc b/src/rgw/driver/posix/rgw_sal_posix.cc
index 23f9f83dabc..5c1e50ca5c9 100644
--- a/src/rgw/driver/posix/rgw_sal_posix.cc
+++ b/src/rgw/driver/posix/rgw_sal_posix.cc
@@ -916,7 +916,7 @@ int POSIXBucket::set_acl(const DoutPrefixProvider* dpp,
acl.encode(aclbl);
attrs[RGW_ATTR_ACL] = aclbl;
- info.owner = acl.get_owner().get_id();
+ info.owner = acl.get_owner().id;
return write_attrs(dpp, y);
}
@@ -958,7 +958,7 @@ int POSIXBucket::read_stats(const DoutPrefixProvider *dpp,
int POSIXBucket::read_stats_async(const DoutPrefixProvider *dpp,
const bucket_index_layout_generation& idx_layout,
- int shard_id, RGWGetBucketStats_CB* ctx)
+ int shard_id, boost::intrusive_ptr<ReadStatsCB> ctx)
{
return 0;
}
diff --git a/src/rgw/driver/posix/rgw_sal_posix.h b/src/rgw/driver/posix/rgw_sal_posix.h
index 1599808beff..3b76295b0c4 100644
--- a/src/rgw/driver/posix/rgw_sal_posix.h
+++ b/src/rgw/driver/posix/rgw_sal_posix.h
@@ -195,7 +195,7 @@ public:
bool* syncstopped = nullptr) override;
virtual int read_stats_async(const DoutPrefixProvider *dpp,
const bucket_index_layout_generation& idx_layout,
- int shard_id, RGWGetBucketStats_CB* ctx) override;
+ int shard_id, boost::intrusive_ptr<ReadStatsCB> ctx) override;
virtual int sync_user_stats(const DoutPrefixProvider *dpp, optional_yield y,
RGWBucketEnt* ent) override;
virtual int check_bucket_shards(const DoutPrefixProvider* dpp,
diff --git a/src/rgw/driver/rados/rgw_bucket.cc b/src/rgw/driver/rados/rgw_bucket.cc
index bea3e9b09e5..64d8e31cc3c 100644
--- a/src/rgw/driver/rados/rgw_bucket.cc
+++ b/src/rgw/driver/rados/rgw_bucket.cc
@@ -882,17 +882,6 @@ int RGWBucket::sync(RGWBucketAdminOpState& op_state, const DoutPrefixProvider *d
}
-int RGWBucket::policy_bl_to_stream(bufferlist& bl, ostream& o)
-{
- RGWAccessControlPolicy_S3 policy(g_ceph_context);
- int ret = decode_bl(bl, policy);
- if (ret < 0) {
- ldout(driver->ctx(),0) << "failed to decode RGWAccessControlPolicy" << dendl;
- }
- policy.to_xml(o);
- return 0;
-}
-
int rgw_object_get_attr(const DoutPrefixProvider *dpp,
rgw::sal::Driver* driver, rgw::sal::Object* obj,
const char* attr_name, bufferlist& out_bl, optional_yield y)
@@ -961,7 +950,7 @@ int RGWBucketAdminOp::get_policy(rgw::sal::Driver* driver, RGWBucketAdminOpState
int RGWBucketAdminOp::get_policy(rgw::sal::Driver* driver, RGWBucketAdminOpState& op_state,
RGWFormatterFlusher& flusher, const DoutPrefixProvider *dpp, optional_yield y)
{
- RGWAccessControlPolicy policy(driver->ctx());
+ RGWAccessControlPolicy policy;
int ret = get_policy(driver, op_state, policy, dpp, y);
if (ret < 0)
@@ -983,13 +972,13 @@ int RGWBucketAdminOp::get_policy(rgw::sal::Driver* driver, RGWBucketAdminOpState
int RGWBucketAdminOp::dump_s3_policy(rgw::sal::Driver* driver, RGWBucketAdminOpState& op_state,
ostream& os, const DoutPrefixProvider *dpp, optional_yield y)
{
- RGWAccessControlPolicy_S3 policy(driver->ctx());
+ RGWAccessControlPolicy policy;
int ret = get_policy(driver, op_state, policy, dpp, y);
if (ret < 0)
return ret;
- policy.to_xml(os);
+ rgw::s3::write_policy_xml(policy, os);
return 0;
}
@@ -1068,9 +1057,9 @@ int RGWBucketAdminOp::link(rgw::sal::Driver* driver, RGWBucketAdminOpState& op_s
return -EIO;
}
- int r = static_cast<rgw::sal::RadosStore*>(driver)->ctl()->bucket->unlink_bucket(owner.get_id(), old_bucket->get_info().bucket, y, dpp, false);
+ int r = static_cast<rgw::sal::RadosStore*>(driver)->ctl()->bucket->unlink_bucket(owner.id, old_bucket->get_info().bucket, y, dpp, false);
if (r < 0) {
- set_err_msg(err, "could not unlink policy from user " + owner.get_id().to_str());
+ set_err_msg(err, "could not unlink policy from user " + owner.id.to_str());
return r;
}
diff --git a/src/rgw/driver/rados/rgw_bucket.h b/src/rgw/driver/rados/rgw_bucket.h
index 706683e6bd1..5af10b524de 100644
--- a/src/rgw/driver/rados/rgw_bucket.h
+++ b/src/rgw/driver/rados/rgw_bucket.h
@@ -355,7 +355,6 @@ public:
int set_quota(RGWBucketAdminOpState& op_state, const DoutPrefixProvider *dpp, optional_yield y, std::string *err_msg = NULL);
int remove_object(const DoutPrefixProvider *dpp, RGWBucketAdminOpState& op_state, optional_yield y, std::string *err_msg = NULL);
- int policy_bl_to_stream(bufferlist& bl, std::ostream& o);
int get_policy(RGWBucketAdminOpState& op_state, RGWAccessControlPolicy& policy, optional_yield y, const DoutPrefixProvider *dpp);
int sync(RGWBucketAdminOpState& op_state, const DoutPrefixProvider *dpp, optional_yield y, std::string *err_msg = NULL);
diff --git a/src/rgw/driver/rados/rgw_cr_rados.cc b/src/rgw/driver/rados/rgw_cr_rados.cc
index e7174908dd7..2a4a02f29ad 100644
--- a/src/rgw/driver/rados/rgw_cr_rados.cc
+++ b/src/rgw/driver/rados/rgw_cr_rados.cc
@@ -152,7 +152,7 @@ int RGWSimpleRadosReadAttrsCR::send_request(const DoutPrefixProvider *dpp)
}
cn = stack->create_completion_notifier();
- return ref.pool.ioctx().aio_operate(ref.obj.oid, cn->completion(), &op,
+ return ref.ioctx.aio_operate(ref.obj.oid, cn->completion(), &op,
nullptr);
}
@@ -234,7 +234,7 @@ int RGWAsyncLockSystemObj::_send_request(const DoutPrefixProvider *dpp)
l.set_cookie(cookie);
l.set_may_renew(true);
- return l.lock_exclusive(&ref.pool.ioctx(), ref.obj.oid);
+ return l.lock_exclusive(&ref.ioctx, ref.obj.oid);
}
RGWAsyncLockSystemObj::RGWAsyncLockSystemObj(RGWCoroutine *caller, RGWAioCompletionNotifier *cn, rgw::sal::RadosStore* _store,
@@ -260,7 +260,7 @@ int RGWAsyncUnlockSystemObj::_send_request(const DoutPrefixProvider *dpp)
l.set_cookie(cookie);
- return l.unlock(&ref.pool.ioctx(), ref.obj.oid);
+ return l.unlock(&ref.ioctx, ref.obj.oid);
}
RGWAsyncUnlockSystemObj::RGWAsyncUnlockSystemObj(RGWCoroutine *caller, RGWAioCompletionNotifier *cn, rgw::sal::RadosStore* _store,
@@ -303,7 +303,7 @@ int RGWRadosSetOmapKeysCR::send_request(const DoutPrefixProvider *dpp)
op.omap_set(entries);
cn = stack->create_completion_notifier();
- return ref.pool.ioctx().aio_operate(ref.obj.oid, cn->completion(), &op);
+ return ref.ioctx.aio_operate(ref.obj.oid, cn->completion(), &op);
}
int RGWRadosSetOmapKeysCR::request_complete()
@@ -341,7 +341,7 @@ int RGWRadosGetOmapKeysCR::send_request(const DoutPrefixProvider *dpp) {
op.omap_get_keys2(marker, max_entries, &result->entries, &result->more, nullptr);
cn = stack->create_completion_notifier(result);
- return result->ref.pool.ioctx().aio_operate(result->ref.obj.oid, cn->completion(), &op, NULL);
+ return result->ref.ioctx.aio_operate(result->ref.obj.oid, cn->completion(), &op, NULL);
}
int RGWRadosGetOmapKeysCR::request_complete()
@@ -379,7 +379,7 @@ int RGWRadosGetOmapValsCR::send_request(const DoutPrefixProvider *dpp) {
op.omap_get_vals2(marker, max_entries, &result->entries, &result->more, nullptr);
cn = stack->create_completion_notifier(result);
- return result->ref.pool.ioctx().aio_operate(result->ref.obj.oid, cn->completion(), &op, NULL);
+ return result->ref.ioctx.aio_operate(result->ref.obj.oid, cn->completion(), &op, NULL);
}
int RGWRadosGetOmapValsCR::request_complete()
@@ -414,7 +414,7 @@ int RGWRadosRemoveOmapKeysCR::send_request(const DoutPrefixProvider *dpp) {
op.omap_rm_keys(keys);
cn = stack->create_completion_notifier();
- return ref.pool.ioctx().aio_operate(ref.obj.oid, cn->completion(), &op);
+ return ref.ioctx.aio_operate(ref.obj.oid, cn->completion(), &op);
}
int RGWRadosRemoveOmapKeysCR::request_complete()
@@ -476,22 +476,11 @@ RGWRadosRemoveOidCR::RGWRadosRemoveOidCR(rgw::sal::RadosStore* store,
}
RGWRadosRemoveOidCR::RGWRadosRemoveOidCR(rgw::sal::RadosStore* store,
- RGWSI_RADOS::Obj& obj,
+ rgw_rados_ref obj,
RGWObjVersionTracker* objv_tracker)
: RGWSimpleCoroutine(store->ctx()),
- ioctx(librados::IoCtx(obj.get_ref().pool.ioctx())),
- oid(obj.get_ref().obj.oid),
- objv_tracker(objv_tracker)
-{
- set_description() << "remove dest=" << oid;
-}
-
-RGWRadosRemoveOidCR::RGWRadosRemoveOidCR(rgw::sal::RadosStore* store,
- RGWSI_RADOS::Obj&& obj,
- RGWObjVersionTracker* objv_tracker)
- : RGWSimpleCoroutine(store->ctx()),
- ioctx(std::move(obj.get_ref().pool.ioctx())),
- oid(std::move(obj.get_ref().obj.oid)),
+ ioctx(std::move(obj.ioctx)),
+ oid(std::move(obj.obj.oid)),
objv_tracker(objv_tracker)
{
set_description() << "remove dest=" << oid;
@@ -917,7 +906,7 @@ int RGWAsyncRemoveObj::_send_request(const DoutPrefixProvider *dpp)
std::unique_ptr<rgw::sal::Object::DeleteOp> del_op = obj->get_delete_op();
- del_op->params.bucket_owner = bucket->get_info().owner;
+ del_op->params.bucket_owner.id = bucket->get_info().owner;
del_op->params.obj_owner = policy.get_owner();
if (del_if_older) {
del_op->params.unmod_since = timestamp;
@@ -927,8 +916,8 @@ int RGWAsyncRemoveObj::_send_request(const DoutPrefixProvider *dpp)
}
del_op->params.olh_epoch = versioned_epoch;
del_op->params.marker_version_id = marker_version_id;
- del_op->params.obj_owner.set_id(rgw_user(owner));
- del_op->params.obj_owner.set_name(owner_display_name);
+ del_op->params.obj_owner.id = rgw_user(owner);
+ del_op->params.obj_owner.display_name = owner_display_name;
del_op->params.mtime = timestamp;
del_op->params.high_precision_time = true;
del_op->params.zones_trace = &zones_trace;
@@ -1131,7 +1120,7 @@ int RGWRadosNotifyCR::send_request(const DoutPrefixProvider *dpp)
set_status() << "sending request";
cn = stack->create_completion_notifier();
- return ref.pool.ioctx().aio_notify(ref.obj.oid, cn->completion(), request,
+ return ref.ioctx.aio_notify(ref.obj.oid, cn->completion(), request,
timeout_ms, response);
}
diff --git a/src/rgw/driver/rados/rgw_cr_rados.h b/src/rgw/driver/rados/rgw_cr_rados.h
index 676fd3d3cae..2cf99f98cc8 100644
--- a/src/rgw/driver/rados/rgw_cr_rados.h
+++ b/src/rgw/driver/rados/rgw_cr_rados.h
@@ -446,8 +446,7 @@ public:
op.read(0, -1, &bl, nullptr);
cn = stack->create_completion_notifier();
- return ref.pool.ioctx().aio_operate(ref.obj.oid, cn->completion(), &op,
- nullptr);
+ return ref.ioctx.aio_operate(ref.obj.oid, cn->completion(), &op, nullptr);
}
int request_complete() {
@@ -557,7 +556,7 @@ public:
op.write_full(bl);
cn = stack->create_completion_notifier();
- return ref.pool.ioctx().aio_operate(ref.obj.oid, cn->completion(), &op);
+ return ref.ioctx.aio_operate(ref.obj.oid, cn->completion(), &op);
}
int request_complete() override {
@@ -624,7 +623,7 @@ public:
return 0;
}
- return ref.pool.ioctx().aio_operate(ref.obj.oid, cn->completion(), &op);
+ return ref.ioctx.aio_operate(ref.obj.oid, cn->completion(), &op);
}
int request_complete() override {
@@ -754,11 +753,7 @@ public:
RGWObjVersionTracker* objv_tracker = nullptr);
RGWRadosRemoveOidCR(rgw::sal::RadosStore* store,
- RGWSI_RADOS::Obj& obj,
- RGWObjVersionTracker* objv_tracker = nullptr);
-
- RGWRadosRemoveOidCR(rgw::sal::RadosStore* store,
- RGWSI_RADOS::Obj&& obj,
+ rgw_rados_ref obj,
RGWObjVersionTracker* objv_tracker = nullptr);
int send_request(const DoutPrefixProvider *dpp) override;
diff --git a/src/rgw/driver/rados/rgw_d3n_datacache.cc b/src/rgw/driver/rados/rgw_d3n_datacache.cc
index 18d213cf6c3..c81954fce1c 100644
--- a/src/rgw/driver/rados/rgw_d3n_datacache.cc
+++ b/src/rgw/driver/rados/rgw_d3n_datacache.cc
@@ -104,7 +104,7 @@ void D3nDataCache::init(CephContext *_cct) {
struct aioinit ainit{0};
ainit.aio_threads = cct->_conf.get_val<int64_t>("rgw_d3n_libaio_aio_threads");
ainit.aio_num = cct->_conf.get_val<int64_t>("rgw_d3n_libaio_aio_num");
- ainit.aio_idle_time = 10;
+ ainit.aio_idle_time = 5;
aio_init(&ainit);
#endif
}
diff --git a/src/rgw/driver/rados/rgw_d3n_datacache.h b/src/rgw/driver/rados/rgw_d3n_datacache.h
index 196f892329f..58ac95d093d 100644
--- a/src/rgw/driver/rados/rgw_d3n_datacache.h
+++ b/src/rgw/driver/rados/rgw_d3n_datacache.h
@@ -195,10 +195,10 @@ int D3nRGWDataCache<T>::get_obj_iterate_cb(const DoutPrefixProvider *dpp, const
return 0;
}
- auto obj = d->rgwrados->svc.rados->obj(read_obj);
- r = obj.open(dpp);
+ rgw_rados_ref ref;
+ r = rgw_get_rados_ref(dpp, d->rgwrados->get_rados_handle(), read_obj, &ref);
if (r < 0) {
- lsubdout(g_ceph_context, rgw, 4) << "failed to open rados context for " << read_obj << dendl;
+ ldpp_dout(dpp, 4) << "failed to open rados context for " << read_obj << dendl;
return r;
}
@@ -208,8 +208,7 @@ int D3nRGWDataCache<T>::get_obj_iterate_cb(const DoutPrefixProvider *dpp, const
const uint64_t cost = len;
const uint64_t id = obj_ofs; // use logical object offset for sorting replies
- auto& ref = obj.get_ref();
- auto completed = d->aio->get(ref.obj, rgw::Aio::librados_op(ref.pool.ioctx(), std::move(op), d->yield), cost, id);
+ auto completed = d->aio->get(ref.obj, rgw::Aio::librados_op(ref.ioctx, std::move(op), d->yield), cost, id);
return d->flush(std::move(completed));
} else {
ldpp_dout(dpp, 20) << "D3nDataCache::" << __func__ << "(): oid=" << read_obj.oid << ", is_head_obj=" << is_head_obj << ", obj-ofs=" << obj_ofs << ", read_ofs=" << read_ofs << ", len=" << len << dendl;
@@ -221,20 +220,19 @@ int D3nRGWDataCache<T>::get_obj_iterate_cb(const DoutPrefixProvider *dpp, const
const uint64_t id = obj_ofs; // use logical object offset for sorting replies
oid = read_obj.oid;
- auto obj = d->rgwrados->svc.rados->obj(read_obj);
- r = obj.open(dpp);
+ rgw_rados_ref ref;
+ r = rgw_get_rados_ref(dpp, d->rgwrados->get_rados_handle(), read_obj, &ref);
if (r < 0) {
- lsubdout(g_ceph_context, rgw, 0) << "D3nDataCache: Error: failed to open rados context for " << read_obj << ", r=" << r << dendl;
+ ldpp_dout(dpp, 4) << "failed to open rados context for " << read_obj << dendl;
return r;
}
- auto& ref = obj.get_ref();
const bool is_compressed = (astate->attrset.find(RGW_ATTR_COMPRESSION) != astate->attrset.end());
const bool is_encrypted = (astate->attrset.find(RGW_ATTR_CRYPT_MODE) != astate->attrset.end());
if (read_ofs != 0 || astate->size != astate->accounted_size || is_compressed || is_encrypted) {
d->d3n_bypass_cache_write = true;
lsubdout(g_ceph_context, rgw, 5) << "D3nDataCache: " << __func__ << "(): Note - bypassing datacache: oid=" << read_obj.oid << ", read_ofs!=0 = " << read_ofs << ", size=" << astate->size << " != accounted_size=" << astate->accounted_size << ", is_compressed=" << is_compressed << ", is_encrypted=" << is_encrypted << dendl;
- auto completed = d->aio->get(ref.obj, rgw::Aio::librados_op(ref.pool.ioctx(), std::move(op), d->yield), cost, id);
+ auto completed = d->aio->get(ref.obj, rgw::Aio::librados_op(ref.ioctx, std::move(op), d->yield), cost, id);
r = d->flush(std::move(completed));
return r;
}
@@ -251,7 +249,7 @@ int D3nRGWDataCache<T>::get_obj_iterate_cb(const DoutPrefixProvider *dpp, const
} else {
// Write To Cache
ldpp_dout(dpp, 20) << "D3nDataCache: " << __func__ << "(): WRITE TO CACHE: oid=" << read_obj.oid << ", obj-ofs=" << obj_ofs << ", read_ofs=" << read_ofs << " len=" << len << dendl;
- auto completed = d->aio->get(ref.obj, rgw::Aio::librados_op(ref.pool.ioctx(), std::move(op), d->yield), cost, id);
+ auto completed = d->aio->get(ref.obj, rgw::Aio::librados_op(ref.ioctx, std::move(op), d->yield), cost, id);
return d->flush(std::move(completed));
}
}
diff --git a/src/rgw/driver/rados/rgw_data_sync.cc b/src/rgw/driver/rados/rgw_data_sync.cc
index 02965f9073d..7b273e68129 100644
--- a/src/rgw/driver/rados/rgw_data_sync.cc
+++ b/src/rgw/driver/rados/rgw_data_sync.cc
@@ -1457,7 +1457,7 @@ public:
}
if (complete->timestamp != ceph::real_time{}) {
tn->log(10, SSTR("writing " << *complete << " to error repo for retry"));
- yield call(rgw::error_repo::write_cr(sync_env->driver->svc()->rados, error_repo,
+ yield call(rgw::error_repo::write_cr(sync_env->driver->getRados()->get_rados_handle(), error_repo,
rgw::error_repo::encode_key(complete->bs, complete->gen),
complete->timestamp));
if (retcode < 0) {
@@ -1465,7 +1465,7 @@ public:
}
}
} else if (complete->retry) {
- yield call(rgw::error_repo::remove_cr(sync_env->driver->svc()->rados, error_repo,
+ yield call(rgw::error_repo::remove_cr(sync_env->driver->getRados()->get_rados_handle(), error_repo,
rgw::error_repo::encode_key(complete->bs, complete->gen),
complete->timestamp));
if (retcode < 0) {
@@ -1529,7 +1529,7 @@ public:
if (retcode == -ENOENT) {
// don't retry if bucket instance does not exist
tn->log(10, SSTR("bucket instance or log layout does not exist on source for bucket " << source_bs.bucket));
- yield call(rgw::error_repo::remove_cr(sync_env->driver->svc()->rados, error_repo,
+ yield call(rgw::error_repo::remove_cr(sync_env->driver->getRados()->get_rados_handle(), error_repo,
error_marker, timestamp));
return set_cr_done();
} else if (retcode < 0) {
@@ -1544,7 +1544,7 @@ public:
pool = sync_env->svc->zone->get_zone_params().log_pool;
error_repo = datalog_oid_for_error_repo(sc, sync_env->driver, pool, source_bs);
tn->log(10, SSTR("writing shard_id " << sid << " of gen " << each->gen << " to error repo for retry"));
- yield_spawn_window(rgw::error_repo::write_cr(sync_env->driver->svc()->rados, error_repo,
+ yield_spawn_window(rgw::error_repo::write_cr(sync_env->driver->getRados()->get_rados_handle(), error_repo,
rgw::error_repo::encode_key(bs, each->gen),
timestamp), sc->lcc.adj_concurrency(cct->_conf->rgw_data_sync_spawn_window),
[&](uint64_t stack_id, int ret) {
@@ -1563,7 +1563,7 @@ public:
});
// once everything succeeds, remove the full sync obligation from the error repo
- yield call(rgw::error_repo::remove_cr(sync_env->driver->svc()->rados, error_repo,
+ yield call(rgw::error_repo::remove_cr(sync_env->driver->getRados()->get_rados_handle(), error_repo,
error_marker, timestamp));
return set_cr_done();
}
@@ -1648,7 +1648,7 @@ public:
if (retcode < 0) {
tn->log(10, SSTR("full sync: failed to read remote bucket info. Writing "
<< source_bs.shard_id << " to error repo for retry"));
- yield call(rgw::error_repo::write_cr(sync_env->driver->svc()->rados, error_repo,
+ yield call(rgw::error_repo::write_cr(sync_env->driver->getRados()->get_rados_handle(), error_repo,
rgw::error_repo::encode_key(source_bs, std::nullopt),
timestamp));
if (retcode < 0) {
@@ -1670,7 +1670,7 @@ public:
timestamp = timestamp_for_bucket_shard(sync_env->driver, sync_status, source_bs);
if (retcode < 0) {
tn->log(10, SSTR("Write " << source_bs.shard_id << " to error repo for retry"));
- yield_spawn_window(rgw::error_repo::write_cr(sync_env->driver->svc()->rados, error_repo,
+ yield_spawn_window(rgw::error_repo::write_cr(sync_env->driver->getRados()->get_rados_handle(), error_repo,
rgw::error_repo::encode_key(source_bs, each->gen),
timestamp), sc->lcc.adj_concurrency(cct->_conf->rgw_data_sync_spawn_window), std::nullopt);
} else {
@@ -2016,7 +2016,7 @@ public:
}
if (retcode < 0) {
tn->log(1, SSTR("failed to parse bucket shard: " << error_marker));
- spawn(rgw::error_repo::remove_cr(sc->env->driver->svc()->rados,
+ spawn(rgw::error_repo::remove_cr(sc->env->driver->getRados()->get_rados_handle(),
error_repo, error_marker,
entry_timestamp),
false);
@@ -2122,9 +2122,9 @@ public:
} while (true);
drain_all();
- yield marker_tracker->flush();
if (lost_bid) {
+ yield call(marker_tracker->flush());
return set_cr_error(-EBUSY);
} else if (lost_lock) {
return set_cr_error(-ECANCELED);
@@ -2675,8 +2675,6 @@ public:
static int policy_from_attrs(CephContext *cct,
const map<string, bufferlist>& attrs,
RGWAccessControlPolicy *acl) {
- acl->set_ctx(cct);
-
auto aiter = attrs.find(RGW_ATTR_ACL);
if (aiter == attrs.end()) {
return -ENOENT;
@@ -2727,8 +2725,8 @@ bool RGWUserPermHandler::Bucket::verify_bucket_permission(int perm)
{
return verify_bucket_permission_no_policy(sync_env->dpp,
&(*ps),
- &info->user_acl,
- &bucket_acl,
+ info->user_acl,
+ bucket_acl,
perm);
}
@@ -2744,8 +2742,8 @@ bool RGWUserPermHandler::Bucket::verify_object_permission(const map<string, buff
return verify_bucket_permission_no_policy(sync_env->dpp,
&(*ps),
- &bucket_acl,
- &obj_acl,
+ bucket_acl,
+ obj_acl,
perm);
}
@@ -5160,8 +5158,11 @@ int RGWBucketShardIncrementalSyncCR::operate(const DoutPrefixProvider *dpp)
}
yield {
// delete the shard status object
- auto status_obj = sync_env->svc->rados->obj(marker_tracker.get_obj());
- retcode = status_obj.open(dpp);
+ rgw_rados_ref status_obj;
+ retcode = rgw_get_rados_ref(dpp,
+ sync_env->driver->getRados()->get_rados_handle(),
+ marker_tracker.get_obj(),
+ &status_obj);
if (retcode < 0) {
return set_cr_error(retcode);
}
@@ -6009,7 +6010,7 @@ int RGWSyncBucketCR::operate(const DoutPrefixProvider *dpp)
// use the error repo and sync status timestamp from the datalog shard corresponding to source_bs
error_repo = datalog_oid_for_error_repo(sc, sc->env->driver,
pool, source_bs);
- yield call(rgw::error_repo::write_cr(sc->env->driver->svc()->rados, error_repo,
+ yield call(rgw::error_repo::write_cr(sc->env->driver->getRados()->get_rados_handle(), error_repo,
rgw::error_repo::encode_key(source_bs, current_gen),
ceph::real_clock::zero()));
if (retcode < 0) {
@@ -6068,7 +6069,7 @@ int RGWBucketPipeSyncStatusManager::do_init(const DoutPrefixProvider *dpp,
}
sync_module.reset(new RGWDefaultSyncModuleInstance());
- auto async_rados = driver->svc()->rados->get_async_processor();
+ auto async_rados = driver->svc()->async_processor;
sync_env.init(this, driver->ctx(), driver,
driver->svc(), async_rados, &http_manager,
@@ -6680,7 +6681,7 @@ int rgw_read_bucket_inc_sync_status(const DoutPrefixProvider *dpp,
RGWDataSyncEnv env;
RGWSyncModuleInstanceRef module; // null sync module
- env.init(dpp, driver->ctx(), driver, driver->svc(), driver->svc()->rados->get_async_processor(),
+ env.init(dpp, driver->ctx(), driver, driver->svc(), driver->svc()->async_processor,
nullptr, nullptr, nullptr, module, nullptr);
RGWDataSyncCtx sc;
diff --git a/src/rgw/driver/rados/rgw_lc_tier.cc b/src/rgw/driver/rados/rgw_lc_tier.cc
index 2dd8a6625fc..67df0244595 100644
--- a/src/rgw/driver/rados/rgw_lc_tier.cc
+++ b/src/rgw/driver/rados/rgw_lc_tier.cc
@@ -464,7 +464,6 @@ int RGWLCStreamRead::init_rest_obj() {
*/
init_headers(attrs, rest_obj.attrs);
- rest_obj.acls.set_ctx(cct);
const auto aiter = attrs.find(RGW_ATTR_ACL);
if (aiter != attrs.end()) {
bufferlist& bl = aiter->second;
diff --git a/src/rgw/driver/rados/rgw_object_expirer_core.cc b/src/rgw/driver/rados/rgw_object_expirer_core.cc
index 72c21c1d262..269043f94df 100644
--- a/src/rgw/driver/rados/rgw_object_expirer_core.cc
+++ b/src/rgw/driver/rados/rgw_object_expirer_core.cc
@@ -32,7 +32,6 @@
#include "rgw_zone.h"
#include "rgw_sal_rados.h"
-#include "services/svc_rados.h"
#include "services/svc_zone.h"
#include "services/svc_sys_obj.h"
#include "services/svc_bi_rados.h"
@@ -108,8 +107,11 @@ int RGWObjExpStore::objexp_hint_add(const DoutPrefixProvider *dpp,
cls_timeindex_add(op, utime_t(delete_at), keyext, hebl);
string shard_name = objexp_hint_get_shardname(objexp_key_shard(obj_key, cct->_conf->rgw_objexp_hints_num_shards));
- auto obj = rados_svc->obj(rgw_raw_obj(driver->svc()->zone->get_zone_params().log_pool, shard_name));
- int r = obj.open(dpp);
+ rgw_rados_ref obj;
+ int r = rgw_get_rados_ref(dpp, driver->getRados()->get_rados_handle(),
+ { driver->svc()->zone->get_zone_params().log_pool,
+ shard_name },
+ &obj);
if (r < 0) {
ldpp_dout(dpp, 0) << "ERROR: " << __func__ << "(): failed to open obj=" << obj << " (r=" << r << ")" << dendl;
return r;
@@ -131,8 +133,10 @@ int RGWObjExpStore::objexp_hint_list(const DoutPrefixProvider *dpp,
cls_timeindex_list(op, utime_t(start_time), utime_t(end_time), marker, max_entries, entries,
out_marker, truncated);
- auto obj = rados_svc->obj(rgw_raw_obj(driver->svc()->zone->get_zone_params().log_pool, oid));
- int r = obj.open(dpp);
+ rgw_rados_ref obj;
+ int r = rgw_get_rados_ref(dpp, driver->getRados()->get_rados_handle(),
+ { driver->svc()->zone->get_zone_params().log_pool,
+ oid }, &obj);
if (r < 0) {
ldpp_dout(dpp, 0) << "ERROR: " << __func__ << "(): failed to open obj=" << obj << " (r=" << r << ")" << dendl;
return r;
@@ -163,7 +167,7 @@ static int cls_timeindex_trim_repeat(const DoutPrefixProvider *dpp,
do {
librados::ObjectWriteOperation op;
cls_timeindex_trim(op, from_time, to_time, from_marker, to_marker);
- int r = rgw_rados_operate(dpp, ref.pool.ioctx(), oid, &op, null_yield);
+ int r = rgw_rados_operate(dpp, ref.ioctx, oid, &op, null_yield);
if (r == -ENODATA)
done = true;
else if (r < 0)
@@ -180,15 +184,17 @@ int RGWObjExpStore::objexp_hint_trim(const DoutPrefixProvider *dpp,
const string& from_marker,
const string& to_marker, optional_yield y)
{
- auto obj = rados_svc->obj(rgw_raw_obj(driver->svc()->zone->get_zone_params().log_pool, oid));
- int r = obj.open(dpp);
- if (r < 0) {
- ldpp_dout(dpp, 0) << "ERROR: " << __func__ << "(): failed to open obj=" << obj << " (r=" << r << ")" << dendl;
- return r;
+ rgw_rados_ref ref;
+ auto ret = rgw_get_rados_ref(dpp, driver->getRados()->get_rados_handle(),
+ {driver->svc()->zone->get_zone_params().log_pool, oid},
+ &ref);
+ if (ret < 0) {
+ ldpp_dout(dpp, 0) << "ERROR: " << __func__ << "(): failed to open oid="
+ << oid << " (r=" << ret << ")" << dendl;
+ return ret;
}
- auto& ref = obj.get_ref();
- int ret = cls_timeindex_trim_repeat(dpp, ref, oid, utime_t(start_time), utime_t(end_time),
- from_marker, to_marker, y);
+ ret = cls_timeindex_trim_repeat(dpp, ref, oid, utime_t(start_time), utime_t(end_time),
+ from_marker, to_marker, y);
if ((ret < 0 ) && (ret != -ENOENT)) {
return ret;
}
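
The hunks above follow a pattern repeated throughout this series: the RGWSI_RADOS::Obj wrapper (obj() + open(dpp) + get_ref()) is replaced by a plain rgw_rados_ref filled in by one rgw_get_rados_ref() call, after which callers operate directly on ref.ioctx and ref.obj.oid. A minimal, self-contained sketch of that shape; FakeIoCtx, RawObj, RadosRef and get_rados_ref() here are stand-ins, not the real librados/RGW types:

    // Hypothetical, simplified sketch of the "open a ref, then operate on it"
    // pattern this series converges on.
    #include <iostream>
    #include <string>

    struct FakeIoCtx {                    // stands in for librados::IoCtx
      std::string pool;
      std::string locator;
      int write(const std::string& oid) { // pretend I/O
        std::cout << "write " << pool << "/" << oid
                  << " (locator=" << locator << ")\n";
        return 0;
      }
    };

    struct RawObj { std::string pool, oid, loc; };

    struct RadosRef {                     // plays the role of rgw_rados_ref
      FakeIoCtx ioctx;
      RawObj obj;
    };

    // One call replaces the old pool-handle + open() + locator_set_key() dance.
    int get_rados_ref(const RawObj& raw, RadosRef* ref) {
      ref->obj = raw;
      ref->ioctx.pool = raw.pool;         // "open" the data pool
      ref->ioctx.locator = raw.loc;       // set the locator once, up front
      return 0;
    }

    int main() {
      RadosRef ref;
      if (get_rados_ref({"log-pool", "objexp_hint.7", ""}, &ref) < 0)
        return 1;
      return ref.ioctx.write(ref.obj.oid);  // callers use ref.ioctx / ref.obj.oid
    }
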
diff --git a/src/rgw/driver/rados/rgw_object_expirer_core.h b/src/rgw/driver/rados/rgw_object_expirer_core.h
index b616570b82f..d23457fe75f 100644
--- a/src/rgw/driver/rados/rgw_object_expirer_core.h
+++ b/src/rgw/driver/rados/rgw_object_expirer_core.h
@@ -30,19 +30,16 @@
#include "rgw_sal_rados.h"
-class RGWSI_RADOS;
class RGWSI_Zone;
class RGWBucketInfo;
class cls_timeindex_entry;
class RGWObjExpStore {
CephContext *cct;
- RGWSI_RADOS *rados_svc;
rgw::sal::RadosStore* driver;
public:
- RGWObjExpStore(CephContext *_cct, RGWSI_RADOS *_rados_svc, rgw::sal::RadosStore* _driver) : cct(_cct),
- rados_svc(_rados_svc),
- driver(_driver) {}
+ RGWObjExpStore(CephContext *_cct, rgw::sal::RadosStore* _driver) : cct(_cct),
+ driver(_driver) {}
int objexp_hint_add(const DoutPrefixProvider *dpp,
const ceph::real_time& delete_at,
@@ -101,7 +98,7 @@ protected:
public:
explicit RGWObjectExpirer(rgw::sal::Driver* _driver)
: driver(_driver),
- exp_store(_driver->ctx(), static_cast<rgw::sal::RadosStore*>(driver)->svc()->rados, static_cast<rgw::sal::RadosStore*>(driver)),
+ exp_store(_driver->ctx(), static_cast<rgw::sal::RadosStore*>(driver)),
worker(NULL) {
}
~RGWObjectExpirer() {
diff --git a/src/rgw/driver/rados/rgw_period.cc b/src/rgw/driver/rados/rgw_period.cc
index 61602b354e2..4a16faccefb 100644
--- a/src/rgw/driver/rados/rgw_period.cc
+++ b/src/rgw/driver/rados/rgw_period.cc
@@ -154,7 +154,7 @@ static int read_sync_status(const DoutPrefixProvider *dpp, rgw::sal::Driver* dri
{
rgw::sal::RadosStore* rados_store = static_cast<rgw::sal::RadosStore*>(driver);
// initialize a sync status manager to read the status
- RGWMetaSyncStatusManager mgr(rados_store, rados_store->svc()->rados->get_async_processor());
+ RGWMetaSyncStatusManager mgr(rados_store, rados_store->svc()->async_processor);
int r = mgr.init(dpp);
if (r < 0) {
return r;
diff --git a/src/rgw/driver/rados/rgw_pubsub_push.cc b/src/rgw/driver/rados/rgw_pubsub_push.cc
index bdb24ce9ad1..05dc9e65d0e 100644
--- a/src/rgw/driver/rados/rgw_pubsub_push.cc
+++ b/src/rgw/driver/rados/rgw_pubsub_push.cc
@@ -115,6 +115,55 @@ public:
}
};
+namespace {
+// this allows waiting until "finish()" is called from a different thread
+// waiting could block the waiting thread or yield, depending
+// on compilation flag support and whether the optional_yield is set
+class Waiter {
+ using Signature = void(boost::system::error_code);
+ using Completion = ceph::async::Completion<Signature>;
+ using CompletionInit = boost::asio::async_completion<yield_context, Signature>;
+ std::unique_ptr<Completion> completion = nullptr;
+ int ret;
+
+ bool done = false;
+ mutable std::mutex lock;
+ mutable std::condition_variable cond;
+
+public:
+ int wait(optional_yield y) {
+ std::unique_lock l{lock};
+ if (done) {
+ return ret;
+ }
+ if (y) {
+ boost::system::error_code ec;
+ auto&& token = y.get_yield_context()[ec];
+ CompletionInit init(token);
+ completion = Completion::create(y.get_io_context().get_executor(),
+ std::move(init.completion_handler));
+ l.unlock();
+ init.result.get();
+ return -ec.value();
+ }
+ cond.wait(l, [this]{return (done==true);});
+ return ret;
+ }
+
+ void finish(int r) {
+ std::unique_lock l{lock};
+ ret = r;
+ done = true;
+ if (completion) {
+ boost::system::error_code ec(-ret, boost::system::system_category());
+ Completion::post(std::move(completion), ec);
+ } else {
+ cond.notify_all();
+ }
+ }
+};
+} // namespace
+
#ifdef WITH_RADOSGW_AMQP_ENDPOINT
class RGWPubSubAMQPEndpoint : public RGWPubSubEndpoint {
private:
@@ -187,71 +236,17 @@ public:
}
}
- // this allows waiting untill "finish()" is called from a different thread
- // waiting could be blocking the waiting thread or yielding, depending
- // with compilation flag support and whether the optional_yield is set
- class Waiter {
- using Signature = void(boost::system::error_code);
- using Completion = ceph::async::Completion<Signature>;
- std::unique_ptr<Completion> completion = nullptr;
- int ret;
-
- mutable std::atomic<bool> done = false;
- mutable std::mutex lock;
- mutable std::condition_variable cond;
-
- template <typename ExecutionContext, typename CompletionToken>
- auto async_wait(ExecutionContext& ctx, CompletionToken&& token) {
- boost::asio::async_completion<CompletionToken, Signature> init(token);
- auto& handler = init.completion_handler;
- {
- std::unique_lock l{lock};
- completion = Completion::create(ctx.get_executor(), std::move(handler));
- }
- return init.result.get();
- }
-
- public:
- int wait(optional_yield y) {
- if (done) {
- return ret;
- }
- if (y) {
- auto& io_ctx = y.get_io_context();
- auto& yield_ctx = y.get_yield_context();
- boost::system::error_code ec;
- async_wait(io_ctx, yield_ctx[ec]);
- return -ec.value();
- }
- std::unique_lock l(lock);
- cond.wait(l, [this]{return (done==true);});
- return ret;
- }
-
- void finish(int r) {
- std::unique_lock l{lock};
- ret = r;
- done = true;
- if (completion) {
- boost::system::error_code ec(-ret, boost::system::system_category());
- Completion::post(std::move(completion), ec);
- } else {
- cond.notify_all();
- }
- }
- };
-
int send_to_completion_async(CephContext* cct, const rgw_pubsub_s3_event& event, optional_yield y) override {
if (ack_level == ack_level_t::None) {
return amqp::publish(conn_id, topic, json_format_pubsub_event(event));
} else {
// TODO: currently broker and routable are the same - this will require different flags but the same mechanism
- // note: dynamic allocation of Waiter is needed when this is invoked from a beast coroutine
- auto w = std::unique_ptr<Waiter>(new Waiter);
+ auto w = std::make_unique<Waiter>();
const auto rc = amqp::publish_with_confirm(conn_id,
topic,
json_format_pubsub_event(event),
- std::bind(&Waiter::finish, w.get(), std::placeholders::_1));
+ [wp = w.get()](int r) { wp->finish(r);}
+ );
if (rc < 0) {
// failed to publish, does not wait for reply
return rc;
@@ -314,70 +309,16 @@ public:
}
}
- // this allows waiting untill "finish()" is called from a different thread
- // waiting could be blocking the waiting thread or yielding, depending
- // with compilation flag support and whether the optional_yield is set
- class Waiter {
- using Signature = void(boost::system::error_code);
- using Completion = ceph::async::Completion<Signature>;
- std::unique_ptr<Completion> completion = nullptr;
- int ret;
-
- mutable std::atomic<bool> done = false;
- mutable std::mutex lock;
- mutable std::condition_variable cond;
-
- template <typename ExecutionContext, typename CompletionToken>
- auto async_wait(ExecutionContext& ctx, CompletionToken&& token) {
- boost::asio::async_completion<CompletionToken, Signature> init(token);
- auto& handler = init.completion_handler;
- {
- std::unique_lock l{lock};
- completion = Completion::create(ctx.get_executor(), std::move(handler));
- }
- return init.result.get();
- }
-
- public:
- int wait(optional_yield y) {
- if (done) {
- return ret;
- }
- if (y) {
- auto& io_ctx = y.get_io_context();
- auto& yield_ctx = y.get_yield_context();
- boost::system::error_code ec;
- async_wait(io_ctx, yield_ctx[ec]);
- return -ec.value();
- }
- std::unique_lock l(lock);
- cond.wait(l, [this]{return (done==true);});
- return ret;
- }
-
- void finish(int r) {
- std::unique_lock l{lock};
- ret = r;
- done = true;
- if (completion) {
- boost::system::error_code ec(-ret, boost::system::system_category());
- Completion::post(std::move(completion), ec);
- } else {
- cond.notify_all();
- }
- }
- };
-
int send_to_completion_async(CephContext* cct, const rgw_pubsub_s3_event& event, optional_yield y) override {
if (ack_level == ack_level_t::None) {
return kafka::publish(conn_name, topic, json_format_pubsub_event(event));
} else {
- // note: dynamic allocation of Waiter is needed when this is invoked from a beast coroutine
- auto w = std::unique_ptr<Waiter>(new Waiter);
+ auto w = std::make_unique<Waiter>();
const auto rc = kafka::publish_with_confirm(conn_name,
topic,
json_format_pubsub_event(event),
- std::bind(&Waiter::finish, w.get(), std::placeholders::_1));
+ [wp = w.get()](int r) { wp->finish(r); }
+ );
if (rc < 0) {
// failed to publish, does not wait for reply
return rc;
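
The two per-endpoint Waiter classes are deduplicated above into a single copy in an anonymous namespace, and the ack callback moves from std::bind to a small lambda. The blocking half of that pattern, reduced to a self-contained sketch (the optional_yield/coroutine path is omitted and publish_with_confirm() is a stand-in for the amqp/kafka call, not the real API):

    // Broker thread calls finish(); the request thread blocks in wait().
    #include <condition_variable>
    #include <functional>
    #include <iostream>
    #include <memory>
    #include <mutex>
    #include <thread>

    class Waiter {
      int ret = 0;
      bool done = false;
      std::mutex lock;
      std::condition_variable cond;

    public:
      int wait() {
        std::unique_lock l{lock};
        cond.wait(l, [this] { return done; });
        return ret;
      }

      void finish(int r) {
        {
          std::lock_guard l{lock};
          ret = r;
          done = true;
        }
        cond.notify_all();
      }
    };

    // Pretend broker API: acks from another thread once the message is confirmed.
    void publish_with_confirm(std::function<void(int)> on_ack) {
      std::thread([cb = std::move(on_ack)] { cb(0); }).detach();
    }

    int main() {
      auto w = std::make_unique<Waiter>();
      // a lambda instead of std::bind(&Waiter::finish, ...), as in the diff
      publish_with_confirm([wp = w.get()](int r) { wp->finish(r); });
      std::cout << "ack result: " << w->wait() << "\n";
    }
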
diff --git a/src/rgw/driver/rados/rgw_putobj_processor.cc b/src/rgw/driver/rados/rgw_putobj_processor.cc
index 091adb9830c..67e8a3a998e 100644
--- a/src/rgw/driver/rados/rgw_putobj_processor.cc
+++ b/src/rgw/driver/rados/rgw_putobj_processor.cc
@@ -126,8 +126,8 @@ void RadosWriter::add_write_hint(librados::ObjectWriteOperation& op) {
int RadosWriter::set_stripe_obj(const rgw_raw_obj& raw_obj)
{
- stripe_obj = store->svc.rados->obj(raw_obj);
- return stripe_obj.open(dpp);
+ return rgw_get_rados_ref(dpp, store->get_rados_handle(), raw_obj,
+ &stripe_obj);
}
int RadosWriter::process(bufferlist&& bl, uint64_t offset)
@@ -145,8 +145,9 @@ int RadosWriter::process(bufferlist&& bl, uint64_t offset)
op.write(offset, data);
}
constexpr uint64_t id = 0; // unused
- auto& ref = stripe_obj.get_ref();
- auto c = aio->get(ref.obj, Aio::librados_op(ref.pool.ioctx(), std::move(op), y), cost, id);
+ auto c = aio->get(stripe_obj.obj, Aio::librados_op(stripe_obj.ioctx,
+ std::move(op), y),
+ cost, id);
return process_completed(c, &written);
}
@@ -160,8 +161,9 @@ int RadosWriter::write_exclusive(const bufferlist& data)
op.write_full(data);
constexpr uint64_t id = 0; // unused
- auto& ref = stripe_obj.get_ref();
- auto c = aio->get(ref.obj, Aio::librados_op(ref.pool.ioctx(), std::move(op), y), cost, id);
+ auto c = aio->get(stripe_obj.obj, Aio::librados_op(stripe_obj.ioctx,
+ std::move(op), y),
+ cost, id);
auto d = aio->drain();
c.splice(c.end(), d);
return process_completed(c, &written);
@@ -553,7 +555,7 @@ int MultipartObjectProcessor::complete(size_t accounted_size,
librados::ObjectWriteOperation op;
cls_rgw_mp_upload_part_info_update(op, p, info);
- r = rgw_rados_operate(rctx.dpp, meta_obj_ref.pool.ioctx(), meta_obj_ref.obj.oid, &op, rctx.y);
+ r = rgw_rados_operate(rctx.dpp, meta_obj_ref.ioctx, meta_obj_ref.obj.oid, &op, rctx.y);
ldpp_dout(rctx.dpp, 20) << "Update meta: " << meta_obj_ref.obj.oid << " part " << p << " prefix " << info.manifest.get_prefix() << " return " << r << dendl;
if (r == -EOPNOTSUPP) {
@@ -567,7 +569,7 @@ int MultipartObjectProcessor::complete(size_t accounted_size,
op = librados::ObjectWriteOperation{};
op.assert_exists(); // detect races with abort
op.omap_set(m);
- r = rgw_rados_operate(rctx.dpp, meta_obj_ref.pool.ioctx(), meta_obj_ref.obj.oid, &op, rctx.y);
+ r = rgw_rados_operate(rctx.dpp, meta_obj_ref.ioctx, meta_obj_ref.obj.oid, &op, rctx.y);
}
if (r < 0) {
return r == -ENOENT ? -ERR_NO_SUCH_UPLOAD : r;
diff --git a/src/rgw/driver/rados/rgw_putobj_processor.h b/src/rgw/driver/rados/rgw_putobj_processor.h
index 9a21c0c793a..ce163e36f77 100644
--- a/src/rgw/driver/rados/rgw_putobj_processor.h
+++ b/src/rgw/driver/rados/rgw_putobj_processor.h
@@ -18,7 +18,6 @@
#include <optional>
#include "rgw_putobj.h"
-#include "services/svc_rados.h"
#include "services/svc_tier_rados.h"
#include "rgw_sal.h"
#include "rgw_obj_manifest.h"
@@ -70,7 +69,7 @@ class RadosWriter : public rgw::sal::DataProcessor {
const RGWBucketInfo& bucket_info;
RGWObjectCtx& obj_ctx;
const rgw_obj head_obj;
- RGWSI_RADOS::Obj stripe_obj; // current stripe object
+ rgw_rados_ref stripe_obj; // current stripe object
RawObjSet written; // set of written objects for deletion
const DoutPrefixProvider *dpp;
optional_yield y;
diff --git a/src/rgw/driver/rados/rgw_rados.cc b/src/rgw/driver/rados/rgw_rados.cc
index 2b5f7c4e7a6..9ef92270155 100644
--- a/src/rgw/driver/rados/rgw_rados.cc
+++ b/src/rgw/driver/rados/rgw_rados.cc
@@ -1020,8 +1020,8 @@ void RGWRados::finalize()
{
/* Before joining any sync threads, drain outstanding requests &
* mark the async_processor as going_down() */
- if (svc.rados) {
- svc.rados->stop_processor();
+ if (svc.async_processor) {
+ svc.async_processor->stop();
}
if (run_sync_thread) {
@@ -1172,7 +1172,7 @@ int RGWRados::update_service_map(const DoutPrefixProvider *dpp, std::map<std::st
return 0;
}
-/**
+/**
* Initialize the RADOS instance and prepare to do other ops
* Returns 0 on success, -ERR# on failure.
*/
@@ -1180,7 +1180,7 @@ int RGWRados::init_complete(const DoutPrefixProvider *dpp, optional_yield y)
{
int ret;
- /*
+ /*
* create sync module instance even if we don't run sync thread, might need it for radosgw-admin
*/
sync_module = svc.sync_modules->get_sync_module();
@@ -1258,7 +1258,7 @@ int RGWRados::init_complete(const DoutPrefixProvider *dpp, optional_yield y)
<< pt.second.name << " present in zonegroup" << dendl;
}
}
- auto async_processor = svc.rados->get_async_processor();
+ auto async_processor = svc.async_processor;
std::lock_guard l{meta_sync_thread_lock};
meta_sync_processor_thread = new RGWMetaSyncProcessorThread(this->driver, async_processor);
ret = meta_sync_processor_thread->init(dpp);
@@ -1283,7 +1283,7 @@ int RGWRados::init_complete(const DoutPrefixProvider *dpp, optional_yield y)
std::lock_guard dl{data_sync_thread_lock};
for (auto source_zone : svc.zone->get_data_sync_source_zones()) {
ldpp_dout(dpp, 5) << "starting data sync thread for zone " << source_zone->name << dendl;
- auto *thread = new RGWDataSyncProcessorThread(this->driver, svc.rados->get_async_processor(), source_zone);
+ auto *thread = new RGWDataSyncProcessorThread(this->driver, svc.async_processor, source_zone);
ret = thread->init(dpp);
if (ret < 0) {
ldpp_dout(dpp, 0) << "ERROR: failed to initialize data sync thread" << dendl;
@@ -1363,10 +1363,11 @@ int RGWRados::init_complete(const DoutPrefixProvider *dpp, optional_yield y)
int RGWRados::init_svc(bool raw, const DoutPrefixProvider *dpp)
{
if (raw) {
- return svc.init_raw(cct, use_cache, null_yield, dpp);
+ return svc.init_raw(cct, use_cache, get_rados_handle(), null_yield, dpp);
}
- return svc.init(cct, use_cache, run_sync_thread, null_yield, dpp);
+ return svc.init(cct, use_cache, run_sync_thread, get_rados_handle(),
+ null_yield, dpp);
}
int RGWRados::init_ctl(const DoutPrefixProvider *dpp)
@@ -1380,7 +1381,13 @@ int RGWRados::init_ctl(const DoutPrefixProvider *dpp)
*/
int RGWRados::init_begin(const DoutPrefixProvider *dpp)
{
- int ret = init_svc(false, dpp);
+ int ret = init_rados();
+ if (ret < 0) {
+ ldpp_dout(dpp, 0) << "ERROR: failed to init rados (ret=" << cpp_strerror(-ret) << ")" << dendl;
+ return ret;
+ }
+
+ ret = init_svc(false, dpp);
if (ret < 0) {
ldpp_dout(dpp, 0) << "ERROR: failed to init services (ret=" << cpp_strerror(-ret) << ")" << dendl;
return ret;
@@ -1394,7 +1401,7 @@ int RGWRados::init_begin(const DoutPrefixProvider *dpp)
host_id = svc.zone_utils->gen_host_id();
- return init_rados();
+ return 0;
}
/**
@@ -1724,7 +1731,7 @@ int RGWRados::decode_policy(const DoutPrefixProvider *dpp,
ACLOwner *owner)
{
auto i = bl.cbegin();
- RGWAccessControlPolicy policy(cct);
+ RGWAccessControlPolicy policy;
try {
policy.decode_owner(i);
} catch (buffer::error& err) {
@@ -1922,7 +1929,7 @@ int RGWRados::Bucket::List::list_objects_ordered(
}
if (params.access_list_filter &&
- ! params.access_list_filter->filter(obj.name, index_key.name)) {
+ !params.access_list_filter(obj.name, index_key.name)) {
ldpp_dout(dpp, 20) << __func__ <<
": skipping past namespaced objects, including \"" << entry.key <<
"\"" << dendl;
@@ -2216,7 +2223,7 @@ int RGWRados::Bucket::List::list_objects_unordered(const DoutPrefixProvider *dpp
}
if (params.access_list_filter &&
- !params.access_list_filter->filter(obj.name, index_key.name)) {
+ !params.access_list_filter(obj.name, index_key.name)) {
ldpp_dout(dpp, 20) << __func__ <<
": skipping \"" << index_key <<
"\" because doesn't match filter" << dendl;
@@ -2387,7 +2394,9 @@ bool RGWRados::obj_to_raw(const rgw_placement_rule& placement_rule, const rgw_ob
std::string RGWRados::get_cluster_fsid(const DoutPrefixProvider *dpp, optional_yield y)
{
- return svc.rados->cluster_fsid();
+ std::string s;
+ rados.cluster_fsid(&s);
+ return s;
}
int RGWRados::get_obj_head_ioctx(const DoutPrefixProvider *dpp,
@@ -2422,25 +2431,19 @@ int RGWRados::get_obj_head_ref(const DoutPrefixProvider *dpp,
const rgw_obj& obj,
rgw_rados_ref *ref)
{
- get_obj_bucket_and_oid_loc(obj, ref->obj.oid, ref->obj.loc);
-
- rgw_pool pool;
- if (!get_obj_data_pool(target_placement_rule, obj, &pool)) {
- ldpp_dout(dpp, 0) << "ERROR: cannot get data pool for obj=" << obj << ", probably misconfiguration" << dendl;
+ rgw_raw_obj raw;
+ get_obj_bucket_and_oid_loc(obj, raw.oid, raw.loc);
+ if (!get_obj_data_pool(target_placement_rule, obj, &raw.pool)) {
+ ldpp_dout(dpp, 0) << "ERROR: cannot get data pool for obj=" << obj
+ << ", probably misconfiguration" << dendl;
return -EIO;
}
-
- ref->pool = svc.rados->pool(pool);
-
- int r = ref->pool.open(dpp, RGWSI_RADOS::OpenParams()
- .set_mostly_omap(false));
+ auto r = rgw_get_rados_ref(dpp, get_rados_handle(), raw, ref);
if (r < 0) {
- ldpp_dout(dpp, 0) << "ERROR: failed opening data pool (pool=" << pool << "); r=" << r << dendl;
+ ldpp_dout(dpp, 0) << "ERROR: failed opening data pool (pool="
+ << raw.pool << "); r=" << r << dendl;
return r;
}
-
- ref->pool.ioctx().locator_set_key(ref->obj.loc);
-
return 0;
}
@@ -2452,24 +2455,18 @@ int RGWRados::get_obj_head_ref(const DoutPrefixProvider *dpp,
return get_obj_head_ref(dpp, bucket_info.placement_rule, obj, ref);
}
-int RGWRados::get_raw_obj_ref(const DoutPrefixProvider *dpp, const rgw_raw_obj& obj, rgw_rados_ref *ref)
+int RGWRados::get_raw_obj_ref(const DoutPrefixProvider *dpp, rgw_raw_obj obj, rgw_rados_ref* ref)
{
- ref->obj = obj;
-
- if (ref->obj.oid.empty()) {
- ref->obj.oid = obj.pool.to_str();
- ref->obj.pool = svc.zone->get_zone_params().domain_root;
+ if (obj.oid.empty()) {
+ obj.oid = obj.pool.to_str();
+ obj.pool = svc.zone->get_zone_params().domain_root;
}
- ref->pool = svc.rados->pool(obj.pool);
- int r = ref->pool.open(dpp, RGWSI_RADOS::OpenParams()
- .set_mostly_omap(false));
+ int r = rgw_get_rados_ref(dpp, get_rados_handle(), std::move(obj), ref);
if (r < 0) {
ldpp_dout(dpp, 0) << "ERROR: failed opening pool (pool=" << obj.pool << "); r=" << r << dendl;
return r;
}
- ref->pool.ioctx().locator_set_key(ref->obj.loc);
-
return 0;
}
@@ -2678,10 +2675,10 @@ int RGWRados::fix_tail_obj_locator(const DoutPrefixProvider *dpp,
continue;
}
- auto& ioctx = ref.pool.ioctx();
+ auto& ioctx = ref.ioctx;
get_obj_bucket_and_oid_loc(loc, oid, locator);
- ref.pool.ioctx().locator_set_key(locator);
+ ref.ioctx.locator_set_key(locator);
ldpp_dout(dpp, 20) << __func__ << ": key=" << key << " oid=" << oid << " locator=" << locator << dendl;
@@ -2742,7 +2739,7 @@ int RGWRados::BucketShard::init(const rgw_bucket& _bucket,
ldpp_dout(dpp, 0) << "ERROR: open_bucket_index_shard() returned ret=" << ret << dendl;
return ret;
}
- ldpp_dout(dpp, 20) << " bucket index object: " << bucket_obj.get_raw_obj() << dendl;
+ ldpp_dout(dpp, 20) << " bucket index object: " << bucket_obj << dendl;
return 0;
}
@@ -3215,10 +3212,10 @@ int RGWRados::Object::Write::_do_write_meta(uint64_t size, uint64_t accounted_si
return r;
}
- auto& ioctx = ref.pool.ioctx();
+ auto& ioctx = ref.ioctx;
tracepoint(rgw_rados, operate_enter, req_id.c_str());
- r = rgw_rados_operate(rctx.dpp, ref.pool.ioctx(), ref.obj.oid, &op, rctx.y);
+ r = rgw_rados_operate(rctx.dpp, ref.ioctx, ref.obj.oid, &op, rctx.y);
tracepoint(rgw_rados, operate_exit, req_id.c_str());
if (r < 0) { /* we can expect to get -ECANCELED if object was replaced under,
or -ENOENT if was removed, or -EEXIST if it did not exist
@@ -4330,8 +4327,8 @@ int RGWRados::fetch_remote_obj(RGWObjectCtx& obj_ctx,
}
ACLOwner new_owner;
- new_owner.set_id(*override_owner);
- new_owner.set_name(owner_info.display_name);
+ new_owner.id = *override_owner;
+ new_owner.display_name = owner_info.display_name;
acl.set_owner(new_owner);
@@ -4783,8 +4780,10 @@ int RGWRados::copy_obj(RGWObjectCtx& obj_ctx,
ref_tag = tag + '\0';
cls_refcount_get(op, ref_tag, true);
- auto obj = svc.rados->obj(miter.get_location().get_raw_obj(this));
- ret = obj.open(dpp);
+ rgw_rados_ref obj;
+ ret = rgw_get_rados_ref(dpp, driver->getRados()->get_rados_handle(),
+ miter.get_location().get_raw_obj(this),
+ &obj);
if (ret < 0) {
ldpp_dout(dpp, 0) << "failed to open rados context for " << obj << dendl;
goto done_ret;
@@ -4792,8 +4791,9 @@ int RGWRados::copy_obj(RGWObjectCtx& obj_ctx,
static constexpr uint64_t cost = 1; // 1 throttle unit per request
static constexpr uint64_t id = 0; // ids unused
- auto& ref = obj.get_ref();
- rgw::AioResultList completed = aio->get(ref.obj, rgw::Aio::librados_op(ref.pool.ioctx(), std::move(op), y), cost, id);
+ rgw::AioResultList completed =
+ aio->get(obj.obj, rgw::Aio::librados_op(obj.ioctx, std::move(op), y),
+ cost, id);
ret = rgw::check_for_errors(completed);
all_results.splice(all_results.end(), completed);
if (ret < 0) {
@@ -4860,19 +4860,20 @@ done_ret:
if (r.result < 0) {
continue; // skip errors
}
- auto obj = svc.rados->obj(r.obj);
- ret2 = obj.open(dpp);
+ rgw_rados_ref obj;
+ ret2 = rgw_get_rados_ref(dpp, get_rados_handle(), r.obj, &obj);
if (ret2 < 0) {
continue;
}
- auto& ref = obj.get_ref();
ObjectWriteOperation op;
cls_refcount_put(op, ref_tag, true);
static constexpr uint64_t cost = 1; // 1 throttle unit per request
static constexpr uint64_t id = 0; // ids unused
- rgw::AioResultList completed = aio->get(ref.obj, rgw::Aio::librados_op(ref.pool.ioctx(), std::move(op), y), cost, id);
+ rgw::AioResultList completed =
+ aio->get(obj.obj, rgw::Aio::librados_op(obj.ioctx, std::move(op), y),
+ cost, id);
ret2 = rgw::check_for_errors(completed);
if (ret2 < 0) {
ldpp_dout(dpp, 0) << "ERROR: cleanup after error failed to drop reference on obj=" << r.obj << dendl;
@@ -5072,7 +5073,7 @@ int RGWRados::check_bucket_empty(const DoutPrefixProvider *dpp, RGWBucketInfo& b
int RGWRados::delete_bucket(RGWBucketInfo& bucket_info, RGWObjVersionTracker& objv_tracker, optional_yield y, const DoutPrefixProvider *dpp, bool check_empty)
{
const rgw_bucket& bucket = bucket_info.bucket;
- RGWSI_RADOS::Pool index_pool;
+ librados::IoCtx index_pool;
map<int, string> bucket_objs;
int r = svc.bi_rados->open_bucket_index(dpp, bucket_info, std::nullopt, bucket_info.layout.current_index, &index_pool, &bucket_objs, nullptr);
if (r < 0)
@@ -5127,7 +5128,7 @@ int RGWRados::delete_bucket(RGWBucketInfo& bucket_info, RGWObjVersionTracker& ob
}
/* remove bucket index objects asynchronously by best effort */
- (void) CLSRGWIssueBucketIndexClean(index_pool.ioctx(),
+ (void) CLSRGWIssueBucketIndexClean(index_pool,
bucket_objs,
cct->_conf->rgw_bucket_index_max_aio)();
}
@@ -5151,7 +5152,7 @@ int RGWRados::set_bucket_owner(rgw_bucket& bucket, ACLOwner& owner, const DoutPr
return r;
}
- info.owner = owner.get_id();
+ info.owner = owner.id;
r = put_bucket_instance_info(info, false, real_time(), &attrs, dpp, y);
if (r < 0) {
@@ -5315,7 +5316,7 @@ int RGWRados::bucket_check_index(const DoutPrefixProvider *dpp, RGWBucketInfo& b
map<RGWObjCategory, RGWStorageStats> *existing_stats,
map<RGWObjCategory, RGWStorageStats> *calculated_stats)
{
- RGWSI_RADOS::Pool index_pool;
+ librados::IoCtx index_pool;
// key - bucket index object id
// value - bucket index check OP returned result with the given bucket index object (shard)
@@ -5332,7 +5333,7 @@ int RGWRados::bucket_check_index(const DoutPrefixProvider *dpp, RGWBucketInfo& b
bucket_objs_ret.emplace(iter.first, rgw_cls_check_index_ret());
}
- ret = CLSRGWIssueBucketCheck(index_pool.ioctx(), oids, bucket_objs_ret, cct->_conf->rgw_bucket_index_max_aio)();
+ ret = CLSRGWIssueBucketCheck(index_pool, oids, bucket_objs_ret, cct->_conf->rgw_bucket_index_max_aio)();
if (ret < 0) {
return ret;
}
@@ -5348,7 +5349,7 @@ int RGWRados::bucket_check_index(const DoutPrefixProvider *dpp, RGWBucketInfo& b
int RGWRados::bucket_rebuild_index(const DoutPrefixProvider *dpp, RGWBucketInfo& bucket_info)
{
- RGWSI_RADOS::Pool index_pool;
+ librados::IoCtx index_pool;
map<int, string> bucket_objs;
int r = svc.bi_rados->open_bucket_index(dpp, bucket_info, std::nullopt, bucket_info.layout.current_index, &index_pool, &bucket_objs, nullptr);
@@ -5356,7 +5357,7 @@ int RGWRados::bucket_rebuild_index(const DoutPrefixProvider *dpp, RGWBucketInfo&
return r;
}
- return CLSRGWIssueBucketRebuild(index_pool.ioctx(), bucket_objs, cct->_conf->rgw_bucket_index_max_aio)();
+ return CLSRGWIssueBucketRebuild(index_pool, bucket_objs, cct->_conf->rgw_bucket_index_max_aio)();
}
static int resync_encrypted_multipart(const DoutPrefixProvider* dpp,
@@ -5495,7 +5496,7 @@ int RGWRados::bucket_resync_encrypted_multipart(const DoutPrefixProvider* dpp,
int RGWRados::bucket_set_reshard(const DoutPrefixProvider *dpp, const RGWBucketInfo& bucket_info, const cls_rgw_bucket_instance_entry& entry)
{
- RGWSI_RADOS::Pool index_pool;
+ librados::IoCtx index_pool;
map<int, string> bucket_objs;
int r = svc.bi_rados->open_bucket_index(dpp, bucket_info, std::nullopt, bucket_info.layout.current_index, &index_pool, &bucket_objs, nullptr);
@@ -5506,7 +5507,7 @@ int RGWRados::bucket_set_reshard(const DoutPrefixProvider *dpp, const RGWBucketI
return r;
}
- r = CLSRGWIssueSetBucketResharding(index_pool.ioctx(), bucket_objs, entry, cct->_conf->rgw_bucket_index_max_aio)();
+ r = CLSRGWIssueSetBucketResharding(index_pool, bucket_objs, entry, cct->_conf->rgw_bucket_index_max_aio)();
if (r < 0) {
ldpp_dout(dpp, 0) << "ERROR: " << __func__ <<
": unable to issue set bucket resharding, r=" << r << " (" <<
@@ -5619,8 +5620,8 @@ int RGWRados::Object::Delete::delete_obj(optional_yield y, const DoutPrefixProvi
struct rgw_bucket_dir_entry_meta meta;
- meta.owner = params.obj_owner.get_id().to_str();
- meta.owner_display_name = params.obj_owner.get_display_name();
+ meta.owner = params.obj_owner.id.to_str();
+ meta.owner_display_name = params.obj_owner.display_name;
if (real_clock::is_zero(params.mtime)) {
meta.mtime = real_clock::now();
@@ -5740,7 +5741,7 @@ int RGWRados::Object::Delete::delete_obj(optional_yield y, const DoutPrefixProvi
store->remove_rgw_head_obj(op);
- auto& ioctx = ref.pool.ioctx();
+ auto& ioctx = ref.ioctx;
r = rgw_rados_operate(dpp, ioctx, ref.obj.oid, &op, y);
/* raced with another operation, object state is indeterminate */
@@ -5812,7 +5813,7 @@ int RGWRados::delete_raw_obj(const DoutPrefixProvider *dpp, const rgw_raw_obj& o
ObjectWriteOperation op;
op.remove();
- r = rgw_rados_operate(dpp, ref.pool.ioctx(), ref.obj.oid, &op, y);
+ r = rgw_rados_operate(dpp, ref.ioctx, ref.obj.oid, &op, y);
if (r < 0)
return r;
@@ -6469,7 +6470,7 @@ int RGWRados::set_attrs(const DoutPrefixProvider *dpp, RGWObjectCtx* octx, RGWBu
}
struct timespec mtime_ts = real_clock::to_timespec(mtime);
op.mtime2(&mtime_ts);
- auto& ioctx = ref.pool.ioctx();
+ auto& ioctx = ref.ioctx;
r = rgw_rados_operate(dpp, ioctx, ref.obj.oid, &op, y);
if (state) {
if (r >= 0) {
@@ -6891,8 +6892,8 @@ int RGWRados::Bucket::UpdateIndex::complete(const DoutPrefixProvider *dpp, int64
ldpp_dout(dpp, 0) << "WARNING: could not decode policy ret=" << ret << dendl;
}
}
- ent.meta.owner = owner.get_id().to_str();
- ent.meta.owner_display_name = owner.get_display_name();
+ ent.meta.owner = owner.id.to_str();
+ ent.meta.owner_display_name = owner.display_name;
ent.meta.content_type = content_type;
ent.meta.appendable = appendable;
@@ -7161,8 +7162,9 @@ int RGWRados::get_obj_iterate_cb(const DoutPrefixProvider *dpp,
}
}
- auto obj = d->rgwrados->svc.rados->obj(read_obj);
- int r = obj.open(dpp);
+ rgw_rados_ref obj;
+ int r = rgw_get_rados_ref(dpp, d->rgwrados->get_rados_handle(), read_obj,
+ &obj);
if (r < 0) {
ldpp_dout(dpp, 4) << "failed to open rados context for " << read_obj << dendl;
return r;
@@ -7174,8 +7176,7 @@ int RGWRados::get_obj_iterate_cb(const DoutPrefixProvider *dpp,
const uint64_t cost = len;
const uint64_t id = obj_ofs; // use logical object offset for sorting replies
- auto& ref = obj.get_ref();
- auto completed = d->aio->get(ref.obj, rgw::Aio::librados_op(ref.pool.ioctx(), std::move(op), d->yield), cost, id);
+ auto completed = d->aio->get(obj.obj, rgw::Aio::librados_op(obj.ioctx, std::move(op), d->yield), cost, id);
return d->flush(std::move(completed));
}
@@ -7282,7 +7283,7 @@ int RGWRados::obj_operate(const DoutPrefixProvider *dpp, const RGWBucketInfo& bu
return r;
}
- return rgw_rados_operate(dpp, ref.pool.ioctx(), ref.obj.oid, op, y);
+ return rgw_rados_operate(dpp, ref.ioctx, ref.obj.oid, op, y);
}
int RGWRados::obj_operate(const DoutPrefixProvider *dpp, const RGWBucketInfo& bucket_info, const rgw_obj& obj, ObjectReadOperation *op, optional_yield y)
@@ -7295,7 +7296,7 @@ int RGWRados::obj_operate(const DoutPrefixProvider *dpp, const RGWBucketInfo& bu
bufferlist outbl;
- return rgw_rados_operate(dpp, ref.pool.ioctx(), ref.obj.oid, op, &outbl, y);
+ return rgw_rados_operate(dpp, ref.ioctx, ref.obj.oid, op, &outbl, y);
}
void RGWRados::olh_cancel_modification(const DoutPrefixProvider *dpp, const RGWBucketInfo& bucket_info,
@@ -7320,7 +7321,7 @@ void RGWRados::olh_cancel_modification(const DoutPrefixProvider *dpp, const RGWB
ObjectWriteOperation op;
bucket_index_guard_olh_op(dpp, state, op);
op.rmxattr(attr_name.c_str());
- r = rgw_rados_operate(dpp, ref.pool.ioctx(), ref.obj.oid, &op, y);
+ r = rgw_rados_operate(dpp, ref.ioctx, ref.obj.oid, &op, y);
if (r < 0) {
if (r != -ENOENT && r != -ECANCELED) {
ldpp_dout(dpp, 0) << __func__ << " target_obj=" << olh_obj << " rmxattr rgw_rados_operate() returned " << r << dendl;
@@ -7336,7 +7337,7 @@ void RGWRados::olh_cancel_modification(const DoutPrefixProvider *dpp, const RGWB
rm_op.cmpxattr(RGW_ATTR_OLH_INFO, CEPH_OSD_CMPXATTR_OP_EQ, bufferlist());
cls_obj_check_prefix_exist(rm_op, RGW_ATTR_OLH_PENDING_PREFIX, true);
rm_op.remove();
- r = rgw_rados_operate(dpp, ref.pool.ioctx(), ref.obj.oid, &rm_op, y);
+ r = rgw_rados_operate(dpp, ref.ioctx, ref.obj.oid, &rm_op, y);
}
if (r < 0 && (r != -ENOENT && r != -ECANCELED)) {
ldpp_dout(dpp, 0) << __func__ << " target_obj=" << olh_obj << " olh rm rgw_rados_operate() returned " << r << dendl;
@@ -7556,8 +7557,8 @@ int RGWRados::block_while_resharding(RGWRados::BucketShard *bs,
constexpr int num_retries = 10;
for (int i = 1; i <= num_retries; i++) { // nb: 1-based for loop
- auto& ref = bs->bucket_obj.get_ref();
- ret = cls_rgw_get_bucket_resharding(ref.pool.ioctx(), ref.obj.oid, &entry);
+ auto& ref = bs->bucket_obj;
+ ret = cls_rgw_get_bucket_resharding(ref.ioctx, ref.obj.oid, &entry);
if (ret == -ENOENT) {
ret = fetch_new_bucket_info("get_bucket_resharding_failed");
if (ret < 0) {
@@ -7696,7 +7697,7 @@ int RGWRados::bucket_index_link_olh(const DoutPrefixProvider *dpp, RGWBucketInfo
r = guard_reshard(dpp, &bs, obj_instance, bucket_info,
[&](BucketShard *bs) -> int {
cls_rgw_obj_key key(obj_instance.key.get_index_key_name(), obj_instance.key.instance);
- auto& ref = bs->bucket_obj.get_ref();
+ auto& ref = bs->bucket_obj;
librados::ObjectWriteOperation op;
op.assert_exists(); // bucket index shard must exist
cls_rgw_guard_bucket_resharding(op, -ERR_BUSY_RESHARDING);
@@ -7704,7 +7705,7 @@ int RGWRados::bucket_index_link_olh(const DoutPrefixProvider *dpp, RGWBucketInfo
delete_marker, op_tag, meta, olh_epoch,
unmod_since, high_precision_time,
svc.zone->need_to_log_data(), zones_trace);
- return rgw_rados_operate(dpp, ref.pool.ioctx(), ref.obj.oid, &op, y);
+ return rgw_rados_operate(dpp, ref.ioctx, ref.obj.oid, &op, y);
}, y);
if (r < 0) {
ldpp_dout(dpp, 20) << "rgw_rados_operate() after cls_rgw_bucket_link_olh() returned r=" << r << dendl;
@@ -7747,13 +7748,13 @@ int RGWRados::bucket_index_unlink_instance(const DoutPrefixProvider *dpp,
cls_rgw_obj_key key(obj_instance.key.get_index_key_name(), obj_instance.key.instance);
r = guard_reshard(dpp, &bs, obj_instance, bucket_info,
[&](BucketShard *bs) -> int {
- auto& ref = bs->bucket_obj.get_ref();
+ auto& ref = bs->bucket_obj;
librados::ObjectWriteOperation op;
op.assert_exists(); // bucket index shard must exist
cls_rgw_guard_bucket_resharding(op, -ERR_BUSY_RESHARDING);
cls_rgw_bucket_unlink_instance(op, key, op_tag,
olh_tag, olh_epoch, svc.zone->need_to_log_data(), zones_trace);
- return rgw_rados_operate(dpp, ref.pool.ioctx(), ref.obj.oid, &op, y);
+ return rgw_rados_operate(dpp, ref.ioctx, ref.obj.oid, &op, y);
}, y);
if (r < 0) {
ldpp_dout(dpp, 20) << "rgw_rados_operate() after cls_rgw_bucket_link_instance() returned r=" << r << dendl;
@@ -7787,14 +7788,14 @@ int RGWRados::bucket_index_read_olh_log(const DoutPrefixProvider *dpp,
cls_rgw_obj_key key(obj_instance.key.get_index_key_name(), string());
- auto& shard_ref = bs.bucket_obj.get_ref();
+ auto& shard_ref = bs.bucket_obj;
ObjectReadOperation op;
rgw_cls_read_olh_log_ret log_ret;
int op_ret = 0;
- cls_rgw_get_olh_log(op, key, ver_marker, olh_tag, log_ret, op_ret);
+ cls_rgw_get_olh_log(op, key, ver_marker, olh_tag, log_ret, op_ret);
bufferlist outbl;
- r = rgw_rados_operate(dpp, shard_ref.pool.ioctx(), shard_ref.obj.oid, &op, &outbl, y);
+ r = rgw_rados_operate(dpp, shard_ref.ioctx, shard_ref.obj.oid, &op, &outbl, y);
if (r < 0) {
return r;
}
@@ -7855,7 +7856,7 @@ int RGWRados::repair_olh(const DoutPrefixProvider *dpp, RGWObjState* state, cons
if (r < 0) {
return r;
}
- r = rgw_rados_operate(dpp, ref.pool.ioctx(), ref.obj.oid, &op, y);
+ r = rgw_rados_operate(dpp, ref.ioctx, ref.obj.oid, &op, y);
if (r < 0) {
ldpp_dout(dpp, 0) << "repair_olh failed to write olh attributes with "
<< cpp_strerror(r) << dendl;
@@ -7922,10 +7923,10 @@ int RGWRados::bucket_index_clear_olh(const DoutPrefixProvider *dpp,
[&](BucketShard *pbs) -> int {
ObjectWriteOperation op;
op.assert_exists(); // bucket index shard must exist
- auto& ref = pbs->bucket_obj.get_ref();
+ auto& ref = bs.bucket_obj;
cls_rgw_guard_bucket_resharding(op, -ERR_BUSY_RESHARDING);
cls_rgw_clear_olh(op, key, olh_tag);
- return rgw_rados_operate(dpp, ref.pool.ioctx(), ref.obj.oid, &op, y);
+ return rgw_rados_operate(dpp, ref.ioctx, ref.obj.oid, &op, y);
}, y);
if (ret < 0) {
ldpp_dout(dpp, 5) << "rgw_rados_operate() after cls_rgw_clear_olh() returned ret=" << ret << dendl;
@@ -8076,7 +8077,7 @@ int RGWRados::apply_olh_log(const DoutPrefixProvider *dpp,
}
/* update olh object */
- r = rgw_rados_operate(dpp, ref.pool.ioctx(), ref.obj.oid, &op, y);
+ r = rgw_rados_operate(dpp, ref.ioctx, ref.obj.oid, &op, y);
if (r < 0) {
ldpp_dout(dpp, 0) << "ERROR: " << __func__ << ": could not apply olh update to oid \"" << ref.obj.oid << "\", r=" << r << dendl;
return r;
@@ -8154,7 +8155,7 @@ int RGWRados::clear_olh(const DoutPrefixProvider *dpp,
cls_obj_check_prefix_exist(rm_op, RGW_ATTR_OLH_PENDING_PREFIX, true); /* fail if found one of these, pending modification */
rm_op.remove();
- r = rgw_rados_operate(dpp, ref.pool.ioctx(), ref.obj.oid, &rm_op, y);
+ r = rgw_rados_operate(dpp, ref.ioctx, ref.obj.oid, &rm_op, y);
if (r == -ECANCELED) {
return r; /* someone else made a modification in the meantime */
}
@@ -8442,7 +8443,7 @@ int RGWRados::remove_olh_pending_entries(const DoutPrefixProvider *dpp, const RG
op.rmxattr(i->first.c_str());
}
- r = rgw_rados_operate(dpp, ref.pool.ioctx(), ref.obj.oid, &op, y);
+ r = rgw_rados_operate(dpp, ref.ioctx, ref.obj.oid, &op, y);
if (r == -ENOENT || r == -ECANCELED) {
/* raced with some other change, shouldn't sweat about it */
return 0;
@@ -8538,10 +8539,10 @@ int RGWRados::raw_obj_stat(const DoutPrefixProvider *dpp,
op.read(0, cct->_conf->rgw_max_chunk_size, first_chunk, NULL);
}
bufferlist outbl;
- r = rgw_rados_operate(dpp, ref.pool.ioctx(), ref.obj.oid, &op, &outbl, y);
+ r = rgw_rados_operate(dpp, ref.ioctx, ref.obj.oid, &op, &outbl, y);
if (epoch) {
- *epoch = ref.pool.ioctx().get_last_version();
+ *epoch = ref.ioctx.get_last_version();
}
if (r < 0)
@@ -8603,34 +8604,36 @@ int RGWRados::get_bucket_stats(const DoutPrefixProvider *dpp,
}
class RGWGetBucketStatsContext : public RGWGetDirHeader_CB {
- RGWGetBucketStats_CB *cb;
+ boost::intrusive_ptr<rgw::sal::ReadStatsCB> cb;
uint32_t pendings;
- map<RGWObjCategory, RGWStorageStats> stats;
+ RGWStorageStats stats;
int ret_code;
bool should_cb;
ceph::mutex lock = ceph::make_mutex("RGWGetBucketStatsContext");
public:
- RGWGetBucketStatsContext(RGWGetBucketStats_CB *_cb, uint32_t _pendings)
- : cb(_cb), pendings(_pendings), stats(), ret_code(0), should_cb(true)
+ RGWGetBucketStatsContext(boost::intrusive_ptr<rgw::sal::ReadStatsCB> cb, uint32_t _pendings)
+ : cb(std::move(cb)), pendings(_pendings), stats(), ret_code(0), should_cb(true)
{}
- void handle_response(int r, rgw_bucket_dir_header& header) override {
+ void handle_response(int r, const rgw_bucket_dir_header& header) override {
std::lock_guard l{lock};
if (should_cb) {
- if ( r >= 0) {
- accumulate_raw_stats(header, stats);
+ if (r >= 0) {
+ for (const auto& [c, s] : header.stats) {
+ stats.size += s.total_size;
+ stats.size_rounded += s.total_size_rounded;
+ stats.size_utilized += s.actual_size;
+ stats.num_objects += s.num_entries;
+ }
} else {
ret_code = r;
}
// Are we all done?
if (--pendings == 0) {
- if (!ret_code) {
- cb->set_response(&stats);
- }
- cb->handle_response(ret_code);
- cb->put();
+ cb->handle_response(ret_code, stats);
+ cb.reset();
}
}
}
@@ -8641,19 +8644,16 @@ public:
}
};
-int RGWRados::get_bucket_stats_async(const DoutPrefixProvider *dpp, RGWBucketInfo& bucket_info, const rgw::bucket_index_layout_generation& idx_layout, int shard_id, RGWGetBucketStats_CB *ctx)
+int RGWRados::get_bucket_stats_async(const DoutPrefixProvider *dpp, RGWBucketInfo& bucket_info, const rgw::bucket_index_layout_generation& idx_layout, int shard_id, boost::intrusive_ptr<rgw::sal::ReadStatsCB> cb)
{
int num_aio = 0;
- RGWGetBucketStatsContext *get_ctx = new RGWGetBucketStatsContext(ctx, bucket_info.layout.current_index.layout.normal.num_shards ? : 1);
- ceph_assert(get_ctx);
- int r = cls_bucket_head_async(dpp, bucket_info, idx_layout, shard_id, get_ctx, &num_aio);
+ boost::intrusive_ptr headercb = new RGWGetBucketStatsContext(std::move(cb), bucket_info.layout.current_index.layout.normal.num_shards ? : 1);
+ int r = cls_bucket_head_async(dpp, bucket_info, idx_layout, shard_id, headercb, &num_aio);
if (r < 0) {
- ctx->put();
if (num_aio) {
- get_ctx->unset_cb();
+ headercb->unset_cb();
}
}
- get_ctx->put();
return r;
}
@@ -8774,7 +8774,7 @@ int RGWRados::append_async(const DoutPrefixProvider *dpp, rgw_raw_obj& obj, size
librados::Rados *rad = get_rados_handle();
librados::AioCompletion *completion = rad->aio_create_completion(nullptr, nullptr);
- r = ref.pool.ioctx().aio_append(ref.obj.oid, completion, bl, size);
+ r = ref.ioctx.aio_append(ref.obj.oid, completion, bl, size);
completion->release();
return r;
}
@@ -8830,7 +8830,7 @@ string RGWRados::pool_iterate_get_cursor(RGWPoolIterCtx& ctx)
static int do_pool_iterate(const DoutPrefixProvider *dpp, CephContext* cct, RGWPoolIterCtx& ctx, uint32_t num,
vector<rgw_bucket_dir_entry>& objs,
- bool *is_truncated, RGWAccessListFilter *filter)
+ bool *is_truncated, const rgw::AccessListFilter& filter)
{
librados::IoCtx& io_ctx = ctx.io_ctx;
librados::NObjectIterator& iter = ctx.iter;
@@ -8847,7 +8847,7 @@ static int do_pool_iterate(const DoutPrefixProvider *dpp, CephContext* cct, RGWP
ldpp_dout(dpp, 20) << "RGWRados::pool_iterate: got " << oid << dendl;
// fill it in with initial values; we may correct later
- if (filter && !filter->filter(oid, oid))
+ if (filter && !filter(oid, oid))
continue;
e.key = oid;
@@ -8861,7 +8861,7 @@ static int do_pool_iterate(const DoutPrefixProvider *dpp, CephContext* cct, RGWP
}
int RGWRados::pool_iterate(const DoutPrefixProvider *dpp, RGWPoolIterCtx& ctx, uint32_t num, vector<rgw_bucket_dir_entry>& objs,
- bool *is_truncated, RGWAccessListFilter *filter)
+ bool *is_truncated, const rgw::AccessListFilter& filter)
{
// catch exceptions from NObjectIterator::operator++()
try {
@@ -8898,9 +8898,9 @@ int RGWRados::list_raw_objects_next(const DoutPrefixProvider *dpp, const string&
if (!ctx.initialized) {
return -EINVAL;
}
- RGWAccessListFilterPrefix filter(prefix_filter);
+ auto filter = rgw::AccessListFilterPrefix(prefix_filter);
vector<rgw_bucket_dir_entry> objs;
- int r = pool_iterate(dpp, ctx.iter_ctx, max, objs, is_truncated, &filter);
+ int r = pool_iterate(dpp, ctx.iter_ctx, max, objs, is_truncated, filter);
if (r < 0) {
if(r != -ENOENT)
ldpp_dout(dpp, 10) << "failed to list objects pool_iterate returned r=" << r << dendl;
@@ -8990,21 +8990,21 @@ int RGWRados::bi_get(const DoutPrefixProvider *dpp, const RGWBucketInfo& bucket_
cls_rgw_obj_key key(obj.key.get_index_key_name(), obj.key.instance);
- auto& ref = bs.bucket_obj.get_ref();
-
- return cls_rgw_bi_get(ref.pool.ioctx(), ref.obj.oid, index_type, key, entry);
+ auto& ref = bs.bucket_obj;
+
+ return cls_rgw_bi_get(ref.ioctx, ref.obj.oid, index_type, key, entry);
}
void RGWRados::bi_put(ObjectWriteOperation& op, BucketShard& bs, rgw_cls_bi_entry& entry, optional_yield y)
{
- auto& ref = bs.bucket_obj.get_ref();
+ auto& ref = bs.bucket_obj;
cls_rgw_bi_put(op, ref.obj.oid, entry);
}
int RGWRados::bi_put(BucketShard& bs, rgw_cls_bi_entry& entry, optional_yield y)
{
- auto& ref = bs.bucket_obj.get_ref();
- int ret = cls_rgw_bi_put(ref.pool.ioctx(), ref.obj.oid, entry);
+ auto& ref = bs.bucket_obj;
+ int ret = cls_rgw_bi_put(ref.ioctx, ref.obj.oid, entry);
if (ret < 0)
return ret;
@@ -9042,8 +9042,8 @@ int RGWRados::bi_list(const DoutPrefixProvider *dpp, rgw_bucket& bucket,
return ret;
}
- auto& ref = bs.bucket_obj.get_ref();
- ret = cls_rgw_bi_list(ref.pool.ioctx(), ref.obj.oid, obj_name_filter, marker, max, entries, is_truncated);
+ auto& ref = bs.bucket_obj;
+ ret = cls_rgw_bi_list(ref.ioctx, ref.obj.oid, obj_name_filter, marker, max, entries, is_truncated);
if (ret == -ENOENT) {
*is_truncated = false;
}
@@ -9056,8 +9056,8 @@ int RGWRados::bi_list(const DoutPrefixProvider *dpp, rgw_bucket& bucket,
int RGWRados::bi_list(BucketShard& bs, const string& obj_name_filter, const string& marker, uint32_t max,
list<rgw_cls_bi_entry> *entries, bool *is_truncated, optional_yield y)
{
- auto& ref = bs.bucket_obj.get_ref();
- int ret = cls_rgw_bi_list(ref.pool.ioctx(), ref.obj.oid, obj_name_filter, marker, max, entries, is_truncated);
+ auto& ref = bs.bucket_obj;
+ int ret = cls_rgw_bi_list(ref.ioctx, ref.obj.oid, obj_name_filter, marker, max, entries, is_truncated);
if (ret < 0)
return ret;
@@ -9082,8 +9082,8 @@ int RGWRados::bi_list(const DoutPrefixProvider *dpp,
int RGWRados::bi_remove(const DoutPrefixProvider *dpp, BucketShard& bs)
{
- auto& ref = bs.bucket_obj.get_ref();
- int ret = ref.pool.ioctx().remove(ref.obj.oid);
+ auto& ref = bs.bucket_obj;
+ int ret = ref.ioctx.remove(ref.obj.oid);
if (ret == -ENOENT) {
ret = 0;
}
@@ -9247,13 +9247,13 @@ int RGWRados::cls_obj_complete_cancel(BucketShard& bs, string& tag, rgw_obj& obj
int RGWRados::cls_obj_set_bucket_tag_timeout(const DoutPrefixProvider *dpp, RGWBucketInfo& bucket_info, uint64_t timeout)
{
- RGWSI_RADOS::Pool index_pool;
+ librados::IoCtx index_pool;
map<int, string> bucket_objs;
int r = svc.bi_rados->open_bucket_index(dpp, bucket_info, std::nullopt, bucket_info.layout.current_index, &index_pool, &bucket_objs, nullptr);
if (r < 0)
return r;
- return CLSRGWIssueSetTagTimeout(index_pool.ioctx(), bucket_objs, cct->_conf->rgw_bucket_index_max_aio, timeout)();
+ return CLSRGWIssueSetTagTimeout(index_pool, bucket_objs, cct->_conf->rgw_bucket_index_max_aio, timeout)();
}
@@ -9333,7 +9333,7 @@ int RGWRados::cls_bucket_list_ordered(const DoutPrefixProvider *dpp,
m.clear();
- RGWSI_RADOS::Pool index_pool;
+ librados::IoCtx index_pool;
// key - oid (for different shards if there is any)
// value - list result for the corresponding oid (shard), it is filled by
// the AIO callback
@@ -9381,7 +9381,7 @@ int RGWRados::cls_bucket_list_ordered(const DoutPrefixProvider *dpp,
" shard(s) for " << num_entries_per_shard << " entries to get " <<
num_entries << " total entries" << dendl;
- auto& ioctx = index_pool.ioctx();
+ auto& ioctx = index_pool;
std::map<int, rgw_cls_list_ret> shard_list_results;
cls_rgw_obj_key start_after_key(start_after.name, start_after.instance);
r = CLSRGWIssueBucketList(ioctx, start_after_key, prefix, delimiter,
@@ -9666,10 +9666,9 @@ int RGWRados::cls_bucket_list_unordered(const DoutPrefixProvider *dpp,
ldout_bitx(bitx, dpp, 25) << "BACKTRACE: " << __func__ << ": " << ClibBackTrace(0) << dendl_bitx;
ent_list.clear();
- static MultipartMetaFilter multipart_meta_filter;
*is_truncated = false;
- RGWSI_RADOS::Pool index_pool;
+ librados::IoCtx index_pool;
std::map<int, std::string> oids;
int r = svc.bi_rados->open_bucket_index(dpp, bucket_info, shard_id, idx_layout, &index_pool, &oids, nullptr);
@@ -9677,7 +9676,7 @@ int RGWRados::cls_bucket_list_unordered(const DoutPrefixProvider *dpp,
return r;
}
- auto& ioctx = index_pool.ioctx();
+ auto& ioctx = index_pool;
const uint32_t num_shards = oids.size();
@@ -9848,7 +9847,7 @@ int RGWRados::cls_obj_usage_log_add(const DoutPrefixProvider *dpp, const string&
ObjectWriteOperation op;
cls_rgw_usage_log_add(op, info);
- r = rgw_rados_operate(dpp, ref.pool.ioctx(), ref.obj.oid, &op, y);
+ r = rgw_rados_operate(dpp, ref.ioctx, ref.obj.oid, &op, y);
return r;
}
@@ -9867,7 +9866,7 @@ int RGWRados::cls_obj_usage_log_read(const DoutPrefixProvider *dpp, const string
*is_truncated = false;
- r = cls_rgw_usage_log_read(ref.pool.ioctx(), ref.obj.oid, user, bucket, start_epoch, end_epoch,
+ r = cls_rgw_usage_log_read(ref.ioctx, ref.obj.oid, user, bucket, start_epoch, end_epoch,
max_entries, read_iter, usage, is_truncated);
return r;
@@ -9879,7 +9878,7 @@ static int cls_rgw_usage_log_trim_repeat(const DoutPrefixProvider *dpp, rgw_rado
do {
librados::ObjectWriteOperation op;
cls_rgw_usage_log_trim(op, user, bucket, start_epoch, end_epoch);
- int r = rgw_rados_operate(dpp, ref.pool.ioctx(), ref.obj.oid, &op, y);
+ int r = rgw_rados_operate(dpp, ref.ioctx, ref.obj.oid, &op, y);
if (r == -ENODATA)
done = true;
else if (r < 0)
@@ -9915,7 +9914,7 @@ int RGWRados::cls_obj_usage_log_clear(const DoutPrefixProvider *dpp, string& oid
}
librados::ObjectWriteOperation op;
cls_rgw_usage_log_clear(op);
- r = rgw_rados_operate(dpp, ref.pool.ioctx(), ref.obj.oid, &op, y);
+ r = rgw_rados_operate(dpp, ref.ioctx, ref.obj.oid, &op, y);
return r;
}
@@ -9938,7 +9937,7 @@ int RGWRados::remove_objs_from_index(const DoutPrefixProvider *dpp,
}
const uint32_t num_shards = current_index.layout.normal.num_shards;
- RGWSI_RADOS::Pool index_pool;
+ librados::IoCtx index_pool;
std::map<int, std::string> index_oids;
int r = svc.bi_rados->open_bucket_index(dpp, bucket_info, std::nullopt,
bucket_info.layout.current_index,
@@ -9981,7 +9980,7 @@ int RGWRados::remove_objs_from_index(const DoutPrefixProvider *dpp,
", shard=" << shard << ", oid=" << oid << ", num_keys=" <<
removals.second.size() << dendl_bitx;
- r = index_pool.ioctx().omap_rm_keys(oid, removals.second);
+ r = index_pool.omap_rm_keys(oid, removals.second);
if (r < 0) {
ldout_bitx(bitx, dpp, 0) << "ERROR: " << __func__ <<
": omap_rm_keys returned ret=" << r <<
@@ -10014,9 +10013,8 @@ int RGWRados::check_disk_state(const DoutPrefixProvider *dpp,
rgw_obj obj(bucket_info.bucket, list_state.key);
- MultipartMetaFilter multipart_meta_filter;
string temp_key;
- if (multipart_meta_filter.filter(list_state.key.name, temp_key)) {
+ if (MultipartMetaFilter(list_state.key.name, temp_key)) {
obj.in_extra_data = true;
}
@@ -10111,8 +10109,8 @@ int RGWRados::check_disk_state(const DoutPrefixProvider *dpp,
object.meta.etag = etag;
object.meta.content_type = content_type;
object.meta.storage_class = storage_class;
- object.meta.owner = owner.get_id().to_str();
- object.meta.owner_display_name = owner.get_display_name();
+ object.meta.owner = owner.id.to_str();
+ object.meta.owner_display_name = owner.display_name;
object.meta.appendable = appendable;
// encode suggested updates
@@ -10141,8 +10139,8 @@ int RGWRados::check_disk_state(const DoutPrefixProvider *dpp,
list_state.tag = astate->obj_tag.c_str();
}
- list_state.meta.owner = owner.get_id().to_str();
- list_state.meta.owner_display_name = owner.get_display_name();
+ list_state.meta.owner = owner.id.to_str();
+ list_state.meta.owner_display_name = owner.display_name;
list_state.exists = true;
@@ -10156,7 +10154,7 @@ int RGWRados::check_disk_state(const DoutPrefixProvider *dpp,
int RGWRados::cls_bucket_head(const DoutPrefixProvider *dpp, const RGWBucketInfo& bucket_info, const rgw::bucket_index_layout_generation& idx_layout, int shard_id, vector<rgw_bucket_dir_header>& headers, map<int, string> *bucket_instance_ids)
{
- RGWSI_RADOS::Pool index_pool;
+ librados::IoCtx index_pool;
map<int, string> oids;
map<int, struct rgw_cls_list_ret> list_results;
int r = svc.bi_rados->open_bucket_index(dpp, bucket_info, shard_id, idx_layout, &index_pool, &oids, bucket_instance_ids);
@@ -10166,7 +10164,7 @@ int RGWRados::cls_bucket_head(const DoutPrefixProvider *dpp, const RGWBucketInfo
return r;
}
- r = CLSRGWIssueGetDirHeader(index_pool.ioctx(), oids, list_results, cct->_conf->rgw_bucket_index_max_aio)();
+ r = CLSRGWIssueGetDirHeader(index_pool, oids, list_results, cct->_conf->rgw_bucket_index_max_aio)();
if (r < 0) {
ldpp_dout(dpp, 20) << "cls_bucket_head: CLSRGWIssueGetDirHeader() returned "
<< r << dendl;
@@ -10180,25 +10178,24 @@ int RGWRados::cls_bucket_head(const DoutPrefixProvider *dpp, const RGWBucketInfo
return 0;
}
-int RGWRados::cls_bucket_head_async(const DoutPrefixProvider *dpp, const RGWBucketInfo& bucket_info, const rgw::bucket_index_layout_generation& idx_layout, int shard_id, RGWGetDirHeader_CB *ctx, int *num_aio)
+int RGWRados::cls_bucket_head_async(const DoutPrefixProvider *dpp, const RGWBucketInfo& bucket_info,
+ const rgw::bucket_index_layout_generation& idx_layout, int shard_id,
+ boost::intrusive_ptr<RGWGetDirHeader_CB> cb, int *num_aio)
{
- RGWSI_RADOS::Pool index_pool;
+ librados::IoCtx index_pool;
map<int, string> bucket_objs;
int r = svc.bi_rados->open_bucket_index(dpp, bucket_info, shard_id, idx_layout, &index_pool, &bucket_objs, nullptr);
if (r < 0)
return r;
- map<int, string>::iterator iter = bucket_objs.begin();
- for (; iter != bucket_objs.end(); ++iter) {
- r = cls_rgw_get_dir_header_async(index_pool.ioctx(), iter->second, static_cast<RGWGetDirHeader_CB*>(ctx->get()));
+ for (auto& pair : bucket_objs) {
+ r = cls_rgw_get_dir_header_async(index_pool, pair.second, cb);
if (r < 0) {
- ctx->put();
- break;
- } else {
- (*num_aio)++;
+ return r;
}
+ (*num_aio)++;
}
- return r;
+ return 0;
}
int RGWRados::check_bucket_shards(const RGWBucketInfo& bucket_info,
@@ -10340,7 +10337,7 @@ int RGWRados::delete_raw_obj_aio(const DoutPrefixProvider *dpp, const rgw_raw_ob
cls_rgw_remove_obj(op, prefixes);
AioCompletion *c = librados::Rados::aio_create_completion(nullptr, nullptr);
- ret = ref.pool.ioctx().aio_operate(ref.obj.oid, c, &op);
+ ret = ref.ioctx.aio_operate(ref.obj.oid, c, &op);
if (ret < 0) {
ldpp_dout(dpp, -1) << "ERROR: AioOperate failed with ret=" << ret << dendl;
c->release();
@@ -10380,7 +10377,7 @@ int RGWRados::delete_obj_aio(const DoutPrefixProvider *dpp, const rgw_obj& obj,
cls_rgw_remove_obj(op, prefixes);
AioCompletion *c = librados::Rados::aio_create_completion(nullptr, nullptr);
- ret = ref.pool.ioctx().aio_operate(ref.obj.oid, c, &op);
+ ret = ref.ioctx.aio_operate(ref.obj.oid, c, &op);
if (ret < 0) {
ldpp_dout(dpp, -1) << "ERROR: AioOperate failed with ret=" << ret << dendl;
c->release();
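
get_bucket_stats_async() and cls_bucket_head_async() above also switch their callbacks from raw pointers with manual get()/put() to boost::intrusive_ptr, so the last holder releases the object automatically. A sketch of that ownership model, assuming simplified stand-ins for rgw::sal::ReadStatsCB and the stats struct rather than the upstream API:

    #include <boost/intrusive_ptr.hpp>
    #include <boost/smart_ptr/intrusive_ref_counter.hpp>
    #include <cstdint>
    #include <iostream>

    struct Stats { uint64_t size = 0; uint64_t num_objects = 0; };

    struct ReadStatsCB : boost::intrusive_ref_counter<ReadStatsCB> {
      virtual ~ReadStatsCB() = default;
      virtual void handle_response(int r, const Stats& s) = 0;
    };

    struct PrintStats final : ReadStatsCB {
      void handle_response(int r, const Stats& s) override {
        std::cout << "r=" << r << " size=" << s.size
                  << " objects=" << s.num_objects << "\n";
      }
    };

    // Pretend async aggregator: holds its own reference for the duration of
    // the "I/O"; nobody calls get()/put() by hand anymore.
    void read_stats_async(boost::intrusive_ptr<ReadStatsCB> cb) {
      Stats s{4096, 3};
      cb->handle_response(0, s);
    }  // cb's reference drops here

    int main() {
      boost::intrusive_ptr<ReadStatsCB> cb{new PrintStats};
      read_stats_async(cb);
    }  // last reference released automatically
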
diff --git a/src/rgw/driver/rados/rgw_rados.h b/src/rgw/driver/rados/rgw_rados.h
index f61730cb4de..986dc091163 100644
--- a/src/rgw/driver/rados/rgw_rados.h
+++ b/src/rgw/driver/rados/rgw_rados.h
@@ -33,13 +33,13 @@
#include "rgw_aio.h"
#include "rgw_d3n_cacherequest.h"
-#include "services/svc_rados.h"
#include "services/svc_bi_rados.h"
#include "common/Throttle.h"
#include "common/ceph_mutex.h"
#include "rgw_cache.h"
#include "rgw_sal_fwd.h"
#include "rgw_pubsub.h"
+#include "rgw_tools.h"
struct D3nDataCache;
@@ -576,7 +576,7 @@ public:
}
- int get_raw_obj_ref(const DoutPrefixProvider *dpp, const rgw_raw_obj& obj, rgw_rados_ref *ref);
+ int get_raw_obj_ref(const DoutPrefixProvider *dpp, rgw_raw_obj obj, rgw_rados_ref *ref);
int list_raw_objects_init(const DoutPrefixProvider *dpp, const rgw_pool& pool, const std::string& marker, RGWListRawObjsCtx *ctx);
int list_raw_objects_next(const DoutPrefixProvider *dpp, const std::string& prefix_filter, int max,
@@ -651,7 +651,7 @@ public:
RGWRados *store;
rgw_bucket bucket;
int shard_id;
- RGWSI_RADOS::Obj bucket_obj;
+ rgw_rados_ref bucket_obj;
explicit BucketShard(RGWRados *_store) : store(_store), shard_id(-1) {}
int init(const rgw_bucket& _bucket, const rgw_obj& obj,
@@ -664,7 +664,7 @@ public:
friend std::ostream& operator<<(std::ostream& out, const BucketShard& bs) {
out << "BucketShard:{ bucket=" << bs.bucket <<
", shard_id=" << bs.shard_id <<
- ", bucket_ojb=" << bs.bucket_obj << "}";
+ ", bucket_obj=" << bs.bucket_obj << "}";
return out;
}
};
@@ -1023,14 +1023,13 @@ public:
rgw_obj_key end_marker;
std::string ns;
bool enforce_ns;
- RGWAccessListFilter* access_list_filter;
+ rgw::AccessListFilter access_list_filter;
RGWBucketListNameFilter force_check_filter;
bool list_versions;
bool allow_unordered;
Params() :
enforce_ns(true),
- access_list_filter(nullptr),
list_versions(false),
allow_unordered(false)
{}
@@ -1401,7 +1400,7 @@ public:
int decode_policy(const DoutPrefixProvider *dpp, bufferlist& bl, ACLOwner *owner);
int get_bucket_stats(const DoutPrefixProvider *dpp, RGWBucketInfo& bucket_info, const rgw::bucket_index_layout_generation& idx_layout, int shard_id, std::string *bucket_ver, std::string *master_ver,
std::map<RGWObjCategory, RGWStorageStats>& stats, std::string *max_marker, bool* syncstopped = NULL);
- int get_bucket_stats_async(const DoutPrefixProvider *dpp, RGWBucketInfo& bucket_info, const rgw::bucket_index_layout_generation& idx_layout, int shard_id, RGWGetBucketStats_CB *cb);
+ int get_bucket_stats_async(const DoutPrefixProvider *dpp, RGWBucketInfo& bucket_info, const rgw::bucket_index_layout_generation& idx_layout, int shard_id, boost::intrusive_ptr<rgw::sal::ReadStatsCB> cb);
int put_bucket_instance_info(RGWBucketInfo& info, bool exclusive, ceph::real_time mtime, const std::map<std::string, bufferlist> *pattrs, const DoutPrefixProvider *dpp, optional_yield y);
/* xxx dang obj_ctx -> svc */
@@ -1483,7 +1482,7 @@ public:
int cls_bucket_head_async(const DoutPrefixProvider *dpp,
const RGWBucketInfo& bucket_info,
const rgw::bucket_index_layout_generation& idx_layout,
- int shard_id, RGWGetDirHeader_CB *ctx, int *num_aio);
+ int shard_id, boost::intrusive_ptr<RGWGetDirHeader_CB> cb, int *num_aio);
int bi_get_instance(const DoutPrefixProvider *dpp, const RGWBucketInfo& bucket_info, const rgw_obj& obj, rgw_bucket_dir_entry *dirent, optional_yield y);
int bi_get_olh(const DoutPrefixProvider *dpp, const RGWBucketInfo& bucket_info, const rgw_obj& obj, rgw_bucket_olh_entry *olh, optional_yield y);
int bi_get(const DoutPrefixProvider *dpp, const RGWBucketInfo& bucket_info, const rgw_obj& obj, BIIndexType index_type, rgw_cls_bi_entry *entry, optional_yield y);
@@ -1638,7 +1637,7 @@ public:
*/
int pool_iterate(const DoutPrefixProvider *dpp, RGWPoolIterCtx& ctx, uint32_t num,
std::vector<rgw_bucket_dir_entry>& objs,
- bool *is_truncated, RGWAccessListFilter *filter);
+ bool *is_truncated, const rgw::AccessListFilter& filter);
uint64_t next_bucket_id();
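
The Params::access_list_filter change replaces the raw RGWAccessListFilter* with a value-type rgw::AccessListFilter, and pool_iterate() now takes it by const reference. A minimal sketch of a caller-supplied filter, assuming the alias is a std::function-style callable of bool(const std::string&, std::string&), as the filter(oid, oid) call in rgw_list_pool() later in this diff suggests:

```cpp
// Sketch only; rgw::AccessListFilter's exact definition is assumed here,
// not taken from this diff.
#include <functional>
#include <string>
#include <utility>

using AccessListFilterSketch =
    std::function<bool(const std::string&, std::string&)>;  // assumed shape

// A prefix filter can be captured inline; no long-lived filter object is
// needed any more, since the callable is copied into Params by value.
AccessListFilterSketch make_prefix_filter(std::string prefix) {
  return [prefix = std::move(prefix)](const std::string& name,
                                      std::string& key) {
    if (name.compare(0, prefix.size(), prefix) != 0) {
      return false;   // reject: name does not start with the prefix
    }
    key = name;       // accept: hand the matching name back to the caller
    return true;
  };
}
```

With a callable like this, a listing caller sets params.access_list_filter to the filter directly, mirroring the MultipartMetaFilter assignment shown in rgw_sal_rados.cc below.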
diff --git a/src/rgw/driver/rados/rgw_sal_rados.cc b/src/rgw/driver/rados/rgw_sal_rados.cc
index 478d7a5b706..5ede8d44fa9 100644
--- a/src/rgw/driver/rados/rgw_sal_rados.cc
+++ b/src/rgw/driver/rados/rgw_sal_rados.cc
@@ -187,7 +187,7 @@ int RadosUser::read_stats(const DoutPrefixProvider *dpp,
return store->ctl()->user->read_stats(dpp, get_id(), stats, y, last_stats_sync, last_stats_update);
}
-int RadosUser::read_stats_async(const DoutPrefixProvider *dpp, RGWGetUserStats_CB* cb)
+int RadosUser::read_stats_async(const DoutPrefixProvider *dpp, boost::intrusive_ptr<ReadStatsCB> cb)
{
return store->svc()->user->read_stats_async(dpp, get_id(), cb);
}
@@ -531,7 +531,7 @@ int RadosBucket::read_stats(const DoutPrefixProvider *dpp,
int RadosBucket::read_stats_async(const DoutPrefixProvider *dpp,
const bucket_index_layout_generation& idx_layout,
- int shard_id, RGWGetBucketStats_CB* ctx)
+ int shard_id, boost::intrusive_ptr<ReadStatsCB> ctx)
{
return store->getRados()->get_bucket_stats_async(dpp, get_info(), idx_layout, shard_id, ctx);
}
@@ -684,7 +684,7 @@ int RadosBucket::set_acl(const DoutPrefixProvider* dpp, RGWAccessControlPolicy &
map<string, bufferlist>& attrs = get_attrs();
attrs[RGW_ATTR_ACL] = aclbl;
- info.owner = acl.get_owner().get_id();
+ info.owner = acl.get_owner().id;
int r = store->ctl()->bucket->store_bucket_instance_info(info.bucket,
info, y, dpp,
@@ -751,13 +751,12 @@ int RadosBucket::list_multiparts(const DoutPrefixProvider *dpp,
{
rgw::sal::Bucket::ListParams params;
rgw::sal::Bucket::ListResults results;
- MultipartMetaFilter mp_filter;
params.prefix = prefix;
params.delim = delim;
params.marker = marker;
params.ns = RGW_OBJ_NS_MULTIPART;
- params.access_list_filter = &mp_filter;
+ params.access_list_filter = MultipartMetaFilter;
int ret = list(dpp, params, max_uploads, results, y);
@@ -767,8 +766,10 @@ int RadosBucket::list_multiparts(const DoutPrefixProvider *dpp,
if (!results.objs.empty()) {
for (const rgw_bucket_dir_entry& dentry : results.objs) {
rgw_obj_key key(dentry.key);
- ACLOwner owner(rgw_user(dentry.meta.owner));
- owner.set_name(dentry.meta.owner_display_name);
+ const ACLOwner owner{
+ .id = rgw_user(dentry.meta.owner),
+ .display_name = dentry.meta.owner_display_name
+ };
uploads.push_back(this->get_multipart_upload(key.name,
std::nullopt, std::move(owner), dentry.meta.mtime));
}
@@ -1591,7 +1592,7 @@ int RadosObject::get_torrent_info(const DoutPrefixProvider* dpp,
librados::ObjectReadOperation op;
op.omap_get_vals_by_keys(keys, &result, nullptr);
- ret = rgw_rados_operate(dpp, ref.pool.ioctx(), ref.obj.oid, &op, nullptr, y);
+ ret = rgw_rados_operate(dpp, ref.ioctx, ref.obj.oid, &op, nullptr, y);
if (ret < 0) {
return ret;
}
@@ -1648,7 +1649,7 @@ int RadosObject::chown(User& new_user, const DoutPrefixProvider* dpp, optional_y
}
bufferlist& bl = aiter->second;
- RGWAccessControlPolicy policy(store->ctx());
+ RGWAccessControlPolicy policy;
ACLOwner owner;
auto bliter = bl.cbegin();
try {
@@ -1664,16 +1665,16 @@ int RadosObject::chown(User& new_user, const DoutPrefixProvider* dpp, optional_y
RGWAccessControlList& acl = policy.get_acl();
//Remove grant that is set to old owner
- acl.remove_canon_user_grant(owner.get_id());
+ acl.remove_canon_user_grant(owner.id);
//Create a grant and add grant
ACLGrant grant;
grant.set_canon(new_user.get_id(), new_user.get_display_name(), RGW_PERM_FULL_CONTROL);
- acl.add_grant(&grant);
+ acl.add_grant(grant);
//Update the ACL owner to the new user
- owner.set_id(new_user.get_id());
- owner.set_name(new_user.get_display_name());
+ owner.id = new_user.get_id();
+ owner.display_name = new_user.get_display_name();
policy.set_owner(owner);
bl.clear();
@@ -1987,7 +1988,7 @@ RadosObject::RadosDeleteOp::RadosDeleteOp(RadosObject *_source) :
int RadosObject::RadosDeleteOp::delete_obj(const DoutPrefixProvider* dpp, optional_yield y)
{
- parent_op.params.bucket_owner = params.bucket_owner.get_id();
+ parent_op.params.bucket_owner = params.bucket_owner.id;
parent_op.params.versioning_status = params.versioning_status;
parent_op.params.obj_owner = params.obj_owner;
parent_op.params.olh_epoch = params.olh_epoch;
@@ -2221,7 +2222,7 @@ int RadosMultipartUpload::abort(const DoutPrefixProvider *dpp, CephContext *cct,
}
std::unique_ptr<rgw::sal::Object::DeleteOp> del_op = meta_obj->get_delete_op();
- del_op->params.bucket_owner = bucket->get_info().owner;
+ del_op->params.bucket_owner.id = bucket->get_info().owner;
del_op->params.versioning_status = 0;
if (!remove_objs.empty()) {
del_op->params.remove_objs = &remove_objs;
@@ -2273,7 +2274,7 @@ int RadosMultipartUpload::init(const DoutPrefixProvider *dpp, optional_yield y,
RGWRados::Object::Write obj_op(&op_target);
op_target.set_versioning_disabled(true); /* no versioning for multipart meta */
- obj_op.meta.owner = owner.get_id();
+ obj_op.meta.owner = owner.id;
obj_op.meta.category = RGWObjCategory::MultiMeta;
obj_op.meta.flags = PUT_OBJ_CREATE_EXCL;
obj_op.meta.mtime = &mtime;
@@ -2562,7 +2563,7 @@ int RadosMultipartUpload::complete(const DoutPrefixProvider *dpp,
obj_op.meta.remove_objs = &remove_objs;
obj_op.meta.ptag = &tag; /* use req_id as operation tag */
- obj_op.meta.owner = owner.get_id();
+ obj_op.meta.owner = owner.id;
obj_op.meta.flags = PUT_OBJ_CREATE;
obj_op.meta.modify_tail = true;
obj_op.meta.completeMultipart = true;
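
Several hunks above switch ACLOwner from getter/setter access (get_id(), set_name()) to direct member access, with designated initializers at construction. A small sketch of the resulting pattern, assuming ACLOwner is now an aggregate whose relevant members are id and display_name (as the accesses in this diff imply):

```cpp
// Sketch: build and compare owners through the public members, as the
// call sites above now do.  Member list assumed from the .id /
// .display_name accesses in this diff.
ACLOwner make_owner_sketch(const rgw_user& uid, const std::string& name) {
  return ACLOwner{
    .id = uid,              // was owner.set_id(uid)
    .display_name = name,   // was owner.set_name(name)
  };
}

bool same_owner_sketch(const ACLOwner& a, const ACLOwner& b) {
  // equality over {id, display_name}; the hand-written operator== is
  // removed in rgw_acl.cc later in this diff
  return a.id == b.id && a.display_name == b.display_name;
}
```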
diff --git a/src/rgw/driver/rados/rgw_sal_rados.h b/src/rgw/driver/rados/rgw_sal_rados.h
index 7efd7757c3c..fbf14410c17 100644
--- a/src/rgw/driver/rados/rgw_sal_rados.h
+++ b/src/rgw/driver/rados/rgw_sal_rados.h
@@ -271,7 +271,7 @@ class RadosUser : public StoreUser {
optional_yield y, RGWStorageStats* stats,
ceph::real_time* last_stats_sync = nullptr,
ceph::real_time* last_stats_update = nullptr) override;
- virtual int read_stats_async(const DoutPrefixProvider *dpp, RGWGetUserStats_CB* cb) override;
+ virtual int read_stats_async(const DoutPrefixProvider *dpp, boost::intrusive_ptr<ReadStatsCB> cb) override;
virtual int complete_flush_stats(const DoutPrefixProvider *dpp, optional_yield y) override;
virtual int read_usage(const DoutPrefixProvider *dpp, uint64_t start_epoch, uint64_t end_epoch, uint32_t max_entries,
bool* is_truncated, RGWUsageIter& usage_iter,
@@ -512,7 +512,7 @@ class RadosBucket : public StoreBucket {
bool* syncstopped = nullptr) override;
virtual int read_stats_async(const DoutPrefixProvider *dpp,
const bucket_index_layout_generation& idx_layout,
- int shard_id, RGWGetBucketStats_CB* ctx) override;
+ int shard_id, boost::intrusive_ptr<ReadStatsCB> ctx) override;
int sync_user_stats(const DoutPrefixProvider *dpp, optional_yield y,
RGWBucketEnt* ent) override;
int check_bucket_shards(const DoutPrefixProvider* dpp, uint64_t num_objs,
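
Both read_stats_async() variants now take boost::intrusive_ptr<ReadStatsCB> instead of a raw RGWGet*Stats_CB pointer, so the callback's lifetime is tied to the outstanding aio by reference counting rather than manual put()/delete. A sketch of the ownership pattern only; the callback class and hook name below are placeholders, not the real rgw::sal::ReadStatsCB interface, which is not shown in this diff:

```cpp
#include <boost/intrusive_ptr.hpp>
#include <boost/smart_ptr/intrusive_ref_counter.hpp>

// Placeholder callback type; the real ReadStatsCB lives in the rgw::sal
// namespace (per the rgw_rados.h hunk above) and its virtual interface
// is not reproduced here.
struct StatsCBSketch : boost::intrusive_ref_counter<StatsCBSketch> {
  virtual ~StatsCBSketch() = default;
  virtual void handle_result(int r) = 0;   // hypothetical hook name
};

struct LogStatsSketch : StatsCBSketch {
  void handle_result(int r) override {
    // consume the result; the object stays alive until the last
    // intrusive_ptr held by the pending aio is dropped
  }
};

void submit_sketch() {
  boost::intrusive_ptr<StatsCBSketch> cb = new LogStatsSketch();
  // pass `cb` to read_stats_async(); no manual release is needed, which
  // is the point of moving away from raw callback pointers
}
```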
diff --git a/src/rgw/driver/rados/rgw_service.cc b/src/rgw/driver/rados/rgw_service.cc
index 4fcb1ebdef7..5b78472dfe3 100644
--- a/src/rgw/driver/rados/rgw_service.cc
+++ b/src/rgw/driver/rados/rgw_service.cc
@@ -17,7 +17,6 @@
#include "services/svc_meta_be_otp.h"
#include "services/svc_notify.h"
#include "services/svc_otp.h"
-#include "services/svc_rados.h"
#include "services/svc_zone.h"
#include "services/svc_zone_utils.h"
#include "services/svc_quota.h"
@@ -31,6 +30,7 @@
#include "common/errno.h"
#include "rgw_bucket.h"
+#include "rgw_cr_rados.h"
#include "rgw_datalog.h"
#include "rgw_metadata.h"
#include "rgw_otp.h"
@@ -51,6 +51,7 @@ int RGWServices_Def::init(CephContext *cct,
bool have_cache,
bool raw,
bool run_sync,
+ librados::Rados* rados,
optional_yield y,
const DoutPrefixProvider *dpp)
{
@@ -68,7 +69,6 @@ int RGWServices_Def::init(CephContext *cct,
meta_be_otp = std::make_unique<RGWSI_MetaBackend_OTP>(cct);
notify = std::make_unique<RGWSI_Notify>(cct);
otp = std::make_unique<RGWSI_OTP>(cct);
- rados = std::make_unique<RGWSI_RADOS>(cct);
zone = std::make_unique<RGWSI_Zone>(cct);
zone_utils = std::make_unique<RGWSI_ZoneUtils>(cct);
quota = std::make_unique<RGWSI_Quota>(cct);
@@ -77,6 +77,8 @@ int RGWServices_Def::init(CephContext *cct,
sysobj_core = std::make_unique<RGWSI_SysObj_Core>(cct);
user_rados = std::make_unique<RGWSI_User_RADOS>(cct);
role_rados = std::make_unique<RGWSI_Role_RADOS>(cct);
+ async_processor = std::make_unique<RGWAsyncRadosProcessor>(
+ cct, cct->_conf->rgw_num_async_rados_threads);
if (have_cache) {
sysobj_cache = std::make_unique<RGWSI_SysObj_Cache>(dpp, cct);
@@ -84,8 +86,9 @@ int RGWServices_Def::init(CephContext *cct,
vector<RGWSI_MetaBackend *> meta_bes{meta_be_sobj.get(), meta_be_otp.get()};
+ async_processor->start();
finisher->init();
- bi_rados->init(zone.get(), rados.get(), bilog_rados.get(), datalog_rados.get());
+ bi_rados->init(zone.get(), rados, bilog_rados.get(), datalog_rados.get());
bilog_rados->init(bi_rados.get());
bucket_sobj->init(zone.get(), sysobj.get(), sysobj_cache.get(),
bi_rados.get(), meta.get(), meta_be_sobj.get(),
@@ -94,27 +97,27 @@ int RGWServices_Def::init(CephContext *cct,
sysobj.get(),
sysobj_cache.get(),
bucket_sobj.get());
- cls->init(zone.get(), rados.get());
- config_key_rados->init(rados.get());
- mdlog->init(rados.get(), zone.get(), sysobj.get(), cls.get());
+ cls->init(zone.get(), rados);
+ config_key_rados->init(rados);
+ mdlog->init(rados, zone.get(), sysobj.get(), cls.get(),
+ async_processor.get());
meta->init(sysobj.get(), mdlog.get(), meta_bes);
meta_be_sobj->init(sysobj.get(), mdlog.get());
meta_be_otp->init(sysobj.get(), mdlog.get(), cls.get());
- notify->init(zone.get(), rados.get(), finisher.get());
+ notify->init(zone.get(), rados, finisher.get());
otp->init(zone.get(), meta.get(), meta_be_otp.get());
- rados->init();
- zone->init(sysobj.get(), rados.get(), sync_modules.get(), bucket_sync_sobj.get());
- zone_utils->init(rados.get(), zone.get());
+ zone->init(sysobj.get(), rados, sync_modules.get(), bucket_sync_sobj.get());
+ zone_utils->init(rados, zone.get());
quota->init(zone.get());
sync_modules->init(zone.get());
- sysobj_core->core_init(rados.get(), zone.get());
+ sysobj_core->core_init(rados, zone.get());
if (have_cache) {
- sysobj_cache->init(rados.get(), zone.get(), notify.get());
- sysobj->init(rados.get(), sysobj_cache.get());
+ sysobj_cache->init(rados, zone.get(), notify.get());
+ sysobj->init(rados, sysobj_cache.get());
} else {
- sysobj->init(rados.get(), sysobj_core.get());
+ sysobj->init(rados, sysobj_core.get());
}
- user_rados->init(rados.get(), zone.get(), sysobj.get(), sysobj_cache.get(),
+ user_rados->init(rados, zone.get(), sysobj.get(), sysobj_cache.get(),
meta.get(), meta_be_sobj.get(), sync_modules.get());
role_rados->init(zone.get(), meta.get(), meta_be_sobj.get(), sysobj.get());
@@ -134,12 +137,6 @@ int RGWServices_Def::init(CephContext *cct,
}
}
- r = rados->start(y, dpp);
- if (r < 0) {
- ldpp_dout(dpp, 0) << "ERROR: failed to start rados service (" << cpp_strerror(-r) << dendl;
- return r;
- }
-
if (!raw) {
r = zone->start(y, dpp);
if (r < 0) {
@@ -149,7 +146,7 @@ int RGWServices_Def::init(CephContext *cct,
r = datalog_rados->start(dpp, &zone->get_zone(),
zone->get_zone_params(),
- rados->get_rados_handle());
+ rados);
if (r < 0) {
ldpp_dout(dpp, 0) << "ERROR: failed to start datalog_rados service (" << cpp_strerror(-r) << dendl;
return r;
@@ -299,18 +296,18 @@ void RGWServices_Def::shutdown()
quota->shutdown();
zone_utils->shutdown();
zone->shutdown();
- rados->shutdown();
+ async_processor->stop();
has_shutdown = true;
-
}
-
-int RGWServices::do_init(CephContext *_cct, bool have_cache, bool raw, bool run_sync, optional_yield y, const DoutPrefixProvider *dpp)
+int RGWServices::do_init(CephContext *_cct, bool have_cache, bool raw,
+ bool run_sync, librados::Rados* rados,
+ optional_yield y, const DoutPrefixProvider *dpp)
{
cct = _cct;
- int r = _svc.init(cct, have_cache, raw, run_sync, y, dpp);
+ int r = _svc.init(cct, have_cache, raw, run_sync, rados, y, dpp);
if (r < 0) {
return r;
}
@@ -333,7 +330,6 @@ int RGWServices::do_init(CephContext *_cct, bool have_cache, bool raw, bool run_
meta_be_otp = _svc.meta_be_otp.get();
notify = _svc.notify.get();
otp = _svc.otp.get();
- rados = _svc.rados.get();
zone = _svc.zone.get();
zone_utils = _svc.zone_utils.get();
quota = _svc.quota.get();
@@ -343,6 +339,7 @@ int RGWServices::do_init(CephContext *_cct, bool have_cache, bool raw, bool run_
core = _svc.sysobj_core.get();
user = _svc.user_rados.get();
role = _svc.role_rados.get();
+ async_processor = _svc.async_processor.get();
return 0;
}
diff --git a/src/rgw/driver/rados/rgw_service.h b/src/rgw/driver/rados/rgw_service.h
index 4c0b8d8421f..7c05f043a47 100644
--- a/src/rgw/driver/rados/rgw_service.h
+++ b/src/rgw/driver/rados/rgw_service.h
@@ -62,7 +62,6 @@ class RGWSI_MetaBackend_SObj;
class RGWSI_MetaBackend_OTP;
class RGWSI_Notify;
class RGWSI_OTP;
-class RGWSI_RADOS;
class RGWSI_Zone;
class RGWSI_ZoneUtils;
class RGWSI_Quota;
@@ -74,6 +73,7 @@ class RGWSI_User;
class RGWSI_User_RADOS;
class RGWDataChangesLog;
class RGWSI_Role_RADOS;
+class RGWAsyncRadosProcessor;
struct RGWServices_Def
{
@@ -93,7 +93,6 @@ struct RGWServices_Def
std::unique_ptr<RGWSI_MetaBackend_OTP> meta_be_otp;
std::unique_ptr<RGWSI_Notify> notify;
std::unique_ptr<RGWSI_OTP> otp;
- std::unique_ptr<RGWSI_RADOS> rados;
std::unique_ptr<RGWSI_Zone> zone;
std::unique_ptr<RGWSI_ZoneUtils> zone_utils;
std::unique_ptr<RGWSI_Quota> quota;
@@ -104,11 +103,14 @@ struct RGWServices_Def
std::unique_ptr<RGWSI_User_RADOS> user_rados;
std::unique_ptr<RGWDataChangesLog> datalog_rados;
std::unique_ptr<RGWSI_Role_RADOS> role_rados;
+ std::unique_ptr<RGWAsyncRadosProcessor> async_processor;
RGWServices_Def();
~RGWServices_Def();
- int init(CephContext *cct, bool have_cache, bool raw_storage, bool run_sync, optional_yield y, const DoutPrefixProvider *dpp);
+ int init(CephContext *cct, bool have_cache, bool raw_storage, bool run_sync,
+ librados::Rados* rados, optional_yield y,
+ const DoutPrefixProvider *dpp);
void shutdown();
};
@@ -137,7 +139,6 @@ struct RGWServices
RGWSI_MetaBackend *meta_be_otp{nullptr};
RGWSI_Notify *notify{nullptr};
RGWSI_OTP *otp{nullptr};
- RGWSI_RADOS *rados{nullptr};
RGWSI_Zone *zone{nullptr};
RGWSI_ZoneUtils *zone_utils{nullptr};
RGWSI_Quota *quota{nullptr};
@@ -147,15 +148,21 @@ struct RGWServices
RGWSI_SysObj_Core *core{nullptr};
RGWSI_User *user{nullptr};
RGWSI_Role_RADOS *role{nullptr};
+ RGWAsyncRadosProcessor* async_processor;
- int do_init(CephContext *cct, bool have_cache, bool raw_storage, bool run_sync, optional_yield y, const DoutPrefixProvider *dpp);
+ int do_init(CephContext *cct, bool have_cache, bool raw_storage,
+ bool run_sync, librados::Rados* rados, optional_yield y,
+ const DoutPrefixProvider *dpp);
- int init(CephContext *cct, bool have_cache, bool run_sync, optional_yield y, const DoutPrefixProvider *dpp) {
- return do_init(cct, have_cache, false, run_sync, y, dpp);
+ int init(CephContext *cct, bool have_cache, bool run_sync,
+ librados::Rados* rados, optional_yield y,
+ const DoutPrefixProvider *dpp) {
+ return do_init(cct, have_cache, false, run_sync, rados, y, dpp);
}
- int init_raw(CephContext *cct, bool have_cache, optional_yield y, const DoutPrefixProvider *dpp) {
- return do_init(cct, have_cache, true, false, y, dpp);
+ int init_raw(CephContext *cct, bool have_cache, librados::Rados* rados,
+ optional_yield y, const DoutPrefixProvider *dpp) {
+ return do_init(cct, have_cache, true, false, rados, y, dpp);
}
void shutdown() {
_svc.shutdown();
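
With RGWSI_RADOS removed, the services no longer create and start their own cluster handle; the caller passes a librados::Rados* into init()/init_raw(), and the new RGWAsyncRadosProcessor is started inside RGWServices_Def::init(). A sketch of the bring-up on the caller's side, under the assumption that the caller owns and connects the handle before initializing the services (the surrounding driver code is not part of this diff):

```cpp
// Sketch only; function name and error handling are illustrative.
int start_services_sketch(CephContext* cct, RGWServices& svc,
                          librados::Rados& rados, optional_yield y,
                          const DoutPrefixProvider* dpp) {
  int r = rados.init_with_context(cct);   // bind the existing CephContext
  if (r < 0) {
    return r;
  }
  r = rados.connect();                    // connect before the services use it
  if (r < 0) {
    return r;
  }
  // have_cache = true, run_sync = true; the handle is borrowed, not owned,
  // so it must outlive svc.shutdown()
  return svc.init(cct, true, true, &rados, y, dpp);
}
```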
diff --git a/src/rgw/driver/rados/rgw_sync_error_repo.cc b/src/rgw/driver/rados/rgw_sync_error_repo.cc
index 44305b60b6b..aca671039ee 100644
--- a/src/rgw/driver/rados/rgw_sync_error_repo.cc
+++ b/src/rgw/driver/rados/rgw_sync_error_repo.cc
@@ -15,7 +15,6 @@
#include "rgw_sync_error_repo.h"
#include "rgw_coroutine.h"
#include "rgw_sal.h"
-#include "services/svc_rados.h"
#include "cls/cmpomap/client.h"
namespace rgw::error_repo {
@@ -118,16 +117,17 @@ int remove(librados::ObjectWriteOperation& op,
}
class RGWErrorRepoWriteCR : public RGWSimpleCoroutine {
- RGWSI_RADOS::Obj obj;
+ librados::Rados* rados;
+ const rgw_raw_obj& raw_obj;
std::string key;
ceph::real_time timestamp;
boost::intrusive_ptr<RGWAioCompletionNotifier> cn;
public:
- RGWErrorRepoWriteCR(RGWSI_RADOS* rados, const rgw_raw_obj& raw_obj,
+ RGWErrorRepoWriteCR(librados::Rados* rados, const rgw_raw_obj& raw_obj,
const std::string& key, ceph::real_time timestamp)
- : RGWSimpleCoroutine(rados->ctx()),
- obj(rados->obj(raw_obj)),
+ : RGWSimpleCoroutine(static_cast<CephContext*>(rados->cct())),
+ raw_obj(raw_obj),
key(key), timestamp(timestamp)
{}
@@ -137,13 +137,14 @@ class RGWErrorRepoWriteCR : public RGWSimpleCoroutine {
if (r < 0) {
return r;
}
- r = obj.open(dpp);
+ rgw_rados_ref ref;
+ r = rgw_get_rados_ref(dpp, rados, raw_obj, &ref);
if (r < 0) {
return r;
}
cn = stack->create_completion_notifier();
- return obj.aio_operate(cn->completion(), &op);
+ return ref.aio_operate(cn->completion(), &op);
}
int request_complete() override {
@@ -151,7 +152,7 @@ class RGWErrorRepoWriteCR : public RGWSimpleCoroutine {
}
};
-RGWCoroutine* write_cr(RGWSI_RADOS* rados,
+RGWCoroutine* write_cr(librados::Rados* rados,
const rgw_raw_obj& obj,
const std::string& key,
ceph::real_time timestamp)
@@ -161,16 +162,17 @@ RGWCoroutine* write_cr(RGWSI_RADOS* rados,
class RGWErrorRepoRemoveCR : public RGWSimpleCoroutine {
- RGWSI_RADOS::Obj obj;
+ librados::Rados* rados;
+ const rgw_raw_obj& raw_obj;
std::string key;
ceph::real_time timestamp;
boost::intrusive_ptr<RGWAioCompletionNotifier> cn;
public:
- RGWErrorRepoRemoveCR(RGWSI_RADOS* rados, const rgw_raw_obj& raw_obj,
+ RGWErrorRepoRemoveCR(librados::Rados* rados, const rgw_raw_obj& raw_obj,
const std::string& key, ceph::real_time timestamp)
- : RGWSimpleCoroutine(rados->ctx()),
- obj(rados->obj(raw_obj)),
+ : RGWSimpleCoroutine(static_cast<CephContext*>(rados->cct())),
+ raw_obj(raw_obj),
key(key), timestamp(timestamp)
{}
@@ -180,13 +182,14 @@ class RGWErrorRepoRemoveCR : public RGWSimpleCoroutine {
if (r < 0) {
return r;
}
- r = obj.open(dpp);
+ rgw_rados_ref ref;
+ r = rgw_get_rados_ref(dpp, rados, raw_obj, &ref);
if (r < 0) {
return r;
}
cn = stack->create_completion_notifier();
- return obj.aio_operate(cn->completion(), &op);
+ return ref.aio_operate(cn->completion(), &op);
}
int request_complete() override {
@@ -194,7 +197,7 @@ class RGWErrorRepoRemoveCR : public RGWSimpleCoroutine {
}
};
-RGWCoroutine* remove_cr(RGWSI_RADOS* rados,
+RGWCoroutine* remove_cr(librados::Rados* rados,
const rgw_raw_obj& obj,
const std::string& key,
ceph::real_time timestamp)
diff --git a/src/rgw/driver/rados/rgw_sync_error_repo.h b/src/rgw/driver/rados/rgw_sync_error_repo.h
index 60525d281f0..7760c9fe198 100644
--- a/src/rgw/driver/rados/rgw_sync_error_repo.h
+++ b/src/rgw/driver/rados/rgw_sync_error_repo.h
@@ -19,7 +19,6 @@
#include "include/buffer_fwd.h"
#include "common/ceph_time.h"
-class RGWSI_RADOS;
class RGWCoroutine;
struct rgw_raw_obj;
struct rgw_bucket_shard;
@@ -42,7 +41,7 @@ ceph::real_time decode_value(const ceph::bufferlist& bl);
int write(librados::ObjectWriteOperation& op,
const std::string& key,
ceph::real_time timestamp);
-RGWCoroutine* write_cr(RGWSI_RADOS* rados,
+RGWCoroutine* write_cr(librados::Rados* rados,
const rgw_raw_obj& obj,
const std::string& key,
ceph::real_time timestamp);
@@ -51,7 +50,7 @@ RGWCoroutine* write_cr(RGWSI_RADOS* rados,
int remove(librados::ObjectWriteOperation& op,
const std::string& key,
ceph::real_time timestamp);
-RGWCoroutine* remove_cr(RGWSI_RADOS* rados,
+RGWCoroutine* remove_cr(librados::Rados* rados,
const rgw_raw_obj& obj,
const std::string& key,
ceph::real_time timestamp);
diff --git a/src/rgw/driver/rados/rgw_sync_module_aws.cc b/src/rgw/driver/rados/rgw_sync_module_aws.cc
index 46b99968a50..3c269a74949 100644
--- a/src/rgw/driver/rados/rgw_sync_module_aws.cc
+++ b/src/rgw/driver/rados/rgw_sync_module_aws.cc
@@ -705,7 +705,6 @@ static int do_decode_rest_obj(const DoutPrefixProvider *dpp, CephContext *cct, m
}
}
- info->acls.set_ctx(cct);
auto aiter = attrs.find(RGW_ATTR_ACL);
if (aiter != attrs.end()) {
bufferlist& bl = aiter->second;
diff --git a/src/rgw/driver/rados/rgw_sync_module_es.cc b/src/rgw/driver/rados/rgw_sync_module_es.cc
index 13c2057bf77..e3353dc1fc7 100644
--- a/src/rgw/driver/rados/rgw_sync_module_es.cc
+++ b/src/rgw/driver/rados/rgw_sync_module_es.cc
@@ -501,15 +501,12 @@ struct es_obj_metadata {
const RGWAccessControlList& acl = policy.get_acl();
- permissions.insert(policy.get_owner().get_id().to_str());
- for (auto acliter : acl.get_grant_map()) {
+ permissions.insert(policy.get_owner().id.to_str());
+ for (const auto& acliter : acl.get_grant_map()) {
const ACLGrant& grant = acliter.second;
- if (grant.get_type().get_type() == ACL_TYPE_CANON_USER &&
- ((uint32_t)grant.get_permission().get_permissions() & RGW_PERM_READ) != 0) {
- rgw_user user;
- if (grant.get_id(user)) {
- permissions.insert(user.to_str());
- }
+ const auto* user = grant.get_user();
+ if (user && (grant.get_permission().get_permissions() & RGW_PERM_READ) != 0) {
+ permissions.insert(user->id.to_str());
}
}
} else if (attr_name == RGW_ATTR_TAGS) {
diff --git a/src/rgw/driver/rados/rgw_sync_module_es_rest.cc b/src/rgw/driver/rados/rgw_sync_module_es_rest.cc
index db9d48adb36..b456f0b4097 100644
--- a/src/rgw/driver/rados/rgw_sync_module_es_rest.cc
+++ b/src/rgw/driver/rados/rgw_sync_module_es_rest.cc
@@ -347,7 +347,7 @@ public:
s->formatter->dump_format("ETag", "\"%s\"", e.meta.etag.c_str());
s->formatter->dump_string("ContentType", e.meta.content_type.c_str());
s->formatter->dump_string("StorageClass", e.meta.storage_class.c_str());
- dump_owner(s, e.owner.get_id(), e.owner.get_display_name());
+ dump_owner(s, e.owner.id, e.owner.display_name);
s->formatter->open_array_section("CustomMetadata");
for (auto& m : e.meta.custom_str) {
s->formatter->open_object_section("Entry");
diff --git a/src/rgw/driver/rados/rgw_tools.cc b/src/rgw/driver/rados/rgw_tools.cc
index 635e5ce887d..269c790e401 100644
--- a/src/rgw/driver/rados/rgw_tools.cc
+++ b/src/rgw/driver/rados/rgw_tools.cc
@@ -1,6 +1,8 @@
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
// vim: ts=8 sw=2 smarttab ft=cpp
+#include "auth/AuthRegistry.h"
+
#include "common/errno.h"
#include "librados/librados_asio.h"
@@ -95,6 +97,24 @@ int rgw_init_ioctx(const DoutPrefixProvider *dpp,
return 0;
}
+int rgw_get_rados_ref(const DoutPrefixProvider* dpp, librados::Rados* rados,
+ rgw_raw_obj obj, rgw_rados_ref* ref)
+{
+ ref->obj = std::move(obj);
+
+ int r = rgw_init_ioctx(dpp, rados, ref->obj.pool,
+ ref->ioctx, true, false);
+ if (r < 0) {
+ ldpp_dout(dpp, 0) << "ERROR: creating ioctx (pool=" << ref->obj.pool
+ << "); r=" << r << dendl;
+ return r;
+ }
+
+ ref->ioctx.locator_set_key(ref->obj.loc);
+ return 0;
+}
+
+
map<string, bufferlist>* no_change_attrs() {
static map<string, bufferlist> no_change;
return &no_change;
@@ -335,8 +355,7 @@ int RGWDataAccess::Object::put(bufferlist& data,
string req_id = driver->zone_unique_id(driver->get_new_req_id());
std::unique_ptr<rgw::sal::Writer> processor;
- processor = driver->get_atomic_writer(dpp, y, obj.get(),
- owner.get_id(),
+ processor = driver->get_atomic_writer(dpp, y, obj.get(), owner.id,
nullptr, olh_epoch, req_id);
int ret = processor->prepare(y);
@@ -393,9 +412,10 @@ int RGWDataAccess::Object::put(bufferlist& data,
}
if (!aclbl) {
- RGWAccessControlPolicy_S3 policy(cct);
+ RGWAccessControlPolicy policy;
- policy.create_canned(bucket->policy.get_owner(), bucket->policy.get_owner(), string()); /* default private policy */
+ const auto& owner = bucket->policy.get_owner();
+ policy.create_default(owner.id, owner.display_name); // default private policy
policy.encode(aclbl.emplace());
}
@@ -435,3 +455,82 @@ void rgw_complete_aio_completion(librados::AioCompletion* c, int r) {
librados::CB_AioCompleteAndSafe cb(pc);
cb(r);
}
+
+bool rgw_check_secure_mon_conn(const DoutPrefixProvider *dpp)
+{
+ AuthRegistry reg(dpp->get_cct());
+
+ reg.refresh_config();
+
+ std::vector<uint32_t> methods;
+ std::vector<uint32_t> modes;
+
+ reg.get_supported_methods(CEPH_ENTITY_TYPE_MON, &methods, &modes);
+ ldpp_dout(dpp, 20) << __func__ << "(): auth registry supported: methods=" << methods << " modes=" << modes << dendl;
+
+ for (auto method : methods) {
+ if (!reg.is_secure_method(method)) {
+ ldpp_dout(dpp, 20) << __func__ << "(): method " << method << " is insecure" << dendl;
+ return false;
+ }
+ }
+
+ for (auto mode : modes) {
+ if (!reg.is_secure_mode(mode)) {
+ ldpp_dout(dpp, 20) << __func__ << "(): mode " << mode << " is insecure" << dendl;
+ return false;
+ }
+ }
+
+ return true;
+}
+
+int rgw_clog_warn(librados::Rados* h, const string& msg)
+{
+ string cmd =
+ "{"
+ "\"prefix\": \"log\", "
+ "\"level\": \"warn\", "
+ "\"logtext\": [\"" + msg + "\"]"
+ "}";
+
+ bufferlist inbl;
+ return h->mon_command(cmd, inbl, nullptr, nullptr);
+}
+
+int rgw_list_pool(const DoutPrefixProvider *dpp,
+ librados::IoCtx& ioctx,
+ uint32_t max,
+ const rgw::AccessListFilter& filter,
+ std::string& marker,
+ std::vector<string> *oids,
+ bool *is_truncated)
+{
+ librados::ObjectCursor oc;
+ if (!oc.from_str(marker)) {
+ ldpp_dout(dpp, 10) << "failed to parse cursor: " << marker << dendl;
+ return -EINVAL;
+ }
+
+ auto iter = ioctx.nobjects_begin(oc);
+ /// Pool_iterate
+ if (iter == ioctx.nobjects_end())
+ return -ENOENT;
+
+ for (; oids->size() < max && iter != ioctx.nobjects_end(); ++iter) {
+ string oid = iter->get_oid();
+ ldpp_dout(dpp, 20) << "RGWRados::pool_iterate: got " << oid << dendl;
+
+ // skip oids that the caller-supplied filter rejects
+ if (filter && !filter(oid, oid))
+ continue;
+
+ oids->push_back(oid);
+ }
+
+ marker = iter.get_cursor().to_str();
+ if (is_truncated)
+ *is_truncated = (iter != ioctx.nobjects_end());
+
+ return oids->size();
+}
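
rgw_list_pool() lists one page of objects per call, appending up to max accepted oids and returning the continuation cursor through marker. A sketch of paging through a pool with it; the page size is arbitrary, and an empty marker is assumed here to denote the start of the pool:

```cpp
#include <cerrno>
#include <string>
#include <vector>

int drain_pool_sketch(const DoutPrefixProvider* dpp, librados::IoCtx& ioctx,
                      const rgw::AccessListFilter& filter,
                      std::vector<std::string>& all_oids) {
  std::string marker;        // assumed: empty cursor == start of the pool
  bool truncated = true;
  while (truncated) {
    std::vector<std::string> page;
    int r = rgw_list_pool(dpp, ioctx, 100, filter, marker, &page, &truncated);
    if (r == -ENOENT) {
      break;                 // nothing (left) to list
    } else if (r < 0) {
      return r;
    }
    all_oids.insert(all_oids.end(), page.begin(), page.end());
  }
  return 0;
}
```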
diff --git a/src/rgw/driver/rados/rgw_tools.h b/src/rgw/driver/rados/rgw_tools.h
index 607b064b177..27a8b424ecc 100644
--- a/src/rgw/driver/rados/rgw_tools.h
+++ b/src/rgw/driver/rados/rgw_tools.h
@@ -3,7 +3,9 @@
#pragma once
+#include <functional>
#include <string>
+#include <string_view>
#include "include/types.h"
#include "include/ceph_hash.h"
@@ -21,7 +23,6 @@ class optional_yield;
struct obj_version;
-
int rgw_init_ioctx(const DoutPrefixProvider *dpp,
librados::Rados *rados, const rgw_pool& pool,
librados::IoCtx& ioctx,
@@ -104,6 +105,63 @@ int rgw_rados_notify(const DoutPrefixProvider *dpp, librados::IoCtx& ioctx, cons
bufferlist& bl, uint64_t timeout_ms, bufferlist* pbl,
optional_yield y);
+struct rgw_rados_ref {
+ librados::IoCtx ioctx;
+ rgw_raw_obj obj;
+
+
+ int operate(const DoutPrefixProvider* dpp, librados::ObjectReadOperation* op,
+ bufferlist* pbl, optional_yield y, int flags = 0) {
+ return rgw_rados_operate(dpp, ioctx, obj.oid, op, pbl, y, flags);
+ }
+
+ int operate(const DoutPrefixProvider* dpp, librados::ObjectWriteOperation* op,
+ optional_yield y, int flags = 0) {
+ return rgw_rados_operate(dpp, ioctx, obj.oid, op, y, flags);
+ }
+
+ int aio_operate(librados::AioCompletion* c,
+ librados::ObjectWriteOperation* op) {
+ return ioctx.aio_operate(obj.oid, c, op);
+ }
+
+ int aio_operate(librados::AioCompletion* c, librados::ObjectReadOperation* op,
+ bufferlist *pbl) {
+ return ioctx.aio_operate(obj.oid, c, op, pbl);
+ }
+
+ int watch(uint64_t* handle, librados::WatchCtx2* ctx) {
+ return ioctx.watch2(obj.oid, handle, ctx);
+ }
+
+ int aio_watch(librados::AioCompletion* c, uint64_t* handle,
+ librados::WatchCtx2 *ctx) {
+ return ioctx.aio_watch(obj.oid, c, handle, ctx);
+ }
+
+ int unwatch(uint64_t handle) {
+ return ioctx.unwatch2(handle);
+ }
+
+ int notify(const DoutPrefixProvider* dpp, bufferlist& bl, uint64_t timeout_ms,
+ bufferlist* pbl, optional_yield y) {
+ return rgw_rados_notify(dpp, ioctx, obj.oid, bl, timeout_ms, pbl, y);
+ }
+
+ void notify_ack(uint64_t notify_id, uint64_t cookie, bufferlist& bl) {
+ ioctx.notify_ack(obj.oid, notify_id, cookie, bl);
+ }
+};
+
+inline std::ostream& operator <<(std::ostream& m, const rgw_rados_ref& ref) {
+ return m << ref.obj;
+}
+
+int rgw_get_rados_ref(const DoutPrefixProvider* dpp, librados::Rados* rados,
+ rgw_raw_obj obj, rgw_rados_ref* ref);
+
+
+
int rgw_tools_init(const DoutPrefixProvider *dpp, CephContext *cct);
void rgw_tools_cleanup();
@@ -274,3 +332,14 @@ void rgw_complete_aio_completion(librados::AioCompletion* c, int r);
// (Currently providing nullptr will wipe all attributes.)
std::map<std::string, ceph::buffer::list>* no_change_attrs();
+
+bool rgw_check_secure_mon_conn(const DoutPrefixProvider *dpp);
+int rgw_clog_warn(librados::Rados* h, const std::string& msg);
+
+int rgw_list_pool(const DoutPrefixProvider *dpp,
+ librados::IoCtx& ioctx,
+ uint32_t max,
+ const rgw::AccessListFilter& filter,
+ std::string& marker,
+ std::vector<std::string> *oids,
+ bool *is_truncated);
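
rgw_rados_ref now bundles an open IoCtx with the raw object coordinates, and rgw_get_rados_ref() replaces the old RGWSI_RADOS::Obj::open() step; the thin wrappers (operate, aio_operate, watch, notify) forward to the embedded IoCtx using the stored oid. A sketch of the pattern the call sites in this diff migrate to; the pool and object names are illustrative only:

```cpp
int touch_control_obj_sketch(const DoutPrefixProvider* dpp,
                             librados::Rados* rados, optional_yield y) {
  const rgw_raw_obj obj(rgw_pool("illustrative.pool"), "illustrative_oid");

  rgw_rados_ref ref;
  int r = rgw_get_rados_ref(dpp, rados, obj, &ref);  // opens IoCtx, sets locator
  if (r < 0) {
    return r;
  }

  librados::ObjectWriteOperation op;
  op.create(false);                  // idempotent create of the control object
  return ref.operate(dpp, &op, y);   // rgw_rados_operate() under the hood
}
```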
diff --git a/src/rgw/driver/rados/rgw_trim_bilog.cc b/src/rgw/driver/rados/rgw_trim_bilog.cc
index 16ced4c2ac5..db998047005 100644
--- a/src/rgw/driver/rados/rgw_trim_bilog.cc
+++ b/src/rgw/driver/rados/rgw_trim_bilog.cc
@@ -270,18 +270,18 @@ class BucketTrimWatcher : public librados::WatchCtx2 {
}
// register a watch on the realm's control object
- r = ref.pool.ioctx().watch2(ref.obj.oid, &handle, this);
+ r = ref.ioctx.watch2(ref.obj.oid, &handle, this);
if (r == -ENOENT) {
constexpr bool exclusive = true;
- r = ref.pool.ioctx().create(ref.obj.oid, exclusive);
+ r = ref.ioctx.create(ref.obj.oid, exclusive);
if (r == -EEXIST || r == 0) {
- r = ref.pool.ioctx().watch2(ref.obj.oid, &handle, this);
+ r = ref.ioctx.watch2(ref.obj.oid, &handle, this);
}
}
if (r < 0) {
ldpp_dout(dpp, -1) << "Failed to watch " << ref.obj
<< " with " << cpp_strerror(-r) << dendl;
- ref.pool.ioctx().close();
+ ref.ioctx.close();
return r;
}
@@ -290,24 +290,24 @@ class BucketTrimWatcher : public librados::WatchCtx2 {
}
int restart() {
- int r = ref.pool.ioctx().unwatch2(handle);
+ int r = ref.ioctx.unwatch2(handle);
if (r < 0) {
lderr(store->ctx()) << "Failed to unwatch on " << ref.obj
<< " with " << cpp_strerror(-r) << dendl;
}
- r = ref.pool.ioctx().watch2(ref.obj.oid, &handle, this);
+ r = ref.ioctx.watch2(ref.obj.oid, &handle, this);
if (r < 0) {
lderr(store->ctx()) << "Failed to restart watch on " << ref.obj
<< " with " << cpp_strerror(-r) << dendl;
- ref.pool.ioctx().close();
+ ref.ioctx.close();
}
return r;
}
void stop() {
if (handle) {
- ref.pool.ioctx().unwatch2(handle);
- ref.pool.ioctx().close();
+ ref.ioctx.unwatch2(handle);
+ ref.ioctx.close();
}
}
@@ -332,7 +332,7 @@ class BucketTrimWatcher : public librados::WatchCtx2 {
} catch (const buffer::error& e) {
lderr(store->ctx()) << "Failed to decode notification: " << e.what() << dendl;
}
- ref.pool.ioctx().notify_ack(ref.obj.oid, notify_id, cookie, reply);
+ ref.ioctx.notify_ack(ref.obj.oid, notify_id, cookie, reply);
}
/// reestablish the watch if it gets disconnected
@@ -617,7 +617,7 @@ int BucketTrimInstanceCR::operate(const DoutPrefixProvider *dpp)
get_policy_params.zone = zone_id;
get_policy_params.bucket = bucket;
- yield call(new RGWBucketGetSyncPolicyHandlerCR(store->svc()->rados->get_async_processor(),
+ yield call(new RGWBucketGetSyncPolicyHandlerCR(store->svc()->async_processor,
store,
get_policy_params,
source_policy,
@@ -728,14 +728,14 @@ int BucketTrimInstanceCR::operate(const DoutPrefixProvider *dpp)
}
while (clean_info && retries < MAX_RETRIES) {
yield call(new RGWPutBucketInstanceInfoCR(
- store->svc()->rados->get_async_processor(),
+ store->svc()->async_processor,
store, clean_info->first, false, {},
no_change_attrs(), dpp));
// Raced, try again.
if (retcode == -ECANCELED) {
yield call(new RGWGetBucketInstanceInfoCR(
- store->svc()->rados->get_async_processor(),
+ store->svc()->async_processor,
store, clean_info->first.bucket,
&(clean_info->first), nullptr, dpp));
if (retcode < 0) {
@@ -1132,7 +1132,7 @@ int BucketTrimCR::operate(const DoutPrefixProvider *dpp)
return buckets.size() < config.buckets_per_interval;
};
- call(new MetadataListCR(cct, store->svc()->rados->get_async_processor(),
+ call(new MetadataListCR(cct, store->svc()->async_processor,
store->ctl()->meta.mgr,
section, status.marker, cb));
}
@@ -1219,7 +1219,7 @@ int BucketTrimPollCR::operate(const DoutPrefixProvider *dpp)
// prevent others from trimming for our entire wait interval
set_status("acquiring trim lock");
- yield call(new RGWSimpleRadosLockCR(store->svc()->rados->get_async_processor(), store,
+ yield call(new RGWSimpleRadosLockCR(store->svc()->async_processor, store,
obj, name, cookie,
config.trim_interval_sec));
if (retcode < 0) {
@@ -1232,7 +1232,7 @@ int BucketTrimPollCR::operate(const DoutPrefixProvider *dpp)
if (retcode < 0) {
// on errors, unlock so other gateways can try
set_status("unlocking");
- yield call(new RGWSimpleRadosUnlockCR(store->svc()->rados->get_async_processor(), store,
+ yield call(new RGWSimpleRadosUnlockCR(store->svc()->async_processor, store,
obj, name, cookie));
}
}
diff --git a/src/rgw/driver/rados/rgw_trim_datalog.cc b/src/rgw/driver/rados/rgw_trim_datalog.cc
index bac0cda8dd6..5dcddb659e1 100644
--- a/src/rgw/driver/rados/rgw_trim_datalog.cc
+++ b/src/rgw/driver/rados/rgw_trim_datalog.cc
@@ -224,7 +224,7 @@ int DataLogTrimPollCR::operate(const DoutPrefixProvider *dpp)
// prevent other gateways from attempting to trim for the duration
set_status("acquiring trim lock");
- yield call(new RGWSimpleRadosLockCR(store->svc()->rados->get_async_processor(), store,
+ yield call(new RGWSimpleRadosLockCR(store->svc()->async_processor, store,
rgw_raw_obj(store->svc()->zone->get_zone_params().log_pool, lock_oid),
"data_trim", lock_cookie,
// interval is a small number and unlikely to overflow
diff --git a/src/rgw/driver/rados/rgw_trim_mdlog.cc b/src/rgw/driver/rados/rgw_trim_mdlog.cc
index 13773ae2877..be513b6e74d 100644
--- a/src/rgw/driver/rados/rgw_trim_mdlog.cc
+++ b/src/rgw/driver/rados/rgw_trim_mdlog.cc
@@ -565,7 +565,7 @@ class MetaPeerTrimShardCollectCR : public RGWShardCollectCR {
env(env), mdlog(mdlog), period_id(env.current.get_period().get_id())
{
meta_env.init(env.dpp, cct, env.store, env.store->svc()->zone->get_master_conn(),
- env.store->svc()->rados->get_async_processor(), env.http, nullptr,
+ env.store->svc()->async_processor, env.http, nullptr,
env.store->getRados()->get_sync_tracer());
}
@@ -669,7 +669,7 @@ int MetaTrimPollCR::operate(const DoutPrefixProvider *dpp)
// prevent others from trimming for our entire wait interval
set_status("acquiring trim lock");
- yield call(new RGWSimpleRadosLockCR(store->svc()->rados->get_async_processor(), store,
+ yield call(new RGWSimpleRadosLockCR(store->svc()->async_processor, store,
obj, name, cookie,
// interval is a small number and unlikely to overflow
// coverity[store_truncates_time_t:SUPPRESS]
@@ -685,7 +685,7 @@ int MetaTrimPollCR::operate(const DoutPrefixProvider *dpp)
if (retcode < 0) {
// on errors, unlock so other gateways can try
set_status("unlocking");
- yield call(new RGWSimpleRadosUnlockCR(store->svc()->rados->get_async_processor(), store,
+ yield call(new RGWSimpleRadosUnlockCR(store->svc()->async_processor, store,
obj, name, cookie));
}
}
diff --git a/src/rgw/driver/rados/rgw_user.cc b/src/rgw/driver/rados/rgw_user.cc
index 506ac0acf11..b5569e481c5 100644
--- a/src/rgw/driver/rados/rgw_user.cc
+++ b/src/rgw/driver/rados/rgw_user.cc
@@ -237,7 +237,7 @@ int RGWAccessKeyPool::init(RGWUserAdminOpState& op_state)
}
const rgw_user& uid = op_state.get_user_id();
- if (uid.compare(RGW_USER_ANON_ID) == 0) {
+ if (uid == rgw_user(RGW_USER_ANON_ID)) {
keys_allowed = false;
return -EINVAL;
}
@@ -890,7 +890,7 @@ int RGWSubUserPool::init(RGWUserAdminOpState& op_state)
}
const rgw_user& uid = op_state.get_user_id();
- if (uid.compare(RGW_USER_ANON_ID) == 0) {
+ if (uid == rgw_user(RGW_USER_ANON_ID)) {
subusers_allowed = false;
return -EACCES;
}
@@ -1198,7 +1198,7 @@ int RGWUserCapPool::init(RGWUserAdminOpState& op_state)
}
const rgw_user& uid = op_state.get_user_id();
- if (uid.compare(RGW_USER_ANON_ID) == 0) {
+ if (uid == rgw_user(RGW_USER_ANON_ID)) {
caps_allowed = false;
return -EACCES;
}
@@ -1373,7 +1373,7 @@ int RGWUser::init(const DoutPrefixProvider *dpp, RGWUserAdminOpState& op_state,
}
}
- if (!user_id.empty() && (user_id.compare(RGW_USER_ANON_ID) != 0)) {
+ if (!user_id.empty() && user_id != rgw_user(RGW_USER_ANON_ID)) {
user = driver->get_user(user_id);
found = (user->load_user(dpp, y) >= 0);
op_state.found_by_uid = found;
@@ -1477,12 +1477,12 @@ int RGWUser::check_op(RGWUserAdminOpState& op_state, std::string *err_msg)
int ret = 0;
const rgw_user& uid = op_state.get_user_id();
- if (uid.compare(RGW_USER_ANON_ID) == 0) {
+ if (uid == rgw_user(RGW_USER_ANON_ID)) {
set_err_msg(err_msg, "unable to perform operations on the anonymous user");
return -EINVAL;
}
- if (is_populated() && user_id.compare(uid) != 0) {
+ if (is_populated() && user_id != uid) {
set_err_msg(err_msg, "user id mismatch, operation id: " + uid.to_str()
+ " does not match: " + user_id.to_str());
@@ -1858,7 +1858,7 @@ int RGWUser::execute_modify(const DoutPrefixProvider *dpp, RGWUserAdminOpState&
}
// ensure that we can modify the user's attributes
- if (user_id.compare(RGW_USER_ANON_ID) == 0) {
+ if (user_id == rgw_user(RGW_USER_ANON_ID)) {
set_err_msg(err_msg, "unable to modify anonymous user's info");
return -EACCES;
}
@@ -1870,7 +1870,7 @@ int RGWUser::execute_modify(const DoutPrefixProvider *dpp, RGWUserAdminOpState&
// make sure we are not adding a duplicate email
if (old_email != op_email) {
ret = driver->get_user_by_email(dpp, op_email, y, &duplicate_check);
- if (ret >= 0 && duplicate_check->get_id().compare(user_id) != 0) {
+ if (ret >= 0 && duplicate_check->get_id() != user_id) {
set_err_msg(err_msg, "cannot add duplicate email");
return -ERR_EMAIL_EXIST;
}
diff --git a/src/rgw/driver/rados/rgw_user.h b/src/rgw/driver/rados/rgw_user.h
index d5eb167daa3..a0cd7ed84fe 100644
--- a/src/rgw/driver/rados/rgw_user.h
+++ b/src/rgw/driver/rados/rgw_user.h
@@ -30,8 +30,6 @@ class RGWUserCtl;
class RGWBucketCtl;
class RGWUserBuckets;
-class RGWGetUserStats_CB;
-
/**
* A string wrapper that includes encode/decode functions
* for easily accessing a UID in all forms
diff --git a/src/rgw/driver/rados/sync_fairness.cc b/src/rgw/driver/rados/sync_fairness.cc
index ded1cf56a71..1ac27f4bacc 100644
--- a/src/rgw/driver/rados/sync_fairness.cc
+++ b/src/rgw/driver/rados/sync_fairness.cc
@@ -144,18 +144,18 @@ class Watcher : public librados::WatchCtx2 {
}
// register a watch on the control object
- r = ref.pool.ioctx().watch2(ref.obj.oid, &handle, this);
+ r = ref.ioctx.watch2(ref.obj.oid, &handle, this);
if (r == -ENOENT) {
constexpr bool exclusive = true;
- r = ref.pool.ioctx().create(ref.obj.oid, exclusive);
+ r = ref.ioctx.create(ref.obj.oid, exclusive);
if (r == -EEXIST || r == 0) {
- r = ref.pool.ioctx().watch2(ref.obj.oid, &handle, this);
+ r = ref.ioctx.watch2(ref.obj.oid, &handle, this);
}
}
if (r < 0) {
ldpp_dout(dpp, -1) << "Failed to watch " << ref.obj
<< " with " << cpp_strerror(-r) << dendl;
- ref.pool.ioctx().close();
+ ref.ioctx.close();
return r;
}
@@ -165,16 +165,16 @@ class Watcher : public librados::WatchCtx2 {
int restart()
{
- int r = ref.pool.ioctx().unwatch2(handle);
+ int r = ref.ioctx.unwatch2(handle);
if (r < 0) {
ldpp_dout(dpp, -1) << "Failed to unwatch on " << ref.obj
<< " with " << cpp_strerror(-r) << dendl;
}
- r = ref.pool.ioctx().watch2(ref.obj.oid, &handle, this);
+ r = ref.ioctx.watch2(ref.obj.oid, &handle, this);
if (r < 0) {
ldpp_dout(dpp, -1) << "Failed to restart watch on " << ref.obj
<< " with " << cpp_strerror(-r) << dendl;
- ref.pool.ioctx().close();
+ ref.ioctx.close();
}
return r;
}
@@ -182,8 +182,8 @@ class Watcher : public librados::WatchCtx2 {
void stop()
{
if (handle) {
- ref.pool.ioctx().unwatch2(handle);
- ref.pool.ioctx().close();
+ ref.ioctx.unwatch2(handle);
+ ref.ioctx.close();
}
}
@@ -210,7 +210,7 @@ class Watcher : public librados::WatchCtx2 {
bufferlist reply;
encode(response, reply);
- ref.pool.ioctx().notify_ack(ref.obj.oid, notify_id, cookie, reply);
+ ref.ioctx.notify_ack(ref.obj.oid, notify_id, cookie, reply);
}
// reestablish the watch if it gets disconnected
@@ -289,7 +289,7 @@ class RadosBidManager : public BidManager, public Server, public DoutPrefix {
my_bids = this->my_bids;
}
- bool is_highest_bidder(std::size_t index)
+ bool is_highest_bidder(std::size_t index) override
{
auto lock = std::scoped_lock{mutex};
const bid_value my_bid = my_bids.at(index); // may throw
@@ -303,7 +303,7 @@ class RadosBidManager : public BidManager, public Server, public DoutPrefix {
return true;
}
- RGWCoroutine* notify_cr()
+ RGWCoroutine* notify_cr() override
{
auto lock = std::scoped_lock{mutex};
return new NotifyCR(store, this, obj, my_bids);
diff --git a/src/rgw/rgw_acl.cc b/src/rgw/rgw_acl.cc
index f32a73f2652..4369bedfc76 100644
--- a/src/rgw/rgw_acl.cc
+++ b/src/rgw/rgw_acl.cc
@@ -33,10 +33,7 @@ bool operator!=(const ACLGranteeType& lhs, const ACLGranteeType& rhs) {
}
bool operator==(const ACLGrant& lhs, const ACLGrant& rhs) {
- return lhs.type == rhs.type && lhs.id == rhs.id
- && lhs.email == rhs.email && lhs.permission == rhs.permission
- && lhs.name == rhs.name && lhs.group == rhs.group
- && lhs.url_spec == rhs.url_spec;
+ return lhs.grantee == rhs.grantee && lhs.permission == rhs.permission;
}
bool operator!=(const ACLGrant& lhs, const ACLGrant& rhs) {
return !(lhs == rhs);
@@ -61,13 +58,6 @@ bool operator!=(const RGWAccessControlList& lhs,
return !(lhs == rhs);
}
-bool operator==(const ACLOwner& lhs, const ACLOwner& rhs) {
- return lhs.id == rhs.id && lhs.display_name == rhs.display_name;
-}
-bool operator!=(const ACLOwner& lhs, const ACLOwner& rhs) {
- return !(lhs == rhs);
-}
-
bool operator==(const RGWAccessControlPolicy& lhs,
const RGWAccessControlPolicy& rhs) {
return lhs.acl == rhs.acl && lhs.owner == rhs.owner;
@@ -77,59 +67,49 @@ bool operator!=(const RGWAccessControlPolicy& lhs,
return !(lhs == rhs);
}
-void RGWAccessControlList::_add_grant(ACLGrant *grant)
+void RGWAccessControlList::register_grant(const ACLGrant& grant)
{
- ACLPermission& perm = grant->get_permission();
- ACLGranteeType& type = grant->get_type();
- switch (type.get_type()) {
- case ACL_TYPE_REFERER:
- referer_list.emplace_back(grant->get_referer(), perm.get_permissions());
+ ACLPermission perm = grant.get_permission();
+
+ if (const auto* user = grant.get_user(); user) {
+ acl_user_map[user->id.to_str()] |= perm.get_permissions();
+ } else if (const auto* email = grant.get_email(); email) {
+ acl_user_map[email->address] |= perm.get_permissions();
+ } else if (const auto* group = grant.get_group(); group) {
+ acl_group_map[group->type] |= perm.get_permissions();
+ } else if (const auto* referer = grant.get_referer(); referer) {
+ referer_list.emplace_back(referer->url_spec, perm.get_permissions());
/* We're specially handling the Swift's .r:* as the S3 API has a similar
* concept and thus we can have a small portion of compatibility here. */
- if (grant->get_referer() == RGW_REFERER_WILDCARD) {
+ if (referer->url_spec == RGW_REFERER_WILDCARD) {
acl_group_map[ACL_GROUP_ALL_USERS] |= perm.get_permissions();
}
- break;
- case ACL_TYPE_GROUP:
- acl_group_map[grant->get_group()] |= perm.get_permissions();
- break;
- default:
- {
- rgw_user id;
- if (!grant->get_id(id)) {
- ldout(cct, 0) << "ERROR: grant->get_id() failed" << dendl;
- }
- acl_user_map[id.to_str()] |= perm.get_permissions();
- }
}
}
-void RGWAccessControlList::add_grant(ACLGrant *grant)
+void RGWAccessControlList::add_grant(const ACLGrant& grant)
{
- rgw_user id;
- grant->get_id(id); // not that this will return false for groups, but that's ok, we won't search groups
- grant_map.insert(pair<string, ACLGrant>(id.to_str(), *grant));
- _add_grant(grant);
+ std::string id;
+ if (const auto* user = grant.get_user(); user) {
+ id = user->id.to_str();
+ } else if (const auto* email = grant.get_email(); email) {
+ id = email->address;
+ } // other types share the empty key in the grant multimap
+ grant_map.emplace(id, grant);
+ register_grant(grant);
}
-void RGWAccessControlList::remove_canon_user_grant(rgw_user& user_id)
+void RGWAccessControlList::remove_canon_user_grant(const rgw_user& user_id)
{
- auto multi_map_iter = grant_map.find(user_id.to_str());
- if(multi_map_iter != grant_map.end()) {
- auto grants = grant_map.equal_range(user_id.to_str());
- grant_map.erase(grants.first, grants.second);
- }
-
- auto map_iter = acl_user_map.find(user_id.to_str());
- if (map_iter != acl_user_map.end()){
- acl_user_map.erase(map_iter);
- }
+ const std::string& key = user_id.to_str();
+ grant_map.erase(key);
+ acl_user_map.erase(key);
}
uint32_t RGWAccessControlList::get_perm(const DoutPrefixProvider* dpp,
const rgw::auth::Identity& auth_identity,
- const uint32_t perm_mask)
+ const uint32_t perm_mask) const
{
ldpp_dout(dpp, 5) << "Searching permissions for identity=" << auth_identity
<< " mask=" << perm_mask << dendl;
@@ -156,7 +136,7 @@ uint32_t RGWAccessControlList::get_group_perm(const DoutPrefixProvider *dpp,
uint32_t RGWAccessControlList::get_referer_perm(const DoutPrefixProvider *dpp,
const uint32_t current_perm,
const std::string http_referer,
- const uint32_t perm_mask)
+ const uint32_t perm_mask) const
{
ldpp_dout(dpp, 5) << "Searching permissions for referer=" << http_referer
<< " mask=" << perm_mask << dendl;
@@ -180,14 +160,14 @@ uint32_t RGWAccessControlPolicy::get_perm(const DoutPrefixProvider* dpp,
const rgw::auth::Identity& auth_identity,
const uint32_t perm_mask,
const char * const http_referer,
- bool ignore_public_acls)
+ bool ignore_public_acls) const
{
ldpp_dout(dpp, 20) << "-- Getting permissions begin with perm_mask=" << perm_mask
<< dendl;
uint32_t perm = acl.get_perm(dpp, auth_identity, perm_mask);
- if (auth_identity.is_owner_of(owner.get_id())) {
+ if (auth_identity.is_owner_of(owner.id)) {
perm |= perm_mask & (RGW_PERM_READ_ACP | RGW_PERM_WRITE_ACP);
}
@@ -211,7 +191,7 @@ uint32_t RGWAccessControlPolicy::get_perm(const DoutPrefixProvider* dpp,
}
ldpp_dout(dpp, 5) << "-- Getting permissions done for identity=" << auth_identity
- << ", owner=" << owner.get_id()
+ << ", owner=" << owner.id
<< ", perm=" << perm << dendl;
return perm;
@@ -222,7 +202,7 @@ bool RGWAccessControlPolicy::verify_permission(const DoutPrefixProvider* dpp,
const uint32_t user_perm_mask,
const uint32_t perm,
const char * const http_referer,
- bool ignore_public_acls)
+ bool ignore_public_acls) const
{
uint32_t test_perm = perm | RGW_PERM_READ_OBJS | RGW_PERM_WRITE_OBJS;
@@ -286,31 +266,36 @@ void ACLGranteeType::dump(Formatter *f) const
void ACLGrant::dump(Formatter *f) const
{
f->open_object_section("type");
- type.dump(f);
+ get_type().dump(f);
f->close_section();
- f->dump_string("id", id.to_str());
- f->dump_string("email", email);
+ struct dump_visitor {
+ Formatter* f;
- f->open_object_section("permission");
- permission.dump(f);
- f->close_section();
+ void operator()(const ACLGranteeCanonicalUser& user) {
+ encode_json("id", user.id, f);
+ encode_json("name", user.name, f);
+ }
+ void operator()(const ACLGranteeEmailUser& email) {
+ encode_json("email", email.address, f);
+ }
+ void operator()(const ACLGranteeGroup& group) {
+ encode_json("group", static_cast<int>(group.type), f);
+ }
+ void operator()(const ACLGranteeUnknown&) {}
+ void operator()(const ACLGranteeReferer& r) {
+ encode_json("url_spec", r.url_spec, f);
+ }
+ };
+ std::visit(dump_visitor{f}, grantee);
- f->dump_string("name", name);
- f->dump_int("group", (int)group);
- f->dump_string("url_spec", url_spec);
+ encode_json("permission", permission, f);
}
void ACLGrant::generate_test_instances(list<ACLGrant*>& o)
{
- rgw_user id("rgw");
- string name, email;
- name = "Mr. RGW";
- email = "r@gw";
-
ACLGrant *g1 = new ACLGrant;
- g1->set_canon(id, name, RGW_PERM_READ);
- g1->email = email;
+ g1->set_canon(rgw_user{"rgw"}, "Mr. RGW", RGW_PERM_READ);
o.push_back(g1);
ACLGrant *g2 = new ACLGrant;
@@ -322,28 +307,22 @@ void ACLGrant::generate_test_instances(list<ACLGrant*>& o)
void ACLGranteeType::generate_test_instances(list<ACLGranteeType*>& o)
{
- ACLGranteeType *t = new ACLGranteeType;
- t->set(ACL_TYPE_CANON_USER);
- o.push_back(t);
+ o.push_back(new ACLGranteeType(ACL_TYPE_CANON_USER));
o.push_back(new ACLGranteeType);
}
void RGWAccessControlList::generate_test_instances(list<RGWAccessControlList*>& o)
{
- RGWAccessControlList *acl = new RGWAccessControlList(NULL);
-
- list<ACLGrant *> glist;
- list<ACLGrant *>::iterator iter;
-
- ACLGrant::generate_test_instances(glist);
- for (iter = glist.begin(); iter != glist.end(); ++iter) {
- ACLGrant *grant = *iter;
- acl->add_grant(grant);
+ RGWAccessControlList *acl = new RGWAccessControlList;
+ list<ACLGrant *> grants;
+ ACLGrant::generate_test_instances(grants);
+ for (ACLGrant* grant : grants) {
+ acl->add_grant(*grant);
delete grant;
}
o.push_back(acl);
- o.push_back(new RGWAccessControlList(NULL));
+ o.push_back(new RGWAccessControlList);
}
void ACLOwner::generate_test_instances(list<ACLOwner*>& o)
@@ -363,21 +342,19 @@ void RGWAccessControlPolicy::generate_test_instances(list<RGWAccessControlPolicy
RGWAccessControlList::generate_test_instances(acl_list);
iter = acl_list.begin();
- RGWAccessControlPolicy *p = new RGWAccessControlPolicy(NULL);
+ RGWAccessControlPolicy *p = new RGWAccessControlPolicy;
RGWAccessControlList *l = *iter;
p->acl = *l;
- string name = "radosgw";
- rgw_user id("rgw");
- p->owner.set_name(name);
- p->owner.set_id(id);
+ p->owner.id.id = "rgw";
+ p->owner.display_name = "radosgw";
o.push_back(p);
delete l;
}
- o.push_back(new RGWAccessControlPolicy(NULL));
+ o.push_back(new RGWAccessControlPolicy);
}
void RGWAccessControlList::dump(Formatter *f) const
@@ -434,9 +411,9 @@ void RGWAccessControlPolicy::dump(Formatter *f) const
encode_json("owner", owner, f);
}
-ACLGroupTypeEnum ACLGrant::uri_to_group(string& uri)
+ACLGroupTypeEnum ACLGrant::uri_to_group(std::string_view uri)
{
// this is required for backward compatibility
- return ACLGrant_S3::uri_to_group(uri);
+ return rgw::s3::acl_uri_to_group(uri);
}
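
ACLGrant's grantee is now a std::variant, and callers inspect it through the typed accessors (get_user(), get_email(), get_group(), get_referer()) added in rgw_acl.h below, rather than through get_id()/get_type() switches; add_grant() takes the grant by const reference. A short sketch of both sides, with the printing purely illustrative:

```cpp
#include <iostream>
#include <string>

void describe_grant_sketch(const ACLGrant& grant) {
  if (const auto* user = grant.get_user(); user) {
    std::cout << "canonical user " << user->id.to_str() << "\n";
  } else if (const auto* email = grant.get_email(); email) {
    std::cout << "email grantee " << email->address << "\n";
  } else if (const auto* group = grant.get_group(); group) {
    std::cout << "group " << static_cast<int>(group->type) << "\n";
  } else if (const auto* referer = grant.get_referer(); referer) {
    std::cout << "referer " << referer->url_spec << "\n";
  } else {
    std::cout << "unknown grantee\n";   // ACLGranteeUnknown
  }
}

// Grants are built by value and copied into the list:
void grant_read_sketch(RGWAccessControlList& acl, const rgw_user& uid,
                       const std::string& display_name) {
  ACLGrant grant;
  grant.set_canon(uid, display_name, RGW_PERM_READ);
  acl.add_grant(grant);   // was add_grant(&grant)
}
```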
diff --git a/src/rgw/rgw_acl.h b/src/rgw/rgw_acl.h
index c520501583b..e2887a7049b 100644
--- a/src/rgw/rgw_acl.h
+++ b/src/rgw/rgw_acl.h
@@ -6,6 +6,7 @@
#include <map>
#include <string>
#include <string_view>
+#include <variant>
#include <include/types.h>
#include <boost/optional.hpp>
@@ -15,118 +16,180 @@
#include "rgw_basic_types.h" //includes rgw_acl_types.h
+// acl grantee types
+struct ACLGranteeCanonicalUser {
+ rgw_user id;
+ std::string name;
+
+ friend auto operator<=>(const ACLGranteeCanonicalUser&,
+ const ACLGranteeCanonicalUser&) = default;
+};
+struct ACLGranteeEmailUser {
+ std::string address;
+
+ friend auto operator<=>(const ACLGranteeEmailUser&,
+ const ACLGranteeEmailUser&) = default;
+};
+struct ACLGranteeGroup {
+ ACLGroupTypeEnum type = ACL_GROUP_NONE;
+
+ friend auto operator<=>(const ACLGranteeGroup&,
+ const ACLGranteeGroup&) = default;
+};
+struct ACLGranteeUnknown {
+ friend auto operator<=>(const ACLGranteeUnknown&,
+ const ACLGranteeUnknown&) = default;
+};
+struct ACLGranteeReferer {
+ std::string url_spec;
+
+ friend auto operator<=>(const ACLGranteeReferer&,
+ const ACLGranteeReferer&) = default;
+};
+
class ACLGrant
{
protected:
- ACLGranteeType type;
- rgw_user id;
- std::string email;
- mutable rgw_user email_id;
+ // acl grantee variant, where variant index matches ACLGranteeTypeEnum
+ using ACLGrantee = std::variant<
+ ACLGranteeCanonicalUser,
+ ACLGranteeEmailUser,
+ ACLGranteeGroup,
+ ACLGranteeUnknown,
+ ACLGranteeReferer>;
+
+ ACLGrantee grantee;
ACLPermission permission;
- std::string name;
- ACLGroupTypeEnum group;
- std::string url_spec;
public:
- ACLGrant() : group(ACL_GROUP_NONE) {}
- virtual ~ACLGrant() {}
-
- /* there's an assumption here that email/uri/id encodings are
- different and there can't be any overlap */
- bool get_id(rgw_user& _id) const {
- switch(type.get_type()) {
- case ACL_TYPE_EMAIL_USER:
- _id = email; // implies from_str() that parses the 't:u' syntax
- return true;
- case ACL_TYPE_GROUP:
- case ACL_TYPE_REFERER:
- return false;
- default:
- _id = id;
- return true;
- }
+ ACLGranteeType get_type() const {
+ return static_cast<ACLGranteeTypeEnum>(grantee.index());
}
+ ACLPermission get_permission() const { return permission; }
- const rgw_user* get_id() const {
- switch(type.get_type()) {
- case ACL_TYPE_EMAIL_USER:
- email_id.from_str(email);
- return &email_id;
- case ACL_TYPE_GROUP:
- case ACL_TYPE_REFERER:
- return nullptr;
- default:
- return &id;
- }
+ // return the user grantee, or nullptr
+ const ACLGranteeCanonicalUser* get_user() const {
+ return std::get_if<ACLGranteeCanonicalUser>(&grantee);
+ }
+ // return the email grantee, or nullptr
+ const ACLGranteeEmailUser* get_email() const {
+ return std::get_if<ACLGranteeEmailUser>(&grantee);
+ }
+ // return the group grantee, or nullptr
+ const ACLGranteeGroup* get_group() const {
+ return std::get_if<ACLGranteeGroup>(&grantee);
+ }
+ // return the referer grantee, or nullptr
+ const ACLGranteeReferer* get_referer() const {
+ return std::get_if<ACLGranteeReferer>(&grantee);
}
-
- ACLGranteeType& get_type() { return type; }
- const ACLGranteeType& get_type() const { return type; }
- ACLPermission& get_permission() { return permission; }
- const ACLPermission& get_permission() const { return permission; }
- ACLGroupTypeEnum get_group() const { return group; }
- const std::string& get_referer() const { return url_spec; }
void encode(bufferlist& bl) const {
ENCODE_START(5, 3, bl);
+ ACLGranteeType type = get_type();
encode(type, bl);
- std::string s;
- id.to_str(s);
- encode(s, bl);
- std::string uri;
+
+ if (const ACLGranteeCanonicalUser* user = get_user(); user) {
+ encode(user->id.to_str(), bl);
+ } else {
+ encode(std::string{}, bl); // encode empty id
+ }
+
+ std::string uri; // always empty, v2 converted to 'ACLGroupTypeEnum g' below
encode(uri, bl);
- encode(email, bl);
+
+ if (const ACLGranteeEmailUser* email = get_email(); email) {
+ encode(email->address, bl);
+ } else {
+ encode(std::string{}, bl); // encode empty email address
+ }
encode(permission, bl);
- encode(name, bl);
- __u32 g = (__u32)group;
+ if (const ACLGranteeCanonicalUser* user = get_user(); user) {
+ encode(user->name, bl);
+ } else {
+ encode(std::string{}, bl); // encode empty name
+ }
+
+ __u32 g;
+ if (const ACLGranteeGroup* group = get_group(); group) {
+ g = static_cast<__u32>(group->type);
+ } else {
+ g = static_cast<__u32>(ACL_GROUP_NONE);
+ }
encode(g, bl);
- encode(url_spec, bl);
+
+ if (const ACLGranteeReferer* referer = get_referer(); referer) {
+ encode(referer->url_spec, bl);
+ } else {
+ encode(std::string{}, bl); // encode empty referer
+ }
ENCODE_FINISH(bl);
}
void decode(bufferlist::const_iterator& bl) {
DECODE_START_LEGACY_COMPAT_LEN(5, 3, 3, bl);
+ ACLGranteeType type;
decode(type, bl);
+
+ ACLGranteeCanonicalUser user;
std::string s;
decode(s, bl);
- id.from_str(s);
+ user.id.from_str(s);
+
std::string uri;
decode(uri, bl);
- decode(email, bl);
+
+ ACLGranteeEmailUser email;
+ decode(email.address, bl);
+
decode(permission, bl);
- decode(name, bl);
- if (struct_v > 1) {
- __u32 g;
- decode(g, bl);
- group = (ACLGroupTypeEnum)g;
- } else {
- group = uri_to_group(uri);
- }
+ decode(user.name, bl);
+
+ ACLGranteeGroup group;
+ __u32 g;
+ decode(g, bl);
+ group.type = static_cast<ACLGroupTypeEnum>(g);
+
+ ACLGranteeReferer referer;
if (struct_v >= 5) {
- decode(url_spec, bl);
- } else {
- url_spec.clear();
+ decode(referer.url_spec, bl);
+ }
+
+ // construct the grantee type
+ switch (type) {
+ case ACL_TYPE_CANON_USER:
+ grantee = std::move(user);
+ break;
+ case ACL_TYPE_EMAIL_USER:
+ grantee = std::move(email);
+ break;
+ case ACL_TYPE_GROUP:
+ grantee = std::move(group);
+ break;
+ case ACL_TYPE_REFERER:
+ grantee = std::move(referer);
+ break;
+ case ACL_TYPE_UNKNOWN:
+ default:
+ grantee = ACLGranteeUnknown{};
+ break;
}
DECODE_FINISH(bl);
}
void dump(Formatter *f) const;
static void generate_test_instances(std::list<ACLGrant*>& o);
- ACLGroupTypeEnum uri_to_group(std::string& uri);
+ static ACLGroupTypeEnum uri_to_group(std::string_view uri);
- void set_canon(const rgw_user& _id, const std::string& _name, const uint32_t perm) {
- type.set(ACL_TYPE_CANON_USER);
- id = _id;
- name = _name;
+ void set_canon(const rgw_user& id, const std::string& name, uint32_t perm) {
+ grantee = ACLGranteeCanonicalUser{id, name};
permission.set_permissions(perm);
}
- void set_group(ACLGroupTypeEnum _group, const uint32_t perm) {
- type.set(ACL_TYPE_GROUP);
- group = _group;
+ void set_group(ACLGroupTypeEnum group, uint32_t perm) {
+ grantee = ACLGranteeGroup{group};
permission.set_permissions(perm);
}
- void set_referer(const std::string& _url_spec, const uint32_t perm) {
- type.set(ACL_TYPE_REFERER);
- url_spec = _url_spec;
+ void set_referer(const std::string& url_spec, uint32_t perm) {
+ grantee = ACLGranteeReferer{url_spec};
permission.set_permissions(perm);
}
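
Usage sketch (not from this patch): once the grantee lives in a std::variant, callers branch on the typed accessors above instead of switching on ACLGranteeType. Only names introduced in this header are assumed; the helper itself is hypothetical.

static void describe_grantee(const ACLGrant& grant, std::ostream& out)
{
  if (const auto* user = grant.get_user(); user) {
    out << "canonical user " << user->id;             // ACLGranteeCanonicalUser
  } else if (const auto* email = grant.get_email(); email) {
    out << "email user " << email->address;           // ACLGranteeEmailUser
  } else if (const auto* group = grant.get_group(); group) {
    out << "group " << static_cast<int>(group->type); // ACLGranteeGroup
  } else if (const auto* referer = grant.get_referer(); referer) {
    out << "referer " << referer->url_spec;           // ACLGranteeReferer
  } else {
    out << "unknown grantee";                         // ACLGranteeUnknown
  }
}
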
@@ -219,31 +282,22 @@ using ACLGrantMap = std::multimap<std::string, ACLGrant>;
class RGWAccessControlList
{
protected:
- CephContext *cct;
/* FIXME: in the future we should consider switching to uint32_t also
* in data structures. */
std::map<std::string, int> acl_user_map;
std::map<uint32_t, int> acl_group_map;
std::list<ACLReferer> referer_list;
ACLGrantMap grant_map;
- void _add_grant(ACLGrant *grant);
+ // register a grant in the corresponding acl_user/group_map
+ void register_grant(const ACLGrant& grant);
public:
- explicit RGWAccessControlList(CephContext *_cct) : cct(_cct) {}
- RGWAccessControlList() : cct(NULL) {}
-
- void set_ctx(CephContext *ctx) {
- cct = ctx;
- }
-
- virtual ~RGWAccessControlList() {}
-
uint32_t get_perm(const DoutPrefixProvider* dpp,
const rgw::auth::Identity& auth_identity,
- uint32_t perm_mask);
+ uint32_t perm_mask) const;
uint32_t get_group_perm(const DoutPrefixProvider *dpp, ACLGroupTypeEnum group, uint32_t perm_mask) const;
uint32_t get_referer_perm(const DoutPrefixProvider *dpp, uint32_t current_perm,
std::string http_referer,
- uint32_t perm_mask);
+ uint32_t perm_mask) const;
void encode(bufferlist& bl) const {
ENCODE_START(4, 3, bl);
bool maps_initialized = true;
@@ -263,10 +317,9 @@ public:
if (struct_v >= 2) {
decode(acl_group_map, bl);
} else if (!maps_initialized) {
- ACLGrantMap::iterator iter;
- for (iter = grant_map.begin(); iter != grant_map.end(); ++iter) {
- ACLGrant& grant = iter->second;
- _add_grant(&grant);
+ // register everything in the grant_map
+ for (const auto& [id, grant] : grant_map) {
+ register_grant(grant);
}
}
if (struct_v >= 4) {
@@ -277,20 +330,20 @@ public:
void dump(Formatter *f) const;
static void generate_test_instances(std::list<RGWAccessControlList*>& o);
- void add_grant(ACLGrant *grant);
- void remove_canon_user_grant(rgw_user& user_id);
+ void add_grant(const ACLGrant& grant);
+ void remove_canon_user_grant(const rgw_user& user_id);
ACLGrantMap& get_grant_map() { return grant_map; }
const ACLGrantMap& get_grant_map() const { return grant_map; }
- void create_default(const rgw_user& id, std::string name) {
+ void create_default(const rgw_user& id, const std::string& name) {
acl_user_map.clear();
acl_group_map.clear();
referer_list.clear();
ACLGrant grant;
grant.set_canon(id, name, RGW_PERM_FULL_CONTROL);
- add_grant(&grant);
+ add_grant(grant);
}
friend bool operator==(const RGWAccessControlList& lhs, const RGWAccessControlList& rhs);
@@ -298,15 +351,9 @@ public:
};
WRITE_CLASS_ENCODER(RGWAccessControlList)
-class ACLOwner
-{
-protected:
+struct ACLOwner {
rgw_user id;
std::string display_name;
-public:
- ACLOwner() {}
- ACLOwner(const rgw_user& _id) : id(_id) {}
- ~ACLOwner() {}
void encode(bufferlist& bl) const {
ENCODE_START(3, 2, bl);
@@ -327,46 +374,29 @@ public:
void dump(Formatter *f) const;
void decode_json(JSONObj *obj);
static void generate_test_instances(std::list<ACLOwner*>& o);
- void set_id(const rgw_user& _id) { id = _id; }
- void set_name(const std::string& name) { display_name = name; }
-
- rgw_user& get_id() { return id; }
- const rgw_user& get_id() const { return id; }
- std::string& get_display_name() { return display_name; }
- const std::string& get_display_name() const { return display_name; }
- friend bool operator==(const ACLOwner& lhs, const ACLOwner& rhs);
- friend bool operator!=(const ACLOwner& lhs, const ACLOwner& rhs);
+
+ auto operator<=>(const ACLOwner&) const = default;
};
WRITE_CLASS_ENCODER(ACLOwner)
class RGWAccessControlPolicy
{
protected:
- CephContext *cct;
RGWAccessControlList acl;
ACLOwner owner;
public:
- explicit RGWAccessControlPolicy(CephContext *_cct) : cct(_cct), acl(_cct) {}
- RGWAccessControlPolicy() : cct(NULL), acl(NULL) {}
- virtual ~RGWAccessControlPolicy() {}
-
- void set_ctx(CephContext *ctx) {
- cct = ctx;
- acl.set_ctx(ctx);
- }
-
uint32_t get_perm(const DoutPrefixProvider* dpp,
const rgw::auth::Identity& auth_identity,
uint32_t perm_mask,
const char * http_referer,
- bool ignore_public_acls=false);
+ bool ignore_public_acls=false) const;
bool verify_permission(const DoutPrefixProvider* dpp,
const rgw::auth::Identity& auth_identity,
uint32_t user_perm_mask,
uint32_t perm,
const char * http_referer = nullptr,
- bool ignore_public_acls=false);
+ bool ignore_public_acls=false) const;
void encode(bufferlist& bl) const {
ENCODE_START(2, 2, bl);
@@ -388,15 +418,14 @@ public:
DECODE_FINISH(bl);
}
- void set_owner(ACLOwner& o) { owner = o; }
- ACLOwner& get_owner() {
- return owner;
- }
+ void set_owner(const ACLOwner& o) { owner = o; }
+ const ACLOwner& get_owner() const { return owner; }
+ ACLOwner& get_owner() { return owner; }
- void create_default(const rgw_user& id, std::string& name) {
+ void create_default(const rgw_user& id, const std::string& name) {
acl.create_default(id, name);
- owner.set_id(id);
- owner.set_name(name);
+ owner.id = id;
+ owner.display_name = name;
}
RGWAccessControlList& get_acl() {
return acl;
@@ -405,7 +434,6 @@ public:
return acl;
}
- virtual bool compare_group_name(std::string& id, ACLGroupTypeEnum group) { return false; }
bool is_public(const DoutPrefixProvider *dpp) const;
friend bool operator==(const RGWAccessControlPolicy& lhs, const RGWAccessControlPolicy& rhs);
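
A small, hedged illustration of the aggregate-style ACLOwner and the const policy accessors above; the helper name is illustrative only.

bool owned_by(const RGWAccessControlPolicy& policy, const rgw_user& id)
{
  ACLOwner expected;
  expected.id = id;                       // public member, no set_id()
  expected.display_name = policy.get_owner().display_name;
  // the defaulted operator<=> also provides == for ACLOwner
  return policy.get_owner() == expected;
}
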
diff --git a/src/rgw/rgw_acl_s3.cc b/src/rgw/rgw_acl_s3.cc
index 9f71e328150..e45fb552aff 100644
--- a/src/rgw/rgw_acl_s3.cc
+++ b/src/rgw/rgw_acl_s3.cc
@@ -7,6 +7,7 @@
#include <map>
#include "include/types.h"
+#include "common/split.h"
#include "rgw_acl_s3.h"
#include "rgw_user.h"
@@ -24,8 +25,17 @@ using namespace std;
static string rgw_uri_all_users = RGW_URI_ALL_USERS;
static string rgw_uri_auth_users = RGW_URI_AUTH_USERS;
-void ACLPermission_S3::to_xml(ostream& out)
+class ACLPermission_S3 : public XMLObj
{
+public:
+ uint32_t flags = 0;
+
+ bool xml_end(const char *el) override;
+};
+
+void to_xml(ACLPermission perm, std::ostream& out)
+{
+ const uint32_t flags = perm.get_permissions();
if ((flags & RGW_PERM_FULL_CONTROL) == RGW_PERM_FULL_CONTROL) {
out << "<Permission>FULL_CONTROL</Permission>";
} else {
@@ -40,8 +50,7 @@ void ACLPermission_S3::to_xml(ostream& out)
}
}
-bool ACLPermission_S3::
-xml_end(const char *el)
+bool ACLPermission_S3::xml_end(const char *el)
{
const char *s = data.c_str();
if (strcasecmp(s, "READ") == 0) {
@@ -66,7 +75,7 @@ xml_end(const char *el)
class ACLGranteeType_S3 {
public:
- static const char *to_string(ACLGranteeType& type) {
+ static const char *to_string(ACLGranteeType type) {
switch (type.get_type()) {
case ACL_TYPE_CANON_USER:
return "CanonicalUser";
@@ -95,6 +104,15 @@ public:
}
};
+class ACLGrantee_S3 : public XMLObj
+{
+public:
+ ACLGrantee_S3() {}
+ virtual ~ACLGrantee_S3() override {}
+
+ bool xml_start(const char *el, const char **attr);
+};
+
class ACLID_S3 : public XMLObj
{
public:
@@ -124,6 +142,15 @@ public:
~ACLDisplayName_S3() override {}
};
+class ACLOwner_S3 : public XMLObj
+{
+public:
+ std::string id;
+ std::string display_name;
+
+ bool xml_end(const char *el) override;
+};
+
bool ACLOwner_S3::xml_end(const char *el) {
ACLID_S3 *acl_id = static_cast<ACLID_S3 *>(find_first("ID"));
ACLID_S3 *acl_name = static_cast<ACLID_S3 *>(find_first("DisplayName"));
@@ -142,23 +169,37 @@ bool ACLOwner_S3::xml_end(const char *el) {
return true;
}
-void ACLOwner_S3::to_xml(ostream& out) {
+void to_xml(const ACLOwner& o, std::ostream& out)
+{
string s;
- id.to_str(s);
+ o.id.to_str(s);
if (s.empty())
return;
out << "<Owner>" << "<ID>" << s << "</ID>";
- if (!display_name.empty())
- out << "<DisplayName>" << display_name << "</DisplayName>";
+ if (!o.display_name.empty())
+ out << "<DisplayName>" << o.display_name << "</DisplayName>";
out << "</Owner>";
}
+class ACLGrant_S3 : public XMLObj
+{
+public:
+ ACLGranteeType type;
+ std::string id;
+ std::string name;
+ std::string uri;
+ std::string email;
+ ACLPermission_S3* permission = nullptr;
+
+ bool xml_end(const char *el) override;
+ bool xml_start(const char *el, const char **attr);
+};
+
bool ACLGrant_S3::xml_end(const char *el) {
ACLGrantee_S3 *acl_grantee;
ACLID_S3 *acl_id;
ACLURI_S3 *acl_uri;
ACLEmail_S3 *acl_email;
- ACLPermission_S3 *acl_permission;
ACLDisplayName_S3 *acl_name;
string uri;
@@ -168,17 +209,12 @@ bool ACLGrant_S3::xml_end(const char *el) {
string type_str;
if (!acl_grantee->get_attr("xsi:type", type_str))
return false;
- ACLGranteeType_S3::set(type_str.c_str(), type);
-
- acl_permission = static_cast<ACLPermission_S3 *>(find_first("Permission"));
- if (!acl_permission)
- return false;
- permission = *acl_permission;
+ ACLGranteeType_S3::set(type_str.c_str(), type);
- id.clear();
- name.clear();
- email.clear();
+ permission = static_cast<ACLPermission_S3*>(find_first("Permission"));
+ if (!permission)
+ return false;
switch (type.get_type()) {
case ACL_TYPE_CANON_USER:
@@ -195,7 +231,6 @@ bool ACLGrant_S3::xml_end(const char *el) {
if (!acl_uri)
return false;
uri = acl_uri->get_data();
- group = uri_to_group(uri);
break;
case ACL_TYPE_EMAIL_USER:
acl_email = static_cast<ACLEmail_S3 *>(acl_grantee->find_first("EmailAddress"));
@@ -210,72 +245,51 @@ bool ACLGrant_S3::xml_end(const char *el) {
return true;
}
-void ACLGrant_S3::to_xml(CephContext *cct, ostream& out) {
- ACLPermission_S3& perm = static_cast<ACLPermission_S3 &>(permission);
+void to_xml(const ACLGrant& grant, ostream& out)
+{
+ const ACLPermission perm = grant.get_permission();
/* only show s3 compatible permissions */
if (!(perm.get_permissions() & RGW_PERM_ALL_S3))
return;
- string uri;
+ const std::string type = ACLGranteeType_S3::to_string(grant.get_type());
out << "<Grant>" <<
- "<Grantee xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" xsi:type=\"" << ACLGranteeType_S3::to_string(type) << "\">";
- switch (type.get_type()) {
- case ACL_TYPE_CANON_USER:
- out << "<ID>" << id << "</ID>";
- if (name.size()) {
- out << "<DisplayName>" << name << "</DisplayName>";
- }
- break;
- case ACL_TYPE_EMAIL_USER:
- out << "<EmailAddress>" << email << "</EmailAddress>";
- break;
- case ACL_TYPE_GROUP:
- if (!group_to_uri(group, uri)) {
- ldout(cct, 0) << "ERROR: group_to_uri failed with group=" << (int)group << dendl;
- break;
+ "<Grantee xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" xsi:type=\"" << type << "\">";
+
+ if (const auto* user = grant.get_user(); user) {
+ out << "<ID>" << user->id << "</ID>";
+ if (user->name.size()) {
+ out << "<DisplayName>" << user->name << "</DisplayName>";
}
+ } else if (const auto* email = grant.get_email(); email) {
+ out << "<EmailAddress>" << email->address << "</EmailAddress>";
+ } else if (const auto* group = grant.get_group(); group) {
+ std::string uri;
+ rgw::s3::acl_group_to_uri(group->type, uri);
out << "<URI>" << uri << "</URI>";
- break;
- default:
- break;
}
out << "</Grantee>";
- perm.to_xml(out);
+ to_xml(perm, out);
out << "</Grant>";
}
-bool ACLGrant_S3::group_to_uri(ACLGroupTypeEnum group, string& uri)
+class RGWAccessControlList_S3 : public XMLObj
{
- switch (group) {
- case ACL_GROUP_ALL_USERS:
- uri = rgw_uri_all_users;
- return true;
- case ACL_GROUP_AUTHENTICATED_USERS:
- uri = rgw_uri_auth_users;
- return true;
- default:
- return false;
- }
-}
+public:
+ bool xml_end(const char *el) override;
+};
bool RGWAccessControlList_S3::xml_end(const char *el) {
- XMLObjIter iter = find("Grant");
- ACLGrant_S3 *grant = static_cast<ACLGrant_S3 *>(iter.get_next());
- while (grant) {
- add_grant(grant);
- grant = static_cast<ACLGrant_S3 *>(iter.get_next());
- }
return true;
}
-void RGWAccessControlList_S3::to_xml(ostream& out) {
- multimap<string, ACLGrant>::iterator iter;
+void to_xml(const RGWAccessControlList& acl, std::ostream& out)
+{
out << "<AccessControlList>";
- for (iter = grant_map.begin(); iter != grant_map.end(); ++iter) {
- ACLGrant_S3& grant = static_cast<ACLGrant_S3 &>(iter->second);
- grant.to_xml(cct, out);
+ for (const auto& p : acl.get_grant_map()) {
+ to_xml(p.second, out);
}
out << "</AccessControlList>";
}
@@ -285,16 +299,11 @@ struct s3_acl_header {
const char *http_header;
};
-static const char *get_acl_header(const RGWEnv *env,
- const struct s3_acl_header *perm)
-{
- const char *header = perm->http_header;
-
- return env->get(header, NULL);
-}
-
-static int parse_grantee_str(const DoutPrefixProvider *dpp, rgw::sal::Driver* driver, string& grantee_str,
- const struct s3_acl_header *perm, ACLGrant& grant)
+static int parse_grantee_str(const DoutPrefixProvider* dpp,
+ rgw::sal::Driver* driver,
+ const std::string& grantee_str,
+ const s3_acl_header* perm,
+ ACLGrant& grant)
{
string id_type, id_val_quoted;
int rgw_perm = perm->rgw_perm;
@@ -321,7 +330,7 @@ static int parse_grantee_str(const DoutPrefixProvider *dpp, rgw::sal::Driver* dr
grant.set_canon(user->get_id(), user->get_display_name(), rgw_perm);
} else if (strcasecmp(id_type.c_str(), "uri") == 0) {
- ACLGroupTypeEnum gid = grant.uri_to_group(id_val);
+ ACLGroupTypeEnum gid = rgw::s3::acl_uri_to_group(id_val);
if (gid == ACL_GROUP_NONE)
return -EINVAL;
@@ -333,71 +342,70 @@ static int parse_grantee_str(const DoutPrefixProvider *dpp, rgw::sal::Driver* dr
return 0;
}
-static int parse_acl_header(const DoutPrefixProvider *dpp, rgw::sal::Driver* driver,
- const RGWEnv *env, const struct s3_acl_header *perm,
- std::list<ACLGrant>& _grants)
+static int parse_acl_header(const DoutPrefixProvider* dpp, rgw::sal::Driver* driver,
+ const RGWEnv& env, const s3_acl_header* perm,
+ RGWAccessControlList& acl)
{
- std::list<string> grantees;
- std::string hacl_str;
-
- const char *hacl = get_acl_header(env, perm);
- if (hacl == NULL)
+ const char* hacl = env.get(perm->http_header, nullptr);
+ if (hacl == nullptr) {
return 0;
+ }
- hacl_str = hacl;
- get_str_list(hacl_str, ",", grantees);
-
- for (list<string>::iterator it = grantees.begin(); it != grantees.end(); ++it) {
+ for (std::string_view grantee : ceph::split(hacl, ",")) {
ACLGrant grant;
- int ret = parse_grantee_str(dpp, driver, *it, perm, grant);
+ int ret = parse_grantee_str(dpp, driver, std::string{grantee}, perm, grant);
if (ret < 0)
return ret;
- _grants.push_back(grant);
+ acl.add_grant(grant);
}
return 0;
}
-int RGWAccessControlList_S3::create_canned(ACLOwner& owner, ACLOwner& bucket_owner, const string& canned_acl)
+static int create_canned(const ACLOwner& owner, const ACLOwner& bucket_owner,
+ const string& canned_acl, RGWAccessControlList& acl)
{
- acl_user_map.clear();
- grant_map.clear();
-
- ACLGrant owner_grant;
-
- rgw_user bid = bucket_owner.get_id();
- string bname = bucket_owner.get_display_name();
+ const rgw_user& bid = bucket_owner.id;
+ const std::string& bname = bucket_owner.display_name;
/* owner gets full control */
- owner_grant.set_canon(owner.get_id(), owner.get_display_name(), RGW_PERM_FULL_CONTROL);
- add_grant(&owner_grant);
+ {
+ ACLGrant grant;
+ grant.set_canon(owner.id, owner.display_name, RGW_PERM_FULL_CONTROL);
+ acl.add_grant(grant);
+ }
if (canned_acl.size() == 0 || canned_acl.compare("private") == 0) {
return 0;
}
- ACLGrant bucket_owner_grant;
- ACLGrant group_grant;
- if (canned_acl.compare("public-read") == 0) {
- group_grant.set_group(ACL_GROUP_ALL_USERS, RGW_PERM_READ);
- add_grant(&group_grant);
- } else if (canned_acl.compare("public-read-write") == 0) {
- group_grant.set_group(ACL_GROUP_ALL_USERS, RGW_PERM_READ);
- add_grant(&group_grant);
- group_grant.set_group(ACL_GROUP_ALL_USERS, RGW_PERM_WRITE);
- add_grant(&group_grant);
- } else if (canned_acl.compare("authenticated-read") == 0) {
- group_grant.set_group(ACL_GROUP_AUTHENTICATED_USERS, RGW_PERM_READ);
- add_grant(&group_grant);
- } else if (canned_acl.compare("bucket-owner-read") == 0) {
- bucket_owner_grant.set_canon(bid, bname, RGW_PERM_READ);
- if (bid.compare(owner.get_id()) != 0)
- add_grant(&bucket_owner_grant);
- } else if (canned_acl.compare("bucket-owner-full-control") == 0) {
- bucket_owner_grant.set_canon(bid, bname, RGW_PERM_FULL_CONTROL);
- if (bid.compare(owner.get_id()) != 0)
- add_grant(&bucket_owner_grant);
+ if (canned_acl == "public-read") {
+ ACLGrant grant;
+ grant.set_group(ACL_GROUP_ALL_USERS, RGW_PERM_READ);
+ acl.add_grant(grant);
+ } else if (canned_acl == "public-read-write") {
+ ACLGrant grant;
+ grant.set_group(ACL_GROUP_ALL_USERS, RGW_PERM_READ);
+ acl.add_grant(grant);
+ grant.set_group(ACL_GROUP_ALL_USERS, RGW_PERM_WRITE);
+ acl.add_grant(grant);
+ } else if (canned_acl == "authenticated-read") {
+ ACLGrant grant;
+ grant.set_group(ACL_GROUP_AUTHENTICATED_USERS, RGW_PERM_READ);
+ acl.add_grant(grant);
+ } else if (canned_acl == "bucket-owner-read") {
+ if (bid != owner.id) {
+ ACLGrant grant;
+ grant.set_canon(bid, bname, RGW_PERM_READ);
+ acl.add_grant(grant);
+ }
+ } else if (canned_acl == "bucket-owner-full-control") {
+ if (bid != owner.id) {
+ ACLGrant grant;
+ grant.set_canon(bid, bname, RGW_PERM_FULL_CONTROL);
+ acl.add_grant(grant);
+ }
} else {
return -EINVAL;
}
@@ -405,21 +413,11 @@ int RGWAccessControlList_S3::create_canned(ACLOwner& owner, ACLOwner& bucket_own
return 0;
}
-int RGWAccessControlList_S3::create_from_grants(std::list<ACLGrant>& grants)
+class RGWAccessControlPolicy_S3 : public XMLObj
{
- if (grants.empty())
- return -EINVAL;
-
- acl_user_map.clear();
- grant_map.clear();
-
- for (std::list<ACLGrant>::iterator it = grants.begin(); it != grants.end(); ++it) {
- ACLGrant g = *it;
- add_grant(&g);
- }
-
- return 0;
-}
+public:
+ bool xml_end(const char *el) override;
+};
bool RGWAccessControlPolicy_S3::xml_end(const char *el) {
RGWAccessControlList_S3 *s3acl =
@@ -427,21 +425,17 @@ bool RGWAccessControlPolicy_S3::xml_end(const char *el) {
if (!s3acl)
return false;
- acl = *s3acl;
-
- ACLOwner *owner_p = static_cast<ACLOwner_S3 *>(find_first("Owner"));
+ ACLOwner_S3 *owner_p = static_cast<ACLOwner_S3 *>(find_first("Owner"));
if (!owner_p)
return false;
- owner = *owner_p;
return true;
}
-void RGWAccessControlPolicy_S3::to_xml(ostream& out) {
+void to_xml(const RGWAccessControlPolicy& p, std::ostream& out)
+{
out << "<AccessControlPolicy xmlns=\"" << XMLNS_AWS_S3 << "\">";
- ACLOwner_S3& _owner = static_cast<ACLOwner_S3 &>(owner);
- RGWAccessControlList_S3& _acl = static_cast<RGWAccessControlList_S3 &>(acl);
- _owner.to_xml(out);
- _acl.to_xml(out);
+ to_xml(p.get_owner(), out);
+ to_xml(p.get_acl(), out);
out << "</AccessControlPolicy>";
}
@@ -454,164 +448,77 @@ static const s3_acl_header acl_header_perms[] = {
{0, NULL}
};
-int RGWAccessControlPolicy_S3::create_from_headers(const DoutPrefixProvider *dpp,
- rgw::sal::Driver* driver,
- const RGWEnv *env, ACLOwner& _owner)
+static int resolve_grant(const DoutPrefixProvider* dpp, optional_yield y,
+ rgw::sal::Driver* driver, ACLGrant_S3& xml_grant,
+ ACLGrant& grant, std::string& err_msg)
{
- std::list<ACLGrant> grants;
- int r = 0;
+ const uint32_t perm = xml_grant.permission->flags;
- for (const struct s3_acl_header *p = acl_header_perms; p->rgw_perm; p++) {
- r = parse_acl_header(dpp, driver, env, p, grants);
- if (r < 0) {
- return r;
+ std::unique_ptr<rgw::sal::User> user;
+ switch (xml_grant.type.get_type()) {
+ case ACL_TYPE_EMAIL_USER:
+ if (xml_grant.email.empty()) {
+ return -EINVAL;
}
- }
-
- RGWAccessControlList_S3& _acl = static_cast<RGWAccessControlList_S3 &>(acl);
- r = _acl.create_from_grants(grants);
-
- owner = _owner;
-
- return r;
-}
+ if (driver->get_user_by_email(dpp, xml_grant.email, y, &user) < 0) {
+ ldpp_dout(dpp, 10) << "grant user email not found or other error" << dendl;
+ err_msg = "The e-mail address you provided does not match any account on record.";
+ return -ERR_UNRESOLVABLE_EMAIL;
+ }
+ grant.set_canon(user->get_id(), user->get_display_name(), perm);
+ return 0;
-/*
- can only be called on object that was parsed
- */
-int RGWAccessControlPolicy_S3::rebuild(const DoutPrefixProvider *dpp,
- rgw::sal::Driver* driver, ACLOwner *owner,
- RGWAccessControlPolicy& dest, std::string &err_msg)
-{
- if (!owner)
- return -EINVAL;
+ case ACL_TYPE_CANON_USER:
+ user = driver->get_user(rgw_user{xml_grant.id});
+ if (user->load_user(dpp, y) < 0) {
+ ldpp_dout(dpp, 10) << "grant user does not exist: " << xml_grant.id << dendl;
+ err_msg = "Invalid CanonicalUser id";
+ return -EINVAL;
+ }
+ grant.set_canon(user->get_id(), user->get_display_name(), perm);
+ return 0;
- ACLOwner *requested_owner = static_cast<ACLOwner_S3 *>(find_first("Owner"));
- if (requested_owner) {
- rgw_user& requested_id = requested_owner->get_id();
- if (!requested_id.empty() && requested_id.compare(owner->get_id()) != 0)
- return -EPERM;
- }
+ case ACL_TYPE_GROUP:
+ if (const auto group = rgw::s3::acl_uri_to_group(xml_grant.uri);
+ group != ACL_GROUP_NONE) {
+ grant.set_group(group, perm);
+ return 0;
+ } else {
+ ldpp_dout(dpp, 10) << "bad grant group: " << xml_grant.uri << dendl;
+ err_msg = "Invalid group uri";
+ return -EINVAL;
+ }
- std::unique_ptr<rgw::sal::User> user = driver->get_user(owner->get_id());
- if (user->load_user(dpp, null_yield) < 0) {
- ldpp_dout(dpp, 10) << "owner info does not exist" << dendl;
- err_msg = "Invalid id";
+ case ACL_TYPE_REFERER:
+ case ACL_TYPE_UNKNOWN:
+ default:
+ err_msg = "Invalid Grantee type";
return -EINVAL;
}
- ACLOwner& dest_owner = dest.get_owner();
- dest_owner.set_id(owner->get_id());
- dest_owner.set_name(user->get_display_name());
-
- ldpp_dout(dpp, 20) << "owner id=" << owner->get_id() << dendl;
- ldpp_dout(dpp, 20) << "dest owner id=" << dest.get_owner().get_id() << dendl;
-
- RGWAccessControlList& dst_acl = dest.get_acl();
-
- multimap<string, ACLGrant>& grant_map = acl.get_grant_map();
- multimap<string, ACLGrant>::iterator iter;
- for (iter = grant_map.begin(); iter != grant_map.end(); ++iter) {
- ACLGrant& src_grant = iter->second;
- ACLGranteeType& type = src_grant.get_type();
- ACLGrant new_grant;
- bool grant_ok = false;
- rgw_user uid;
- RGWUserInfo grant_user;
- switch (type.get_type()) {
- case ACL_TYPE_EMAIL_USER:
- {
- string email;
- rgw_user u;
- if (!src_grant.get_id(u)) {
- ldpp_dout(dpp, 0) << "ERROR: src_grant.get_id() failed" << dendl;
- return -EINVAL;
- }
- email = u.id;
- ldpp_dout(dpp, 10) << "grant user email=" << email << dendl;
- if (driver->get_user_by_email(dpp, email, null_yield, &user) < 0) {
- ldpp_dout(dpp, 10) << "grant user email not found or other error" << dendl;
- err_msg = "The e-mail address you provided does not match any account on record.";
- return -ERR_UNRESOLVABLE_EMAIL;
- }
- grant_user = user->get_info();
- uid = grant_user.user_id;
- }
- case ACL_TYPE_CANON_USER:
- {
- if (type.get_type() == ACL_TYPE_CANON_USER) {
- if (!src_grant.get_id(uid)) {
- ldpp_dout(dpp, 0) << "ERROR: src_grant.get_id() failed" << dendl;
- err_msg = "Invalid id";
- return -EINVAL;
- }
- }
-
- if (grant_user.user_id.empty()) {
- user = driver->get_user(uid);
- if (user->load_user(dpp, null_yield) < 0) {
- ldpp_dout(dpp, 10) << "grant user does not exist:" << uid << dendl;
- err_msg = "Invalid id";
- return -EINVAL;
- } else {
- grant_user = user->get_info();
- }
- }
- ACLPermission& perm = src_grant.get_permission();
- new_grant.set_canon(uid, grant_user.display_name, perm.get_permissions());
- grant_ok = true;
- rgw_user new_id;
- new_grant.get_id(new_id);
- ldpp_dout(dpp, 10) << "new grant: " << new_id << ":" << grant_user.display_name << dendl;
- }
- break;
- case ACL_TYPE_GROUP:
- {
- string uri;
- if (ACLGrant_S3::group_to_uri(src_grant.get_group(), uri)) {
- new_grant = src_grant;
- grant_ok = true;
- ldpp_dout(dpp, 10) << "new grant: " << uri << dendl;
- } else {
- ldpp_dout(dpp, 10) << "bad grant group:" << (int)src_grant.get_group() << dendl;
- err_msg = "Invalid group uri";
- return -EINVAL;
- }
- }
- default:
- break;
- }
- if (grant_ok) {
- dst_acl.add_grant(&new_grant);
- }
- }
-
- return 0;
}
-bool RGWAccessControlPolicy_S3::compare_group_name(string& id, ACLGroupTypeEnum group)
+/**
+ * Interfaces with the webserver's XML handling code
+ * to parse it in a way that makes sense for the rgw.
+ */
+class RGWACLXMLParser_S3 : public RGWXMLParser
{
- switch (group) {
- case ACL_GROUP_ALL_USERS:
- return (id.compare(RGW_USER_ANON_ID) == 0);
- case ACL_GROUP_AUTHENTICATED_USERS:
- return (id.compare(rgw_uri_auth_users) == 0);
- default:
- return id.empty();
- }
+ CephContext *cct;
- // shouldn't get here
- return false;
-}
+ XMLObj *alloc_obj(const char *el) override;
+public:
+ explicit RGWACLXMLParser_S3(CephContext *_cct) : cct(_cct) {}
+};
XMLObj *RGWACLXMLParser_S3::alloc_obj(const char *el)
{
XMLObj * obj = NULL;
if (strcmp(el, "AccessControlPolicy") == 0) {
- obj = new RGWAccessControlPolicy_S3(cct);
+ obj = new RGWAccessControlPolicy_S3();
} else if (strcmp(el, "Owner") == 0) {
obj = new ACLOwner_S3();
} else if (strcmp(el, "AccessControlList") == 0) {
- obj = new RGWAccessControlList_S3(cct);
+ obj = new RGWAccessControlList_S3();
} else if (strcmp(el, "ID") == 0) {
obj = new ACLID_S3();
} else if (strcmp(el, "DisplayName") == 0) {
@@ -631,13 +538,134 @@ XMLObj *RGWACLXMLParser_S3::alloc_obj(const char *el)
return obj;
}
-ACLGroupTypeEnum ACLGrant_S3::uri_to_group(string& uri)
+namespace rgw::s3 {
+
+ACLGroupTypeEnum acl_uri_to_group(std::string_view uri)
{
- if (uri.compare(rgw_uri_all_users) == 0)
+ if (uri == rgw_uri_all_users)
return ACL_GROUP_ALL_USERS;
- else if (uri.compare(rgw_uri_auth_users) == 0)
+ else if (uri == rgw_uri_auth_users)
return ACL_GROUP_AUTHENTICATED_USERS;
return ACL_GROUP_NONE;
}
+bool acl_group_to_uri(ACLGroupTypeEnum group, std::string& uri)
+{
+ switch (group) {
+ case ACL_GROUP_ALL_USERS:
+ uri = rgw_uri_all_users;
+ return true;
+ case ACL_GROUP_AUTHENTICATED_USERS:
+ uri = rgw_uri_auth_users;
+ return true;
+ default:
+ return false;
+ }
+}
+
+int parse_policy(const DoutPrefixProvider* dpp, optional_yield y,
+ rgw::sal::Driver* driver, std::string_view document,
+ RGWAccessControlPolicy& policy, std::string& err_msg)
+{
+ RGWACLXMLParser_S3 parser(dpp->get_cct());
+ if (!parser.init()) {
+ return -EINVAL;
+ }
+ if (!parser.parse(document.data(), document.size(), 1)) {
+ return -EINVAL;
+ }
+
+ const auto xml_root = static_cast<RGWAccessControlPolicy_S3*>(
+ parser.find_first("AccessControlPolicy"));
+ if (!xml_root) {
+ err_msg = "Missing element AccessControlPolicy";
+ return -EINVAL;
+ }
+
+ const auto xml_owner = static_cast<ACLOwner_S3*>(
+ xml_root->find_first("Owner"));
+ if (!xml_owner) {
+ err_msg = "Missing element Owner";
+ return -EINVAL;
+ }
+
+ // owner must exist
+ std::unique_ptr<rgw::sal::User> user =
+ driver->get_user(rgw_user{xml_owner->id});
+ if (user->load_user(dpp, y) < 0) {
+ ldpp_dout(dpp, 10) << "acl owner does not exist" << dendl;
+ err_msg = "Invalid Owner ID";
+ return -EINVAL;
+ }
+
+ ACLOwner& owner = policy.get_owner();
+ owner.id = xml_owner->id;
+ if (!xml_owner->display_name.empty()) {
+ owner.display_name = xml_owner->display_name;
+ } else {
+ owner.display_name = user->get_display_name();
+ }
+
+ const auto xml_acl = static_cast<ACLOwner_S3*>(
+ xml_root->find_first("AccessControlList"));
+ if (!xml_acl) {
+ err_msg = "Missing element AccessControlList";
+ return -EINVAL;
+ }
+
+ // iterate parsed grants
+ XMLObjIter iter = xml_acl->find("Grant");
+ ACLGrant_S3* xml_grant = static_cast<ACLGrant_S3*>(iter.get_next());
+ while (xml_grant) {
+ ACLGrant grant;
+ int r = resolve_grant(dpp, y, driver, *xml_grant, grant, err_msg);
+ if (r < 0) {
+ return r;
+ }
+ policy.get_acl().add_grant(grant);
+ xml_grant = static_cast<ACLGrant_S3*>(iter.get_next());
+ }
+
+ return 0;
+}
+
+void write_policy_xml(const RGWAccessControlPolicy& policy,
+ std::ostream& out)
+{
+ to_xml(policy, out);
+}
+
+int create_canned_acl(const ACLOwner& owner,
+ const ACLOwner& bucket_owner,
+ const std::string& canned_acl,
+ RGWAccessControlPolicy& policy)
+{
+ if (owner.id == rgw_user("anonymous")) {
+ policy.set_owner(bucket_owner);
+ } else {
+ policy.set_owner(owner);
+ }
+ return create_canned(owner, bucket_owner, canned_acl, policy.get_acl());
+}
+
+int create_policy_from_headers(const DoutPrefixProvider* dpp,
+ rgw::sal::Driver* driver,
+ const ACLOwner& owner,
+ const RGWEnv& env,
+ RGWAccessControlPolicy& policy)
+{
+ policy.set_owner(owner);
+ auto& acl = policy.get_acl();
+
+ for (const s3_acl_header* p = acl_header_perms; p->rgw_perm; p++) {
+ int r = parse_acl_header(dpp, driver, env, p, acl);
+ if (r < 0) {
+ return r;
+ }
+ }
+
+ return 0;
+}
+
+} // namespace rgw::s3
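
Usage sketch (not part of the patch) for the group/URI helpers defined above; it relies only on rgw::s3::acl_group_to_uri and rgw::s3::acl_uri_to_group.

// Round-trip a group through its canned URI; ACL_GROUP_NONE has no URI.
ACLGroupTypeEnum roundtrip_group(ACLGroupTypeEnum group)
{
  std::string uri;
  if (!rgw::s3::acl_group_to_uri(group, uri)) {
    return ACL_GROUP_NONE;
  }
  return rgw::s3::acl_uri_to_group(uri);
}
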
diff --git a/src/rgw/rgw_acl_s3.h b/src/rgw/rgw_acl_s3.h
index c234d722b99..2341461783f 100644
--- a/src/rgw/rgw_acl_s3.h
+++ b/src/rgw/rgw_acl_s3.h
@@ -8,108 +8,40 @@
#include <iosfwd>
#include <include/types.h>
-#include "include/str_list.h"
+#include "common/async/yield_context.h"
#include "rgw_xml.h"
#include "rgw_acl.h"
#include "rgw_sal_fwd.h"
-class RGWUserCtl;
-
-class ACLPermission_S3 : public ACLPermission, public XMLObj
-{
-public:
- ACLPermission_S3() {}
- virtual ~ACLPermission_S3() override {}
-
- bool xml_end(const char *el) override;
- void to_xml(std::ostream& out);
-};
-
-class ACLGrantee_S3 : public ACLGrantee, public XMLObj
-{
-public:
- ACLGrantee_S3() {}
- virtual ~ACLGrantee_S3() override {}
-
- bool xml_start(const char *el, const char **attr);
-};
-
-
-class ACLGrant_S3 : public ACLGrant, public XMLObj
-{
-public:
- ACLGrant_S3() {}
- virtual ~ACLGrant_S3() override {}
-
- void to_xml(CephContext *cct, std::ostream& out);
- bool xml_end(const char *el) override;
- bool xml_start(const char *el, const char **attr);
-
- static ACLGroupTypeEnum uri_to_group(std::string& uri);
- static bool group_to_uri(ACLGroupTypeEnum group, std::string& uri);
-};
-
-class RGWAccessControlList_S3 : public RGWAccessControlList, public XMLObj
-{
-public:
- explicit RGWAccessControlList_S3(CephContext *_cct) : RGWAccessControlList(_cct) {}
- virtual ~RGWAccessControlList_S3() override {}
-
- bool xml_end(const char *el) override;
- void to_xml(std::ostream& out);
-
- int create_canned(ACLOwner& owner, ACLOwner& bucket_owner, const std::string& canned_acl);
- int create_from_grants(std::list<ACLGrant>& grants);
-};
-
-class ACLOwner_S3 : public ACLOwner, public XMLObj
-{
-public:
- ACLOwner_S3() {}
- virtual ~ACLOwner_S3() override {}
-
- bool xml_end(const char *el) override;
- void to_xml(std::ostream& out);
-};
-
class RGWEnv;
-class RGWAccessControlPolicy_S3 : public RGWAccessControlPolicy, public XMLObj
-{
-public:
- explicit RGWAccessControlPolicy_S3(CephContext *_cct) : RGWAccessControlPolicy(_cct) {}
- virtual ~RGWAccessControlPolicy_S3() override {}
+namespace rgw::s3 {
+
+ACLGroupTypeEnum acl_uri_to_group(std::string_view uri);
+bool acl_group_to_uri(ACLGroupTypeEnum group, std::string& uri);
- bool xml_end(const char *el) override;
+/// Construct a policy from an AccessControlPolicy xml document. Email grantees
+/// are looked up and converted to a corresponding CanonicalUser grant. All user
+/// ids are verified to exist.
+int parse_policy(const DoutPrefixProvider* dpp, optional_yield y,
+ rgw::sal::Driver* driver, std::string_view document,
+ RGWAccessControlPolicy& policy, std::string& err_msg);
- void to_xml(std::ostream& out);
- int rebuild(const DoutPrefixProvider *dpp, rgw::sal::Driver* driver, ACLOwner *owner,
- RGWAccessControlPolicy& dest, std::string &err_msg);
- bool compare_group_name(std::string& id, ACLGroupTypeEnum group) override;
+/// Write an AccessControlPolicy xml document for the given policy.
+void write_policy_xml(const RGWAccessControlPolicy& policy,
+ std::ostream& out);
- virtual int create_canned(ACLOwner& _owner, ACLOwner& bucket_owner, const std::string& canned_acl) {
- RGWAccessControlList_S3& _acl = static_cast<RGWAccessControlList_S3 &>(acl);
- if (_owner.get_id() == rgw_user("anonymous")) {
- owner = bucket_owner;
- } else {
- owner = _owner;
- }
- int ret = _acl.create_canned(owner, bucket_owner, canned_acl);
- return ret;
- }
- int create_from_headers(const DoutPrefixProvider *dpp, rgw::sal::Driver* driver,
- const RGWEnv *env, ACLOwner& _owner);
-};
+/// Construct a policy from a s3 canned acl string.
+int create_canned_acl(const ACLOwner& owner,
+ const ACLOwner& bucket_owner,
+ const std::string& canned_acl,
+ RGWAccessControlPolicy& policy);
-/**
- * Interfaces with the webserver's XML handling code
- * to parse it in a way that makes sense for the rgw.
- */
-class RGWACLXMLParser_S3 : public RGWXMLParser
-{
- CephContext *cct;
+/// Construct a policy from x-amz-grant-* request headers.
+int create_policy_from_headers(const DoutPrefixProvider* dpp,
+ rgw::sal::Driver* driver,
+ const ACLOwner& owner,
+ const RGWEnv& env,
+ RGWAccessControlPolicy& policy);
- XMLObj *alloc_obj(const char *el) override;
-public:
- explicit RGWACLXMLParser_S3(CephContext *_cct) : cct(_cct) {}
-};
+} // namespace rgw::s3
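
A hedged sketch of how a caller might combine the free functions declared above; the owner values and helper name are illustrative, not from the patch.

// Build a canned "public-read" policy and emit it as an AccessControlPolicy
// XML document. Error handling is minimal for brevity.
int dump_public_read(const ACLOwner& owner, const ACLOwner& bucket_owner,
                     std::ostream& out)
{
  RGWAccessControlPolicy policy;
  int r = rgw::s3::create_canned_acl(owner, bucket_owner, "public-read", policy);
  if (r < 0) {
    return r; // -EINVAL for unrecognized canned acl names
  }
  rgw::s3::write_policy_xml(policy, out);
  return 0;
}
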
diff --git a/src/rgw/rgw_acl_swift.cc b/src/rgw/rgw_acl_swift.cc
index c6f06bbd37c..260f4530d41 100644
--- a/src/rgw/rgw_acl_swift.cc
+++ b/src/rgw/rgw_acl_swift.cc
@@ -3,11 +3,13 @@
#include <string.h>
+#include <optional>
#include <vector>
#include <boost/algorithm/string/predicate.hpp>
#include "common/ceph_json.h"
+#include "common/split.h"
#include "rgw_common.h"
#include "rgw_user.h"
#include "rgw_acl_swift.h"
@@ -26,27 +28,6 @@
using namespace std;
-static int parse_list(const char* uid_list,
- std::vector<std::string>& uids) /* out */
-{
- char *s = strdup(uid_list);
- if (!s) {
- return -ENOMEM;
- }
-
- char *tokctx;
- const char *p = strtok_r(s, " ,", &tokctx);
- while (p) {
- if (*p) {
- string acl = p;
- uids.push_back(acl);
- }
- p = strtok_r(NULL, " ,", &tokctx);
- }
- free(s);
- return 0;
-}
-
static bool is_referrer(const std::string& designator)
{
return designator.compare(".r") == 0 ||
@@ -73,8 +54,8 @@ static bool uid_is_public(const string& uid)
return is_referrer(sub);
}
-static boost::optional<ACLGrant> referrer_to_grant(std::string url_spec,
- const uint32_t perm)
+static std::optional<ACLGrant> referrer_to_grant(std::string url_spec,
+ const uint32_t perm)
{
/* This function takes url_spec as non-ref std::string because of the trim
* operation that is essential to preserve compliance with Swift. It can't
@@ -99,7 +80,7 @@ static boost::optional<ACLGrant> referrer_to_grant(std::string url_spec,
}
if (url_spec.empty() || url_spec == ".") {
- return boost::none;
+ return std::nullopt;
}
} else {
/* Please be aware we're specially handling the .r:* in _add_grant()
@@ -110,21 +91,18 @@ static boost::optional<ACLGrant> referrer_to_grant(std::string url_spec,
grant.set_referer(url_spec, is_negative ? 0 : perm);
return grant;
} catch (const std::out_of_range&) {
- return boost::none;
+ return std::nullopt;
}
}
static ACLGrant user_to_grant(const DoutPrefixProvider *dpp,
- CephContext* const cct,
rgw::sal::Driver* driver,
const std::string& uid,
const uint32_t perm)
{
- RGWUserInfo grant_user;
ACLGrant grant;
- std::unique_ptr<rgw::sal::User> user;
- user = driver->get_user(rgw_user(uid));
+ std::unique_ptr<rgw::sal::User> user = driver->get_user(rgw_user(uid));
if (user->load_user(dpp, null_yield) < 0) {
ldpp_dout(dpp, 10) << "grant user does not exist: " << uid << dendl;
/* skipping silently */
@@ -136,101 +114,102 @@ static ACLGrant user_to_grant(const DoutPrefixProvider *dpp,
return grant;
}
-int RGWAccessControlPolicy_SWIFT::add_grants(const DoutPrefixProvider *dpp,
- rgw::sal::Driver* driver,
- const std::vector<std::string>& uids,
- const uint32_t perm)
+// parse a container acl grant in 'V1' format
+// https://docs.openstack.org/swift/latest/overview_acl.html#container-acls
+static auto parse_grant(const DoutPrefixProvider* dpp,
+ rgw::sal::Driver* driver,
+ const std::string& uid,
+ const uint32_t perm)
+ -> std::optional<ACLGrant>
{
- for (const auto& uid : uids) {
- boost::optional<ACLGrant> grant;
- ldpp_dout(dpp, 20) << "trying to add grant for ACL uid=" << uid << dendl;
-
- /* Let's check whether the item has a separator potentially indicating
- * a special meaning (like an HTTP referral-based grant). */
- const size_t pos = uid.find(':');
- if (std::string::npos == pos) {
- /* No, it don't have -- we've got just a regular user identifier. */
- grant = user_to_grant(dpp, cct, driver, uid, perm);
- } else {
- /* Yes, *potentially* an HTTP referral. */
- auto designator = uid.substr(0, pos);
- auto designatee = uid.substr(pos + 1);
-
- /* Swift strips whitespaces at both beginning and end. */
- boost::algorithm::trim(designator);
- boost::algorithm::trim(designatee);
-
- if (! boost::algorithm::starts_with(designator, ".")) {
- grant = user_to_grant(dpp, cct, driver, uid, perm);
- } else if ((perm & SWIFT_PERM_WRITE) == 0 && is_referrer(designator)) {
- /* HTTP referrer-based ACLs aren't acceptable for writes. */
- grant = referrer_to_grant(designatee, perm);
- }
- }
+ ldpp_dout(dpp, 20) << "trying to add grant for ACL uid=" << uid << dendl;
+
+ /* Let's check whether the item has a separator potentially indicating
+ * a special meaning (like an HTTP referral-based grant). */
+ const size_t pos = uid.find(':');
+ if (std::string::npos == pos) {
+ /* No, it doesn't -- we've got just a regular user identifier. */
+ return user_to_grant(dpp, driver, uid, perm);
+ }
- if (grant) {
- acl.add_grant(&*grant);
- } else {
- return -EINVAL;
- }
+ /* Yes, *potentially* an HTTP referral. */
+ auto designator = uid.substr(0, pos);
+ auto designatee = uid.substr(pos + 1);
+
+ /* Swift strips whitespaces at both beginning and end. */
+ boost::algorithm::trim(designator);
+ boost::algorithm::trim(designatee);
+
+ if (! boost::algorithm::starts_with(designator, ".")) {
+ return user_to_grant(dpp, driver, uid, perm);
+ }
+ if ((perm & SWIFT_PERM_WRITE) == 0 && is_referrer(designator)) {
+ /* HTTP referrer-based ACLs aren't acceptable for writes. */
+ return referrer_to_grant(designatee, perm);
}
- return 0;
+ return std::nullopt;
}
+static void add_grants(const DoutPrefixProvider* dpp,
+ rgw::sal::Driver* driver,
+ const std::vector<std::string>& uids,
+ uint32_t perm, RGWAccessControlList& acl)
+{
+ for (const auto& uid : uids) {
+ ACLGrant grant;
+ if (uid_is_public(uid)) {
+ grant.set_group(ACL_GROUP_ALL_USERS, perm);
+ } else {
+ grant = user_to_grant(dpp, driver, uid, perm);
+ }
+ acl.add_grant(grant);
+ }
+}
+
+namespace rgw::swift {
-int RGWAccessControlPolicy_SWIFT::create(const DoutPrefixProvider *dpp,
- rgw::sal::Driver* driver,
- const rgw_user& id,
- const std::string& name,
- const char* read_list,
- const char* write_list,
- uint32_t& rw_mask)
+int create_container_policy(const DoutPrefixProvider *dpp,
+ rgw::sal::Driver* driver,
+ const rgw_user& id,
+ const std::string& name,
+ const char* read_list,
+ const char* write_list,
+ uint32_t& rw_mask,
+ RGWAccessControlPolicy& policy)
{
- acl.create_default(id, name);
- owner.set_id(id);
- owner.set_name(name);
- rw_mask = 0;
+ policy.create_default(id, name);
+ auto& acl = policy.get_acl();
if (read_list) {
- std::vector<std::string> uids;
- int r = parse_list(read_list, uids);
- if (r < 0) {
- ldpp_dout(dpp, 0) << "ERROR: parse_list for read returned r="
- << r << dendl;
- return r;
- }
-
- r = add_grants(dpp, driver, uids, SWIFT_PERM_READ);
- if (r < 0) {
- ldpp_dout(dpp, 0) << "ERROR: add_grants for read returned r="
- << r << dendl;
- return r;
+ for (std::string_view uid : ceph::split(read_list, " ,")) {
+ auto grant = parse_grant(dpp, driver, std::string{uid}, SWIFT_PERM_READ);
+ if (!grant) {
+ ldpp_dout(dpp, 4) << "ERROR: failed to parse read acl grant "
+ << uid << dendl;
+ return -EINVAL;
+ }
+ acl.add_grant(*grant);
}
rw_mask |= SWIFT_PERM_READ;
}
if (write_list) {
- std::vector<std::string> uids;
- int r = parse_list(write_list, uids);
- if (r < 0) {
- ldpp_dout(dpp, 0) << "ERROR: parse_list for write returned r="
- << r << dendl;
- return r;
- }
-
- r = add_grants(dpp, driver, uids, SWIFT_PERM_WRITE);
- if (r < 0) {
- ldpp_dout(dpp, 0) << "ERROR: add_grants for write returned r="
- << r << dendl;
- return r;
+ for (std::string_view uid : ceph::split(write_list, " ,")) {
+ auto grant = parse_grant(dpp, driver, std::string{uid}, SWIFT_PERM_WRITE);
+ if (!grant) {
+ ldpp_dout(dpp, 4) << "ERROR: failed to parse write acl grant "
+ << uid << dendl;
+ return -EINVAL;
+ }
+ acl.add_grant(*grant);
}
rw_mask |= SWIFT_PERM_WRITE;
}
return 0;
}
-void RGWAccessControlPolicy_SWIFT::filter_merge(uint32_t rw_mask,
- RGWAccessControlPolicy_SWIFT *old)
+void merge_policy(uint32_t rw_mask, const RGWAccessControlPolicy& src,
+ RGWAccessControlPolicy& dest)
{
/* rw_mask&SWIFT_PERM_READ => setting read acl,
* rw_mask&SWIFT_PERM_WRITE => setting write acl
@@ -240,112 +219,78 @@ void RGWAccessControlPolicy_SWIFT::filter_merge(uint32_t rw_mask,
return;
}
rw_mask ^= (SWIFT_PERM_READ|SWIFT_PERM_WRITE);
- for (auto &iter: old->acl.get_grant_map()) {
- ACLGrant& grant = iter.second;
+ for (const auto &iter: src.get_acl().get_grant_map()) {
+ const ACLGrant& grant = iter.second;
uint32_t perm = grant.get_permission().get_permissions();
- rgw_user id;
- string url_spec;
- if (!grant.get_id(id)) {
- if (grant.get_group() != ACL_GROUP_ALL_USERS) {
- url_spec = grant.get_referer();
- if (url_spec.empty()) {
- continue;
- }
- if (perm == 0) {
- /* We need to carry also negative, HTTP referrer-based ACLs. */
- perm = SWIFT_PERM_READ;
- }
+ if (const auto* referer = grant.get_referer(); referer) {
+ if (referer->url_spec.empty()) {
+ continue;
+ }
+ if (perm == 0) {
+ /* We need to carry also negative, HTTP referrer-based ACLs. */
+ perm = SWIFT_PERM_READ;
}
}
if (perm & rw_mask) {
- acl.add_grant(&grant);
+ dest.get_acl().add_grant(grant);
}
}
}
-void RGWAccessControlPolicy_SWIFT::to_str(string& read, string& write)
+void format_container_acls(const RGWAccessControlPolicy& policy,
+ std::string& read, std::string& write)
{
- multimap<string, ACLGrant>& m = acl.get_grant_map();
- multimap<string, ACLGrant>::iterator iter;
-
- for (iter = m.begin(); iter != m.end(); ++iter) {
- ACLGrant& grant = iter->second;
+ for (const auto& [k, grant] : policy.get_acl().get_grant_map()) {
const uint32_t perm = grant.get_permission().get_permissions();
- rgw_user id;
- string url_spec;
- if (!grant.get_id(id)) {
- if (grant.get_group() == ACL_GROUP_ALL_USERS) {
+ std::string id;
+ std::string url_spec;
+ if (const auto user = grant.get_user(); user) {
+ id = user->id.to_str();
+ } else if (const auto group = grant.get_group(); group) {
+ if (group->type == ACL_GROUP_ALL_USERS) {
id = SWIFT_GROUP_ALL_USERS;
- } else {
- url_spec = grant.get_referer();
- if (url_spec.empty()) {
- continue;
- }
- id = (perm != 0) ? ".r:" + url_spec : ".r:-" + url_spec;
}
+ } else if (const auto referer = grant.get_referer(); referer) {
+ url_spec = referer->url_spec;
+ if (url_spec.empty()) {
+ continue;
+ }
+ id = (perm != 0) ? ".r:" + url_spec : ".r:-" + url_spec;
}
if (perm & SWIFT_PERM_READ) {
if (!read.empty()) {
read.append(",");
}
- read.append(id.to_str());
+ read.append(id);
} else if (perm & SWIFT_PERM_WRITE) {
if (!write.empty()) {
write.append(",");
}
- write.append(id.to_str());
+ write.append(id);
} else if (perm == 0 && !url_spec.empty()) {
/* only X-Container-Read headers support referers */
if (!read.empty()) {
read.append(",");
}
- read.append(id.to_str());
- }
- }
-}
-
-void RGWAccessControlPolicy_SWIFTAcct::add_grants(const DoutPrefixProvider *dpp,
- rgw::sal::Driver* driver,
- const std::vector<std::string>& uids,
- const uint32_t perm)
-{
- for (const auto& uid : uids) {
- ACLGrant grant;
-
- if (uid_is_public(uid)) {
- grant.set_group(ACL_GROUP_ALL_USERS, perm);
- acl.add_grant(&grant);
- } else {
- std::unique_ptr<rgw::sal::User> user = driver->get_user(rgw_user(uid));
-
- if (user->load_user(dpp, null_yield) < 0) {
- ldpp_dout(dpp, 10) << "grant user does not exist:" << uid << dendl;
- /* skipping silently */
- grant.set_canon(user->get_id(), std::string(), perm);
- acl.add_grant(&grant);
- } else {
- grant.set_canon(user->get_id(), user->get_display_name(), perm);
- acl.add_grant(&grant);
- }
+ read.append(id);
}
}
}
-bool RGWAccessControlPolicy_SWIFTAcct::create(const DoutPrefixProvider *dpp,
- rgw::sal::Driver* driver,
- const rgw_user& id,
- const std::string& name,
- const std::string& acl_str)
+int create_account_policy(const DoutPrefixProvider* dpp,
+ rgw::sal::Driver* driver,
+ const rgw_user& id,
+ const std::string& name,
+ const std::string& acl_str,
+ RGWAccessControlPolicy& policy)
{
- acl.create_default(id, name);
- owner.set_id(id);
- owner.set_name(name);
+ policy.create_default(id, name);
+ auto& acl = policy.get_acl();
JSONParser parser;
-
if (!parser.parse(acl_str.c_str(), acl_str.length())) {
ldpp_dout(dpp, 0) << "ERROR: JSONParser::parse returned error=" << dendl;
- return false;
+ return -EINVAL;
}
JSONObjIter iter = parser.find_first("admin");
@@ -354,7 +299,7 @@ bool RGWAccessControlPolicy_SWIFTAcct::create(const DoutPrefixProvider *dpp,
decode_json_obj(admin, *iter);
ldpp_dout(dpp, 0) << "admins: " << admin << dendl;
- add_grants(dpp, driver, admin, SWIFT_PERM_ADMIN);
+ add_grants(dpp, driver, admin, SWIFT_PERM_ADMIN, acl);
}
iter = parser.find_first("read-write");
@@ -363,7 +308,7 @@ bool RGWAccessControlPolicy_SWIFTAcct::create(const DoutPrefixProvider *dpp,
decode_json_obj(readwrite, *iter);
ldpp_dout(dpp, 0) << "read-write: " << readwrite << dendl;
- add_grants(dpp, driver, readwrite, SWIFT_PERM_RWRT);
+ add_grants(dpp, driver, readwrite, SWIFT_PERM_RWRT, acl);
}
iter = parser.find_first("read-only");
@@ -372,39 +317,47 @@ bool RGWAccessControlPolicy_SWIFTAcct::create(const DoutPrefixProvider *dpp,
decode_json_obj(readonly, *iter);
ldpp_dout(dpp, 0) << "read-only: " << readonly << dendl;
- add_grants(dpp, driver, readonly, SWIFT_PERM_READ);
+ add_grants(dpp, driver, readonly, SWIFT_PERM_READ, acl);
}
- return true;
+ return 0;
}
-boost::optional<std::string> RGWAccessControlPolicy_SWIFTAcct::to_str() const
+auto format_account_acl(const RGWAccessControlPolicy& policy)
+ -> std::optional<std::string>
{
+ const ACLOwner& owner = policy.get_owner();
+
std::vector<std::string> admin;
std::vector<std::string> readwrite;
std::vector<std::string> readonly;
/* Partition the grant map into three not-overlapping groups. */
- for (const auto& item : get_acl().get_grant_map()) {
+ for (const auto& item : policy.get_acl().get_grant_map()) {
const ACLGrant& grant = item.second;
const uint32_t perm = grant.get_permission().get_permissions();
- rgw_user id;
- if (!grant.get_id(id)) {
- if (grant.get_group() != ACL_GROUP_ALL_USERS) {
+ std::string id;
+ if (const auto user = grant.get_user(); user) {
+ if (owner.id == user->id) {
+ continue;
+ }
+ id = user->id.to_str();
+ } else if (const auto group = grant.get_group(); group) {
+ if (group->type != ACL_GROUP_ALL_USERS) {
continue;
}
id = SWIFT_GROUP_ALL_USERS;
- } else if (owner.get_id() == id) {
+ } else {
continue;
}
if (SWIFT_PERM_ADMIN == (perm & SWIFT_PERM_ADMIN)) {
- admin.insert(admin.end(), id.to_str());
+ admin.insert(admin.end(), id);
} else if (SWIFT_PERM_RWRT == (perm & SWIFT_PERM_RWRT)) {
- readwrite.insert(readwrite.end(), id.to_str());
+ readwrite.insert(readwrite.end(), id);
} else if (SWIFT_PERM_READ == (perm & SWIFT_PERM_READ)) {
- readonly.insert(readonly.end(), id.to_str());
+ readonly.insert(readonly.end(), id);
} else {
// FIXME: print a warning
}
@@ -413,7 +366,7 @@ boost::optional<std::string> RGWAccessControlPolicy_SWIFTAcct::to_str() const
/* If there is no grant to serialize, let's exit earlier to not return
* an empty JSON object which breaks the functional tests of Swift. */
if (admin.empty() && readwrite.empty() && readonly.empty()) {
- return boost::none;
+ return std::nullopt;
}
/* Serialize the groups. */
@@ -436,3 +389,5 @@ boost::optional<std::string> RGWAccessControlPolicy_SWIFTAcct::to_str() const
return oss.str();
}
+
+} // namespace rgw::swift
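
Sketch only: driving the container-ACL helpers above end to end. The dpp and driver are assumed to come from the caller, and the ".r:*" read list is just an example value.

int roundtrip_container_acls(const DoutPrefixProvider* dpp,
                             rgw::sal::Driver* driver,
                             const rgw_user& owner, const std::string& name,
                             std::string& read_out, std::string& write_out)
{
  RGWAccessControlPolicy policy;
  uint32_t rw_mask = 0; // caller-initialized now that create() no longer resets it
  int r = rgw::swift::create_container_policy(dpp, driver, owner, name,
                                              ".r:*",   // X-Container-Read
                                              nullptr,  // no X-Container-Write
                                              rw_mask, policy);
  if (r < 0) {
    return r;
  }
  rgw::swift::format_container_acls(policy, read_out, write_out);
  return 0;
}
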
diff --git a/src/rgw/rgw_acl_swift.h b/src/rgw/rgw_acl_swift.h
index 4cb1e4b8f8f..a16bea894db 100644
--- a/src/rgw/rgw_acl_swift.h
+++ b/src/rgw/rgw_acl_swift.h
@@ -3,56 +3,46 @@
#pragma once
-#include <map>
-#include <vector>
#include <string>
-#include <include/types.h>
-
-#include <boost/optional.hpp>
-
-#include "rgw_acl.h"
-
-class RGWUserCtl;
-
-class RGWAccessControlPolicy_SWIFT : public RGWAccessControlPolicy
-{
- int add_grants(const DoutPrefixProvider *dpp, rgw::sal::Driver* driver,
- const std::vector<std::string>& uids,
- uint32_t perm);
-
-public:
- explicit RGWAccessControlPolicy_SWIFT(CephContext* const cct)
- : RGWAccessControlPolicy(cct) {
- }
- ~RGWAccessControlPolicy_SWIFT() override = default;
-
- int create(const DoutPrefixProvider *dpp,
- rgw::sal::Driver* driver,
- const rgw_user& id,
- const std::string& name,
- const char* read_list,
- const char* write_list,
- uint32_t& rw_mask);
- void filter_merge(uint32_t mask, RGWAccessControlPolicy_SWIFT *policy);
- void to_str(std::string& read, std::string& write);
-};
-
-class RGWAccessControlPolicy_SWIFTAcct : public RGWAccessControlPolicy
-{
-public:
- explicit RGWAccessControlPolicy_SWIFTAcct(CephContext * const cct)
- : RGWAccessControlPolicy(cct) {
- }
- ~RGWAccessControlPolicy_SWIFTAcct() override {}
-
- void add_grants(const DoutPrefixProvider *dpp,
- rgw::sal::Driver* driver,
- const std::vector<std::string>& uids,
- uint32_t perm);
- bool create(const DoutPrefixProvider *dpp,
- rgw::sal::Driver* driver,
- const rgw_user& id,
- const std::string& name,
- const std::string& acl_str);
- boost::optional<std::string> to_str() const;
-};
+#include "rgw_sal_fwd.h"
+#include "rgw_user_types.h"
+
+class DoutPrefixProvider;
+class RGWAccessControlPolicy;
+
+namespace rgw::swift {
+
+/// Create a policy based on swift container acl headers
+/// X-Container-Read/X-Container-Write.
+int create_container_policy(const DoutPrefixProvider *dpp,
+ rgw::sal::Driver* driver,
+ const rgw_user& id,
+ const std::string& name,
+ const char* read_list,
+ const char* write_list,
+ uint32_t& rw_mask,
+ RGWAccessControlPolicy& policy);
+
+/// Copy grants matching the permission mask (SWIFT_PERM_READ/WRITE) from
+/// one policy to another.
+void merge_policy(uint32_t rw_mask, const RGWAccessControlPolicy& src,
+ RGWAccessControlPolicy& dest);
+
+/// Format the policy in terms of X-Container-Read/X-Container-Write strings.
+void format_container_acls(const RGWAccessControlPolicy& policy,
+ std::string& read, std::string& write);
+
+/// Create a policy based on swift account acl header X-Account-Access-Control.
+int create_account_policy(const DoutPrefixProvider* dpp,
+ rgw::sal::Driver* driver,
+ const rgw_user& id,
+ const std::string& name,
+ const std::string& acl_str,
+ RGWAccessControlPolicy& policy);
+
+/// Format the policy in terms of the X-Account-Access-Control string. Returns
+/// std::nullopt if there are no admin/read-write/read-only entries.
+auto format_account_acl(const RGWAccessControlPolicy& policy)
+ -> std::optional<std::string>;
+
+} // namespace rgw::swift
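
A hedged sketch for the account-ACL entry points declared above; the JSON body is illustrative.

int roundtrip_account_acl(const DoutPrefixProvider* dpp,
                          rgw::sal::Driver* driver,
                          const rgw_user& owner, const std::string& name,
                          std::string& out)
{
  RGWAccessControlPolicy policy;
  const std::string acl_json = R"({"read-only": [".r:*"]})";
  int r = rgw::swift::create_account_policy(dpp, driver, owner, name,
                                            acl_json, policy);
  if (r < 0) {
    return r; // -EINVAL on malformed JSON
  }
  // std::nullopt means there were no admin/read-write/read-only grants to format
  if (auto formatted = rgw::swift::format_account_acl(policy); formatted) {
    out = *formatted;
  }
  return 0;
}
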
diff --git a/src/rgw/rgw_acl_types.h b/src/rgw/rgw_acl_types.h
index c76d085e0b5..b9866e9b289 100644
--- a/src/rgw/rgw_acl_types.h
+++ b/src/rgw/rgw_acl_types.h
@@ -181,12 +181,14 @@ class ACLGranteeType
protected:
__u32 type;
public:
- ACLGranteeType() : type(ACL_TYPE_UNKNOWN) {}
- virtual ~ACLGranteeType() {}
-// virtual const char *to_string() = 0;
+ ACLGranteeType(ACLGranteeTypeEnum t = ACL_TYPE_UNKNOWN) : type(t) {}
+
ACLGranteeTypeEnum get_type() const { return (ACLGranteeTypeEnum)type; }
+ operator ACLGranteeTypeEnum() const { return get_type(); }
+
void set(ACLGranteeTypeEnum t) { type = t; }
-// virtual void set(const char *s) = 0;
+ ACLGranteeType& operator=(ACLGranteeTypeEnum t) { set(t); return *this; }
+
void encode(bufferlist& bl) const {
ENCODE_START(2, 2, bl);
encode(type, bl);
@@ -204,10 +206,3 @@ public:
friend bool operator!=(const ACLGranteeType& lhs, const ACLGranteeType& rhs);
};
WRITE_CLASS_ENCODER(ACLGranteeType)
-
-class ACLGrantee
-{
-public:
- ACLGrantee() {}
- ~ACLGrantee() {}
-};
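
A minimal illustration of the conversions ACLGranteeType now provides; nothing beyond the class above is assumed, and the labels are arbitrary.

// The converting constructor, enum assignment, and implicit conversion back to
// ACLGranteeTypeEnum let the wrapper be used directly in switch statements.
const char* grantee_kind(ACLGranteeType type)
{
  switch (type) {
  case ACL_TYPE_CANON_USER: return "user";
  case ACL_TYPE_EMAIL_USER: return "email";
  case ACL_TYPE_GROUP:      return "group";
  case ACL_TYPE_REFERER:    return "referer";
  default:                  return "unknown";
  }
}
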
diff --git a/src/rgw/rgw_admin.cc b/src/rgw/rgw_admin.cc
index c19d44c9e53..f4a2d1480bd 100644
--- a/src/rgw/rgw_admin.cc
+++ b/src/rgw/rgw_admin.cc
@@ -2098,7 +2098,7 @@ stringstream& push_ss(stringstream& ss, list<string>& l, int tab = 0)
static void get_md_sync_status(list<string>& status)
{
- RGWMetaSyncStatusManager sync(static_cast<rgw::sal::RadosStore*>(driver), static_cast<rgw::sal::RadosStore*>(driver)->svc()->rados->get_async_processor());
+ RGWMetaSyncStatusManager sync(static_cast<rgw::sal::RadosStore*>(driver), static_cast<rgw::sal::RadosStore*>(driver)->svc()->async_processor);
int ret = sync.init(dpp());
if (ret < 0) {
@@ -2254,7 +2254,7 @@ static void get_data_sync_status(const rgw_zone_id& source_zone, list<string>& s
flush_ss(ss, status);
return;
}
- RGWDataSyncStatusManager sync(static_cast<rgw::sal::RadosStore*>(driver), static_cast<rgw::sal::RadosStore*>(driver)->svc()->rados->get_async_processor(), source_zone, nullptr);
+ RGWDataSyncStatusManager sync(static_cast<rgw::sal::RadosStore*>(driver), static_cast<rgw::sal::RadosStore*>(driver)->svc()->async_processor, source_zone, nullptr);
int ret = sync.init(dpp());
if (ret < 0) {
@@ -8995,7 +8995,7 @@ next:
}
if (opt_cmd == OPT::METADATA_SYNC_STATUS) {
- RGWMetaSyncStatusManager sync(static_cast<rgw::sal::RadosStore*>(driver), static_cast<rgw::sal::RadosStore*>(driver)->svc()->rados->get_async_processor());
+ RGWMetaSyncStatusManager sync(static_cast<rgw::sal::RadosStore*>(driver), static_cast<rgw::sal::RadosStore*>(driver)->svc()->async_processor);
int ret = sync.init(dpp());
if (ret < 0) {
@@ -9039,7 +9039,7 @@ next:
}
if (opt_cmd == OPT::METADATA_SYNC_INIT) {
- RGWMetaSyncStatusManager sync(static_cast<rgw::sal::RadosStore*>(driver), static_cast<rgw::sal::RadosStore*>(driver)->svc()->rados->get_async_processor());
+ RGWMetaSyncStatusManager sync(static_cast<rgw::sal::RadosStore*>(driver), static_cast<rgw::sal::RadosStore*>(driver)->svc()->async_processor);
int ret = sync.init(dpp());
if (ret < 0) {
@@ -9055,7 +9055,7 @@ next:
if (opt_cmd == OPT::METADATA_SYNC_RUN) {
- RGWMetaSyncStatusManager sync(static_cast<rgw::sal::RadosStore*>(driver), static_cast<rgw::sal::RadosStore*>(driver)->svc()->rados->get_async_processor());
+ RGWMetaSyncStatusManager sync(static_cast<rgw::sal::RadosStore*>(driver), static_cast<rgw::sal::RadosStore*>(driver)->svc()->async_processor);
int ret = sync.init(dpp());
if (ret < 0) {
@@ -9075,7 +9075,7 @@ next:
cerr << "ERROR: source zone not specified" << std::endl;
return EINVAL;
}
- RGWDataSyncStatusManager sync(static_cast<rgw::sal::RadosStore*>(driver), static_cast<rgw::sal::RadosStore*>(driver)->svc()->rados->get_async_processor(), source_zone, nullptr);
+ RGWDataSyncStatusManager sync(static_cast<rgw::sal::RadosStore*>(driver), static_cast<rgw::sal::RadosStore*>(driver)->svc()->async_processor, source_zone, nullptr);
int ret = sync.init(dpp());
if (ret < 0) {
@@ -9145,7 +9145,7 @@ next:
return EINVAL;
}
- RGWDataSyncStatusManager sync(static_cast<rgw::sal::RadosStore*>(driver), static_cast<rgw::sal::RadosStore*>(driver)->svc()->rados->get_async_processor(), source_zone, nullptr);
+ RGWDataSyncStatusManager sync(static_cast<rgw::sal::RadosStore*>(driver), static_cast<rgw::sal::RadosStore*>(driver)->svc()->async_processor, source_zone, nullptr);
int ret = sync.init(dpp());
if (ret < 0) {
@@ -9174,7 +9174,7 @@ next:
return ret;
}
- RGWDataSyncStatusManager sync(static_cast<rgw::sal::RadosStore*>(driver), static_cast<rgw::sal::RadosStore*>(driver)->svc()->rados->get_async_processor(), source_zone, nullptr, sync_module);
+ RGWDataSyncStatusManager sync(static_cast<rgw::sal::RadosStore*>(driver), static_cast<rgw::sal::RadosStore*>(driver)->svc()->async_processor, source_zone, nullptr, sync_module);
ret = sync.init(dpp());
if (ret < 0) {
diff --git a/src/rgw/rgw_common.cc b/src/rgw/rgw_common.cc
index 6c07e323243..b16d9d97fa8 100644
--- a/src/rgw/rgw_common.cc
+++ b/src/rgw/rgw_common.cc
@@ -639,7 +639,7 @@ bool parse_iso8601(const char *s, struct tm *t, uint32_t *pns, bool extended_for
return true;
}
-int parse_key_value(string& in_str, const char *delim, string& key, string& val)
+int parse_key_value(const string& in_str, const char *delim, string& key, string& val)
{
if (delim == NULL)
return -EINVAL;
@@ -654,7 +654,7 @@ int parse_key_value(string& in_str, const char *delim, string& key, string& val)
return 0;
}
-int parse_key_value(string& in_str, string& key, string& val)
+int parse_key_value(const string& in_str, string& key, string& val)
{
return parse_key_value(in_str, "=", key,val);
}
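Since the overloads above now take in_str by const reference, callers can pass temporaries or otherwise-const strings without a copy. A minimal usage sketch (the header value below is purely illustrative, and the function body is only declared, not defined here):

#include <string>

int parse_key_value(const std::string& in_str, std::string& key, std::string& val);

void demo() {
  std::string key, val;
  // A temporary can now bind directly to the const reference parameter.
  if (parse_key_value(std::string("x-amz-meta-color=blue"), key, val) == 0) {
    // key == "x-amz-meta-color", val == "blue"
  }
}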
@@ -1135,7 +1135,7 @@ Effect eval_identity_or_session_policies(const DoutPrefixProvider* dpp,
bool verify_user_permission(const DoutPrefixProvider* dpp,
perm_state_base * const s,
- RGWAccessControlPolicy * const user_acl,
+ const RGWAccessControlPolicy& user_acl,
const vector<rgw::IAM::Policy>& user_policies,
const vector<rgw::IAM::Policy>& session_policies,
const rgw::ARN& res,
@@ -1176,20 +1176,20 @@ bool verify_user_permission(const DoutPrefixProvider* dpp,
bool verify_user_permission_no_policy(const DoutPrefixProvider* dpp,
struct perm_state_base * const s,
- RGWAccessControlPolicy * const user_acl,
+ const RGWAccessControlPolicy& user_acl,
const int perm)
{
if (s->identity->get_identity_type() == TYPE_ROLE)
return false;
- /* S3 doesn't support account ACLs. */
- if (!user_acl)
+ /* S3 doesn't support account ACLs, so user_acl will be uninitialized. */
+ if (user_acl.get_owner().id.empty())
return true;
if ((perm & (int)s->perm_mask) != perm)
return false;
- return user_acl->verify_permission(dpp, *s->identity, perm, perm);
+ return user_acl.verify_permission(dpp, *s->identity, perm, perm);
}
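With ACL policies now held by value, "no ACL" is signalled by a default-constructed policy whose owner id is empty, rather than by a null pointer. A rough sketch of that convention using simplified stand-in types (not the real RGW headers):

#include <string>

struct Owner { std::string id; };                 // stand-in for ACLOwner
struct AccessControlPolicy {                      // stand-in for RGWAccessControlPolicy
  Owner owner;
  const Owner& get_owner() const { return owner; }
};

bool has_acl(const AccessControlPolicy& acl) {
  // Old convention: a nullptr RGWAccessControlPolicy* meant "no ACL attached".
  // New convention: a default-constructed policy (empty owner id) means the same.
  return !acl.get_owner().id.empty();
}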
bool verify_user_permission(const DoutPrefixProvider* dpp,
@@ -1199,7 +1199,7 @@ bool verify_user_permission(const DoutPrefixProvider* dpp,
bool mandatory_policy)
{
perm_state_from_req_state ps(s);
- return verify_user_permission(dpp, &ps, s->user_acl.get(), s->iam_user_policies, s->session_policies, res, op, mandatory_policy);
+ return verify_user_permission(dpp, &ps, s->user_acl, s->iam_user_policies, s->session_policies, res, op, mandatory_policy);
}
bool verify_user_permission_no_policy(const DoutPrefixProvider* dpp,
@@ -1207,7 +1207,7 @@ bool verify_user_permission_no_policy(const DoutPrefixProvider* dpp,
const int perm)
{
perm_state_from_req_state ps(s);
- return verify_user_permission_no_policy(dpp, &ps, s->user_acl.get(), perm);
+ return verify_user_permission_no_policy(dpp, &ps, s->user_acl, perm);
}
bool verify_requester_payer_permission(struct perm_state_base *s)
@@ -1233,8 +1233,8 @@ bool verify_requester_payer_permission(struct perm_state_base *s)
bool verify_bucket_permission(const DoutPrefixProvider* dpp,
struct perm_state_base * const s,
const rgw_bucket& bucket,
- RGWAccessControlPolicy * const user_acl,
- RGWAccessControlPolicy * const bucket_acl,
+ const RGWAccessControlPolicy& user_acl,
+ const RGWAccessControlPolicy& bucket_acl,
const boost::optional<Policy>& bucket_policy,
const vector<Policy>& identity_policies,
const vector<Policy>& session_policies,
@@ -1292,8 +1292,8 @@ bool verify_bucket_permission(const DoutPrefixProvider* dpp,
bool verify_bucket_permission(const DoutPrefixProvider* dpp,
req_state * const s,
const rgw_bucket& bucket,
- RGWAccessControlPolicy * const user_acl,
- RGWAccessControlPolicy * const bucket_acl,
+ const RGWAccessControlPolicy& user_acl,
+ const RGWAccessControlPolicy& bucket_acl,
const boost::optional<Policy>& bucket_policy,
const vector<Policy>& user_policies,
const vector<Policy>& session_policies,
@@ -1307,31 +1307,25 @@ bool verify_bucket_permission(const DoutPrefixProvider* dpp,
}
bool verify_bucket_permission_no_policy(const DoutPrefixProvider* dpp, struct perm_state_base * const s,
- RGWAccessControlPolicy * const user_acl,
- RGWAccessControlPolicy * const bucket_acl,
+ const RGWAccessControlPolicy& user_acl,
+ const RGWAccessControlPolicy& bucket_acl,
const int perm)
{
- if (!bucket_acl)
- return false;
-
if ((perm & (int)s->perm_mask) != perm)
return false;
- if (bucket_acl->verify_permission(dpp, *s->identity, perm, perm,
- s->get_referer(),
- s->bucket_access_conf &&
- s->bucket_access_conf->ignore_public_acls()))
+ if (bucket_acl.verify_permission(dpp, *s->identity, perm, perm,
+ s->get_referer(),
+ s->bucket_access_conf &&
+ s->bucket_access_conf->ignore_public_acls()))
return true;
- if (!user_acl)
- return false;
-
- return user_acl->verify_permission(dpp, *s->identity, perm, perm);
+ return user_acl.verify_permission(dpp, *s->identity, perm, perm);
}
bool verify_bucket_permission_no_policy(const DoutPrefixProvider* dpp, req_state * const s,
- RGWAccessControlPolicy * const user_acl,
- RGWAccessControlPolicy * const bucket_acl,
+ const RGWAccessControlPolicy& user_acl,
+ const RGWAccessControlPolicy& bucket_acl,
const int perm)
{
perm_state_from_req_state ps(s);
@@ -1351,8 +1345,8 @@ bool verify_bucket_permission_no_policy(const DoutPrefixProvider* dpp, req_state
return verify_bucket_permission_no_policy(dpp,
&ps,
- s->user_acl.get(),
- s->bucket_acl.get(),
+ s->user_acl,
+ s->bucket_acl,
perm);
}
@@ -1368,8 +1362,8 @@ bool verify_bucket_permission(const DoutPrefixProvider* dpp, req_state * const s
return verify_bucket_permission(dpp,
&ps,
s->bucket->get_key(),
- s->user_acl.get(),
- s->bucket_acl.get(),
+ s->user_acl,
+ s->bucket_acl,
s->iam_policy,
s->iam_user_policies,
s->session_policies,
@@ -1421,7 +1415,7 @@ int verify_bucket_owner_or_policy(req_state* const s,
identity_policy_res == Effect::Allow ||
(e == Effect::Pass &&
identity_policy_res == Effect::Pass &&
- s->auth.identity->is_owner_of(s->bucket_owner.get_id()))) {
+ s->auth.identity->is_owner_of(s->bucket_owner.id))) {
return 0;
} else {
return -EACCES;
@@ -1432,8 +1426,8 @@ int verify_bucket_owner_or_policy(req_state* const s,
static inline bool check_deferred_bucket_perms(const DoutPrefixProvider* dpp,
struct perm_state_base * const s,
const rgw_bucket& bucket,
- RGWAccessControlPolicy * const user_acl,
- RGWAccessControlPolicy * const bucket_acl,
+ const RGWAccessControlPolicy& user_acl,
+ const RGWAccessControlPolicy& bucket_acl,
const boost::optional<Policy>& bucket_policy,
const vector<Policy>& identity_policies,
const vector<Policy>& session_policies,
@@ -1446,8 +1440,8 @@ static inline bool check_deferred_bucket_perms(const DoutPrefixProvider* dpp,
static inline bool check_deferred_bucket_only_acl(const DoutPrefixProvider* dpp,
struct perm_state_base * const s,
- RGWAccessControlPolicy * const user_acl,
- RGWAccessControlPolicy * const bucket_acl,
+ const RGWAccessControlPolicy& user_acl,
+ const RGWAccessControlPolicy& bucket_acl,
const uint8_t deferred_check,
const int perm)
{
@@ -1457,9 +1451,9 @@ static inline bool check_deferred_bucket_only_acl(const DoutPrefixProvider* dpp,
bool verify_object_permission(const DoutPrefixProvider* dpp, struct perm_state_base * const s,
const rgw_obj& obj,
- RGWAccessControlPolicy * const user_acl,
- RGWAccessControlPolicy * const bucket_acl,
- RGWAccessControlPolicy * const object_acl,
+ const RGWAccessControlPolicy& user_acl,
+ const RGWAccessControlPolicy& bucket_acl,
+ const RGWAccessControlPolicy& object_acl,
const boost::optional<Policy>& bucket_policy,
const vector<Policy>& identity_policies,
const vector<Policy>& session_policies,
@@ -1512,14 +1506,10 @@ bool verify_object_permission(const DoutPrefixProvider* dpp, struct perm_state_b
return true;
}
- if (!object_acl) {
- return false;
- }
-
- bool ret = object_acl->verify_permission(dpp, *s->identity, s->perm_mask, perm,
- nullptr, /* http_referrer */
- s->bucket_access_conf &&
- s->bucket_access_conf->ignore_public_acls());
+ bool ret = object_acl.verify_permission(dpp, *s->identity, s->perm_mask, perm,
+ nullptr, /* http_referrer */
+ s->bucket_access_conf &&
+ s->bucket_access_conf->ignore_public_acls());
if (ret) {
return true;
}
@@ -1541,21 +1531,18 @@ bool verify_object_permission(const DoutPrefixProvider* dpp, struct perm_state_b
/* we already verified the user mask above, so we pass swift_perm as the mask here,
otherwise the mask might not cover the swift permissions bits */
- if (bucket_acl->verify_permission(dpp, *s->identity, swift_perm, swift_perm,
- s->get_referer()))
+ if (bucket_acl.verify_permission(dpp, *s->identity, swift_perm, swift_perm,
+ s->get_referer()))
return true;
- if (!user_acl)
- return false;
-
- return user_acl->verify_permission(dpp, *s->identity, swift_perm, swift_perm);
+ return user_acl.verify_permission(dpp, *s->identity, swift_perm, swift_perm);
}
bool verify_object_permission(const DoutPrefixProvider* dpp, req_state * const s,
const rgw_obj& obj,
- RGWAccessControlPolicy * const user_acl,
- RGWAccessControlPolicy * const bucket_acl,
- RGWAccessControlPolicy * const object_acl,
+ const RGWAccessControlPolicy& user_acl,
+ const RGWAccessControlPolicy& bucket_acl,
+ const RGWAccessControlPolicy& object_acl,
const boost::optional<Policy>& bucket_policy,
const vector<Policy>& identity_policies,
const vector<Policy>& session_policies,
@@ -1570,9 +1557,9 @@ bool verify_object_permission(const DoutPrefixProvider* dpp, req_state * const s
bool verify_object_permission_no_policy(const DoutPrefixProvider* dpp,
struct perm_state_base * const s,
- RGWAccessControlPolicy * const user_acl,
- RGWAccessControlPolicy * const bucket_acl,
- RGWAccessControlPolicy * const object_acl,
+ const RGWAccessControlPolicy& user_acl,
+ const RGWAccessControlPolicy& bucket_acl,
+ const RGWAccessControlPolicy& object_acl,
const int perm)
{
if (check_deferred_bucket_only_acl(dpp, s, user_acl, bucket_acl, RGW_DEFER_TO_BUCKET_ACLS_RECURSE, perm) ||
@@ -1580,14 +1567,10 @@ bool verify_object_permission_no_policy(const DoutPrefixProvider* dpp,
return true;
}
- if (!object_acl) {
- return false;
- }
-
- bool ret = object_acl->verify_permission(dpp, *s->identity, s->perm_mask, perm,
- nullptr, /* http referrer */
- s->bucket_access_conf &&
- s->bucket_access_conf->ignore_public_acls());
+ bool ret = object_acl.verify_permission(dpp, *s->identity, s->perm_mask, perm,
+ nullptr, /* http referrer */
+ s->bucket_access_conf &&
+ s->bucket_access_conf->ignore_public_acls());
if (ret) {
return true;
}
@@ -1609,14 +1592,11 @@ bool verify_object_permission_no_policy(const DoutPrefixProvider* dpp,
/* we already verified the user mask above, so we pass swift_perm as the mask here,
otherwise the mask might not cover the swift permissions bits */
- if (bucket_acl->verify_permission(dpp, *s->identity, swift_perm, swift_perm,
- s->get_referer()))
+ if (bucket_acl.verify_permission(dpp, *s->identity, swift_perm, swift_perm,
+ s->get_referer()))
return true;
- if (!user_acl)
- return false;
-
- return user_acl->verify_permission(dpp, *s->identity, swift_perm, swift_perm);
+ return user_acl.verify_permission(dpp, *s->identity, swift_perm, swift_perm);
}
bool verify_object_permission_no_policy(const DoutPrefixProvider* dpp, req_state *s, int perm)
@@ -1628,9 +1608,9 @@ bool verify_object_permission_no_policy(const DoutPrefixProvider* dpp, req_state
return verify_object_permission_no_policy(dpp,
&ps,
- s->user_acl.get(),
- s->bucket_acl.get(),
- s->object_acl.get(),
+ s->user_acl,
+ s->bucket_acl,
+ s->object_acl,
perm);
}
@@ -1641,9 +1621,9 @@ bool verify_object_permission(const DoutPrefixProvider* dpp, req_state *s, uint6
return verify_object_permission(dpp,
&ps,
rgw_obj(s->bucket->get_key(), s->object->get_key()),
- s->user_acl.get(),
- s->bucket_acl.get(),
- s->object_acl.get(),
+ s->user_acl,
+ s->bucket_acl,
+ s->object_acl,
s->iam_policy,
s->iam_user_policies,
s->session_policies,
diff --git a/src/rgw/rgw_common.h b/src/rgw/rgw_common.h
index 85d5ab2c155..e6973766929 100644
--- a/src/rgw/rgw_common.h
+++ b/src/rgw/rgw_common.h
@@ -1180,9 +1180,9 @@ struct req_state : DoutPrefixProvider {
} s3_postobj_creds;
} auth;
- std::unique_ptr<RGWAccessControlPolicy> user_acl;
- std::unique_ptr<RGWAccessControlPolicy> bucket_acl;
- std::unique_ptr<RGWAccessControlPolicy> object_acl;
+ RGWAccessControlPolicy user_acl;
+ RGWAccessControlPolicy bucket_acl;
+ RGWAccessControlPolicy object_acl;
rgw::IAM::Environment env;
boost::optional<rgw::IAM::Policy> iam_policy;
@@ -1490,8 +1490,8 @@ bool rgw_set_amz_meta_header(
extern std::string rgw_string_unquote(const std::string& s);
extern void parse_csv_string(const std::string& ival, std::vector<std::string>& ovals);
-extern int parse_key_value(std::string& in_str, std::string& key, std::string& val);
-extern int parse_key_value(std::string& in_str, const char *delim, std::string& key, std::string& val);
+extern int parse_key_value(const std::string& in_str, std::string& key, std::string& val);
+extern int parse_key_value(const std::string& in_str, const char *delim, std::string& key, std::string& val);
extern boost::optional<std::pair<std::string_view,std::string_view>>
parse_key_value(const std::string_view& in_str,
@@ -1584,20 +1584,20 @@ struct perm_state : public perm_state_base {
bool verify_bucket_permission_no_policy(
const DoutPrefixProvider* dpp,
struct perm_state_base * const s,
- RGWAccessControlPolicy * const user_acl,
- RGWAccessControlPolicy * const bucket_acl,
+ const RGWAccessControlPolicy& user_acl,
+ const RGWAccessControlPolicy& bucket_acl,
const int perm);
bool verify_user_permission_no_policy(const DoutPrefixProvider* dpp,
struct perm_state_base * const s,
- RGWAccessControlPolicy * const user_acl,
+ const RGWAccessControlPolicy& user_acl,
const int perm);
bool verify_object_permission_no_policy(const DoutPrefixProvider* dpp,
struct perm_state_base * const s,
- RGWAccessControlPolicy * const user_acl,
- RGWAccessControlPolicy * const bucket_acl,
- RGWAccessControlPolicy * const object_acl,
+ const RGWAccessControlPolicy& user_acl,
+ const RGWAccessControlPolicy& bucket_acl,
+ const RGWAccessControlPolicy& object_acl,
const int perm);
/** Check if the req_state's user has the necessary permissions
@@ -1609,7 +1609,7 @@ rgw::IAM::Effect eval_identity_or_session_policies(const DoutPrefixProvider* dpp
const rgw::ARN& arn);
bool verify_user_permission(const DoutPrefixProvider* dpp,
req_state * const s,
- RGWAccessControlPolicy * const user_acl,
+ const RGWAccessControlPolicy& user_acl,
const std::vector<rgw::IAM::Policy>& user_policies,
const std::vector<rgw::IAM::Policy>& session_policies,
const rgw::ARN& res,
@@ -1617,7 +1617,7 @@ bool verify_user_permission(const DoutPrefixProvider* dpp,
bool mandatory_policy=true);
bool verify_user_permission_no_policy(const DoutPrefixProvider* dpp,
req_state * const s,
- RGWAccessControlPolicy * const user_acl,
+ const RGWAccessControlPolicy& user_acl,
const int perm);
bool verify_user_permission(const DoutPrefixProvider* dpp,
req_state * const s,
@@ -1631,8 +1631,8 @@ bool verify_bucket_permission(
const DoutPrefixProvider* dpp,
req_state * const s,
const rgw_bucket& bucket,
- RGWAccessControlPolicy * const user_acl,
- RGWAccessControlPolicy * const bucket_acl,
+ const RGWAccessControlPolicy& user_acl,
+ const RGWAccessControlPolicy& bucket_acl,
const boost::optional<rgw::IAM::Policy>& bucket_policy,
const std::vector<rgw::IAM::Policy>& identity_policies,
const std::vector<rgw::IAM::Policy>& session_policies,
@@ -1641,8 +1641,8 @@ bool verify_bucket_permission(const DoutPrefixProvider* dpp, req_state * const s
bool verify_bucket_permission_no_policy(
const DoutPrefixProvider* dpp,
req_state * const s,
- RGWAccessControlPolicy * const user_acl,
- RGWAccessControlPolicy * const bucket_acl,
+ const RGWAccessControlPolicy& user_acl,
+ const RGWAccessControlPolicy& bucket_acl,
const int perm);
bool verify_bucket_permission_no_policy(const DoutPrefixProvider* dpp,
req_state * const s,
@@ -1653,9 +1653,9 @@ extern bool verify_object_permission(
const DoutPrefixProvider* dpp,
req_state * const s,
const rgw_obj& obj,
- RGWAccessControlPolicy * const user_acl,
- RGWAccessControlPolicy * const bucket_acl,
- RGWAccessControlPolicy * const object_acl,
+ const RGWAccessControlPolicy& user_acl,
+ const RGWAccessControlPolicy& bucket_acl,
+ const RGWAccessControlPolicy& object_acl,
const boost::optional<rgw::IAM::Policy>& bucket_policy,
const std::vector<rgw::IAM::Policy>& identity_policies,
const std::vector<rgw::IAM::Policy>& session_policies,
@@ -1664,9 +1664,9 @@ extern bool verify_object_permission(const DoutPrefixProvider* dpp, req_state *s
extern bool verify_object_permission_no_policy(
const DoutPrefixProvider* dpp,
req_state * const s,
- RGWAccessControlPolicy * const user_acl,
- RGWAccessControlPolicy * const bucket_acl,
- RGWAccessControlPolicy * const object_acl,
+ const RGWAccessControlPolicy& user_acl,
+ const RGWAccessControlPolicy& bucket_acl,
+ const RGWAccessControlPolicy& object_acl,
int perm);
extern bool verify_object_permission_no_policy(const DoutPrefixProvider* dpp, req_state *s,
int perm);
diff --git a/src/rgw/rgw_file.cc b/src/rgw/rgw_file.cc
index e1217173fed..f0100e37c2b 100644
--- a/src/rgw/rgw_file.cc
+++ b/src/rgw/rgw_file.cc
@@ -1872,7 +1872,7 @@ namespace rgw {
}
}
processor = get_driver()->get_atomic_writer(this, state->yield, state->object.get(),
- state->bucket_owner.get_id(),
+ state->bucket_owner.id,
&state->dest_placement, 0, state->req_id);
op_ret = processor->prepare(state->yield);
diff --git a/src/rgw/rgw_file_int.h b/src/rgw/rgw_file_int.h
index 91c858e5b3b..5658003adb8 100644
--- a/src/rgw/rgw_file_int.h
+++ b/src/rgw/rgw_file_int.h
@@ -1914,11 +1914,9 @@ public:
int get_params(optional_yield) override {
req_state* state = get_state();
- RGWAccessControlPolicy_S3 s3policy(state->cct);
- /* we don't have (any) headers, so just create canned ACLs */
- int ret = s3policy.create_canned(state->owner, state->bucket_owner, state->canned_acl);
- policy = s3policy;
- return ret;
+ /* we don't have (any) headers, so just create default ACLs */
+ policy.create_default(state->owner.id, state->owner.display_name);
+ return 0;
}
void send_response() override {
@@ -2030,11 +2028,9 @@ public:
int get_params(optional_yield) override {
req_state* state = get_state();
- RGWAccessControlPolicy_S3 s3policy(state->cct);
- /* we don't have (any) headers, so just create canned ACLs */
- int ret = s3policy.create_canned(state->owner, state->bucket_owner, state->canned_acl);
- policy = s3policy;
- return ret;
+ /* we don't have (any) headers, so just create default ACLs */
+ policy.create_default(state->owner.id, state->owner.display_name);
+ return 0;
}
int get_data(buffer::list& _bl) override {
@@ -2534,11 +2530,9 @@ public:
int get_params(optional_yield) override {
req_state* state = get_state();
- RGWAccessControlPolicy_S3 s3policy(state->cct);
- /* we don't have (any) headers, so just create canned ACLs */
- int ret = s3policy.create_canned(state->owner, state->bucket_owner, state->canned_acl);
- policy = s3policy;
- return ret;
+ /* we don't have (any) headers, so just create default ACLs */
+ policy.create_default(state->owner.id, state->owner.display_name);
+ return 0;
}
int get_data(buffer::list& _bl) override {
@@ -2641,15 +2635,13 @@ public:
int get_params(optional_yield) override {
req_state* s = get_state();
- RGWAccessControlPolicy_S3 s3policy(s->cct);
- /* we don't have (any) headers, so just create canned ACLs */
- int ret = s3policy.create_canned(s->owner, s->bucket_owner, s->canned_acl);
- dest_policy = s3policy;
+ /* we don't have (any) headers, so just create default ACLs */
+ dest_policy.create_default(s->owner.id, s->owner.display_name);
/* src_object required before RGWCopyObj::verify_permissions() */
rgw_obj_key k = rgw_obj_key(src_name);
s->src_object = s->bucket->get_object(k);
s->object = s->src_object->clone(); // needed to avoid trap at rgw_op.cc:5150
- return ret;
+ return 0;
}
void send_response() override {}
diff --git a/src/rgw/rgw_kafka.cc b/src/rgw/rgw_kafka.cc
index eb6b85071a7..4593a4de67b 100644
--- a/src/rgw/rgw_kafka.cc
+++ b/src/rgw/rgw_kafka.cc
@@ -100,8 +100,9 @@ struct connection_t {
// fire all remaining callbacks (if not fired by rd_kafka_flush)
std::for_each(callbacks.begin(), callbacks.end(), [this](auto& cb_tag) {
cb_tag.cb(status);
- ldout(cct, 20) << "Kafka destroy: invoking callback with tag=" << cb_tag.tag <<
- " for: " << broker << dendl;
+ ldout(cct, 20) << "Kafka destroy: invoking callback with tag="
+ << cb_tag.tag << " for: " << broker
+ << " with status: " << status << dendl;
});
callbacks.clear();
delivery_tag = 1;
@@ -418,7 +419,9 @@ private:
if (tag) {
auto const q_len = conn->callbacks.size();
if (q_len < max_inflight) {
- ldout(conn->cct, 20) << "Kafka publish (with callback, tag=" << *tag << "): OK. Queue has: " << q_len << " callbacks" << dendl;
+ ldout(conn->cct, 20)
+ << "Kafka publish (with callback, tag=" << *tag
+ << "): OK. Queue has: " << q_len + 1 << " callbacks" << dendl;
conn->callbacks.emplace_back(*tag, message->cb);
} else {
// immediately invoke callback with error - this is not a connection error
@@ -463,6 +466,7 @@ private:
if(conn->timestamp.sec() + max_idle_time < ceph_clock_now()) {
ldout(conn->cct, 20) << "kafka run: deleting a connection due to idle behaviour: " << ceph_clock_now() << dendl;
std::lock_guard lock(connections_lock);
+ conn->status = STATUS_CONNECTION_IDLE;
conn_it = connections.erase(conn_it);
--connection_count; \
continue;
diff --git a/src/rgw/rgw_lc.cc b/src/rgw/rgw_lc.cc
index 79b81270457..b857cc7a82f 100644
--- a/src/rgw/rgw_lc.cc
+++ b/src/rgw/rgw_lc.cc
@@ -41,6 +41,9 @@
#define dout_context g_ceph_context
#define dout_subsys ceph_subsys_rgw
+constexpr int32_t hours_in_a_day = 24;
+constexpr int32_t secs_in_a_day = hours_in_a_day * 60 * 60;
+
using namespace std;
const char* LC_STATUS[] = {
@@ -289,7 +292,7 @@ static bool obj_has_expired(const DoutPrefixProvider *dpp, CephContext *cct, cep
utime_t base_time;
if (cct->_conf->rgw_lc_debug_interval <= 0) {
/* Normal case, run properly */
- cmp = double(days)*24*60*60;
+ cmp = double(days) * secs_in_a_day;
base_time = ceph_clock_now().round_to_day();
} else {
/* We're in debug mode; Treat each rgw_lc_debug_interval seconds as a day */
@@ -546,9 +549,9 @@ static int remove_expired_obj(
= obj->get_delete_op();
del_op->params.versioning_status
= obj->get_bucket()->get_info().versioning_status();
- del_op->params.obj_owner.set_id(rgw_user {meta.owner});
- del_op->params.obj_owner.set_name(meta.owner_display_name);
- del_op->params.bucket_owner.set_id(bucket_info.owner);
+ del_op->params.obj_owner.id = rgw_user{meta.owner};
+ del_op->params.obj_owner.display_name = meta.owner_display_name;
+ del_op->params.bucket_owner.id = bucket_info.owner;
del_op->params.unmod_since = meta.mtime;
// notification supported only for RADOS driver for now
@@ -814,7 +817,6 @@ int RGWLC::handle_multipart_expiration(rgw::sal::Bucket* target,
const multimap<string, lc_op>& prefix_map,
LCWorker* worker, time_t stop_at, bool once)
{
- MultipartMetaFilter mp_filter;
int ret;
rgw::sal::Bucket::ListParams params;
rgw::sal::Bucket::ListResults results;
@@ -825,7 +827,7 @@ int RGWLC::handle_multipart_expiration(rgw::sal::Bucket* target,
* operating on one shard at a time */
params.allow_unordered = true;
params.ns = RGW_OBJ_NS_MULTIPART;
- params.access_list_filter = &mp_filter;
+ params.access_list_filter = MultipartMetaFilter;
const auto event_type = rgw::notify::ObjectExpirationAbortMPU;
@@ -1108,7 +1110,7 @@ public:
return is_expired;
}
- int process(lc_op_ctx& oc) {
+ int process(lc_op_ctx& oc) override {
auto& o = oc.o;
int r;
if (o.is_delete_marker()) {
@@ -1172,7 +1174,7 @@ public:
pass_object_lock_check(oc.driver, oc.obj.get(), dpp);
}
- int process(lc_op_ctx& oc) {
+ int process(lc_op_ctx& oc) override {
auto& o = oc.o;
int r = remove_expired_obj(oc.dpp, oc, true,
rgw::notify::ObjectExpirationNoncurrent);
@@ -1217,7 +1219,7 @@ public:
return true;
}
- int process(lc_op_ctx& oc) {
+ int process(lc_op_ctx& oc) override {
auto& o = oc.o;
int r = remove_expired_obj(oc.dpp, oc, true,
rgw::notify::ObjectExpirationDeleteMarker);
@@ -1385,7 +1387,7 @@ public:
return 0;
}
- int process(lc_op_ctx& oc) {
+ int process(lc_op_ctx& oc) override {
auto& o = oc.o;
int r;
@@ -1460,7 +1462,7 @@ protected:
public:
LCOpAction_CurrentTransition(const transition_action& _transition)
: LCOpAction_Transition(_transition) {}
- int process(lc_op_ctx& oc) {
+ int process(lc_op_ctx& oc) override {
int r = LCOpAction_Transition::process(oc);
if (r == 0) {
if (perfcounter) {
@@ -1485,7 +1487,7 @@ public:
const transition_action& _transition)
: LCOpAction_Transition(_transition)
{}
- int process(lc_op_ctx& oc) {
+ int process(lc_op_ctx& oc) override {
int r = LCOpAction_Transition::process(oc);
if (r == 0) {
if (perfcounter) {
@@ -1944,8 +1946,7 @@ bool RGWLC::expired_session(time_t started)
}
time_t interval = (cct->_conf->rgw_lc_debug_interval > 0)
- ? cct->_conf->rgw_lc_debug_interval
- : 24*60*60;
+ ? cct->_conf->rgw_lc_debug_interval : secs_in_a_day;
auto now = time(nullptr);
@@ -1961,8 +1962,7 @@ bool RGWLC::expired_session(time_t started)
time_t RGWLC::thread_stop_at()
{
uint64_t interval = (cct->_conf->rgw_lc_debug_interval > 0)
- ? cct->_conf->rgw_lc_debug_interval
- : 24*60*60;
+ ? cct->_conf->rgw_lc_debug_interval : secs_in_a_day;
return time(nullptr) + interval;
}
@@ -2053,7 +2053,7 @@ static inline bool allow_shard_rollover(CephContext* cct, time_t now, time_t sha
* - the current shard has not rolled over in the last 24 hours
*/
if (((shard_rollover_date < now) &&
- (now - shard_rollover_date > 24*60*60)) ||
+ (now - shard_rollover_date > secs_in_a_day)) ||
(! shard_rollover_date /* no rollover date stored */) ||
(cct->_conf->rgw_lc_debug_interval > 0 /* defaults to -1 == disabled */)) {
return true;
@@ -2079,7 +2079,7 @@ static inline bool already_run_today(CephContext* cct, time_t start_date)
bdt.tm_min = 0;
bdt.tm_sec = 0;
begin_of_day = mktime(&bdt);
- if (now - begin_of_day < 24*60*60)
+ if (now - begin_of_day < secs_in_a_day)
return true;
else
return false;
@@ -2425,6 +2425,12 @@ bool RGWLC::LCWorker::should_work(utime_t& now)
time_t tt = now.sec();
localtime_r(&tt, &bdt);
+ // next-day adjustment if the configured end_hour is less than start_hour
+ if (end_hour < start_hour) {
+ bdt.tm_hour = bdt.tm_hour > end_hour ? bdt.tm_hour : bdt.tm_hour + hours_in_a_day;
+ end_hour += hours_in_a_day;
+ }
+
if (cct->_conf->rgw_lc_debug_interval > 0) {
/* We're debugging, so say we can run */
return true;
@@ -2465,7 +2471,7 @@ int RGWLC::LCWorker::schedule_next_start_time(utime_t &start, utime_t& now)
nt = mktime(&bdt);
secs = nt - tt;
- return secs>0 ? secs : secs+24*60*60;
+ return secs > 0 ? secs : secs + secs_in_a_day;
}
RGWLC::LCWorker::~LCWorker()
@@ -2756,7 +2762,7 @@ std::string s3_expiration_header(
if (rule_expiration.has_days()) {
rule_expiration_date =
boost::optional<ceph::real_time>(
- mtime + make_timespan(double(rule_expiration.get_days())*24*60*60 - ceph::real_clock::to_time_t(mtime)%(24*60*60) + 24*60*60));
+ mtime + make_timespan(double(rule_expiration.get_days()) * secs_in_a_day - ceph::real_clock::to_time_t(mtime)%(secs_in_a_day) + secs_in_a_day));
}
}
@@ -2835,7 +2841,7 @@ bool s3_multipart_abort_header(
std::optional<ceph::real_time> rule_abort_date;
if (mp_expiration.has_days()) {
rule_abort_date = std::optional<ceph::real_time>(
- mtime + make_timespan(mp_expiration.get_days()*24*60*60 - ceph::real_clock::to_time_t(mtime)%(24*60*60) + 24*60*60));
+ mtime + make_timespan(mp_expiration.get_days() * secs_in_a_day - ceph::real_clock::to_time_t(mtime)%(secs_in_a_day) + secs_in_a_day));
}
// update earliest abort date
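The new constants only give a name to the repeated 24*60*60 literal; the arithmetic is unchanged. As a rough standalone restatement of the day-rounded expiry computed in s3_expiration_header() above:

#include <cstdint>
#include <ctime>

constexpr int32_t hours_in_a_day = 24;
constexpr int32_t secs_in_a_day = hours_in_a_day * 60 * 60;

// Expiry = start of the day containing mtime, plus the rule's days, plus one day,
// which equals mtime + days*secs_in_a_day - mtime%secs_in_a_day + secs_in_a_day above.
time_t expiration_from(time_t mtime, int days) {
  const time_t start_of_day = mtime - (mtime % secs_in_a_day);
  return start_of_day + time_t(days + 1) * secs_in_a_day;
}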
diff --git a/src/rgw/rgw_lib.cc b/src/rgw/rgw_lib.cc
index 5b0a3c72849..3168abadb0b 100644
--- a/src/rgw/rgw_lib.cc
+++ b/src/rgw/rgw_lib.cc
@@ -468,6 +468,7 @@ namespace rgw {
int RGWLib::init(vector<const char*>& args)
{
+ int r{0};
/* alternative default for module */
map<std::string,std::string> defaults = {
{ "debug_rgw", "1/5" },
@@ -524,7 +525,13 @@ namespace rgw {
register_async_signal_handler(SIGUSR1, rgw::signal::handle_sigterm);
main.init_tracepoints();
- main.init_frontends2(this /* rgwlib */);
+ r = main.init_frontends2(this /* rgwlib */);
+ if (r != 0) {
+ derr << "ERROR: unable to initialize frontend, r = " << r << dendl;
+ main.shutdown();
+ return r;
+ }
+
main.init_notification_endpoints();
main.init_lua();
@@ -601,8 +608,8 @@ namespace rgw {
s->perm_mask = RGW_PERM_FULL_CONTROL;
// populate the owner info
- s->owner.set_id(s->user->get_id());
- s->owner.set_name(s->user->get_display_name());
+ s->owner.id = s->user->get_id();
+ s->owner.display_name = s->user->get_display_name();
return 0;
} /* RGWHandler_Lib::authorize */
diff --git a/src/rgw/rgw_log.cc b/src/rgw/rgw_log.cc
index 9bc27bbe9ff..cc7e085ab72 100644
--- a/src/rgw/rgw_log.cc
+++ b/src/rgw/rgw_log.cc
@@ -207,14 +207,14 @@ static void log_usage(req_state *s, const string& op_name)
bucket_name = s->bucket_name;
if (!bucket_name.empty()) {
- bucket_name = s->bucket_name;
- user = s->bucket_owner.get_id();
+ bucket_name = s->bucket_name;
+ user = s->bucket_owner.id;
if (!rgw::sal::Bucket::empty(s->bucket.get()) &&
s->bucket->get_info().requester_pays) {
payer = s->user->get_id();
}
} else {
- user = s->user->get_id();
+ user = s->user->get_id();
}
bool error = s->err.is_err();
@@ -647,9 +647,8 @@ int rgw_log_op(RGWREST* const rest, req_state *s, const RGWOp* op, OpsLogSink *o
}
entry.user = s->user->get_id().to_str();
- if (s->object_acl)
- entry.object_owner = s->object_acl->get_owner().get_id();
- entry.bucket_owner = s->bucket_owner.get_id();
+ entry.object_owner = s->object_acl.get_owner().id;
+ entry.bucket_owner = s->bucket_owner.id;
uint64_t bytes_sent = ACCOUNTING_IO(s)->get_bytes_sent();
uint64_t bytes_received = ACCOUNTING_IO(s)->get_bytes_received();
diff --git a/src/rgw/rgw_lua_request.cc b/src/rgw/rgw_lua_request.cc
index cb819deff14..a36aad666f4 100644
--- a/src/rgw/rgw_lua_request.cc
+++ b/src/rgw/rgw_lua_request.cc
@@ -260,10 +260,9 @@ struct OwnerMetaTable : public EmptyMetaTable {
const char* index = luaL_checkstring(L, 2);
if (strcasecmp(index, "DisplayName") == 0) {
- pushstring(L, owner->get_display_name());
+ pushstring(L, owner->display_name);
} else if (strcasecmp(index, "User") == 0) {
- create_metatable<UserMetaTable>(L, name, index, false,
- &(owner->get_id()));
+ create_metatable<UserMetaTable>(L, name, index, false, &owner->id);
} else {
return error_unknown_field(L, index, name);
}
@@ -365,19 +364,26 @@ struct GrantMetaTable : public EmptyMetaTable {
if (strcasecmp(index, "Type") == 0) {
lua_pushinteger(L, grant->get_type().get_type());
} else if (strcasecmp(index, "User") == 0) {
- const auto id_ptr = grant->get_id();
- if (id_ptr) {
+ if (const auto user = grant->get_user(); user) {
create_metatable<UserMetaTable>(L, name, index, false,
- const_cast<rgw_user*>(id_ptr));
+ const_cast<rgw_user*>(&user->id));
} else {
lua_pushnil(L);
}
} else if (strcasecmp(index, "Permission") == 0) {
lua_pushinteger(L, grant->get_permission().get_permissions());
} else if (strcasecmp(index, "GroupType") == 0) {
- lua_pushinteger(L, grant->get_group());
+ if (const auto group = grant->get_group(); group) {
+ lua_pushinteger(L, group->type);
+ } else {
+ lua_pushnil(L);
+ }
} else if (strcasecmp(index, "Referer") == 0) {
- pushstring(L, grant->get_referer());
+ if (const auto referer = grant->get_referer(); referer) {
+ pushstring(L, referer->url_spec);
+ } else {
+ lua_pushnil(L);
+ }
} else {
return error_unknown_field(L, index, name);
}
@@ -712,11 +718,11 @@ struct RequestMetaTable : public EmptyMetaTable {
} else if (strcasecmp(index, "ZoneGroup") == 0) {
create_metatable<ZoneGroupMetaTable>(L, name, index, false, s);
} else if (strcasecmp(index, "UserACL") == 0) {
- create_metatable<ACLMetaTable>(L, name, index, false, s->user_acl);
+ create_metatable<ACLMetaTable>(L, name, index, false, &s->user_acl);
} else if (strcasecmp(index, "BucketACL") == 0) {
- create_metatable<ACLMetaTable>(L, name, index, false, s->bucket_acl);
+ create_metatable<ACLMetaTable>(L, name, index, false, &s->bucket_acl);
} else if (strcasecmp(index, "ObjectACL") == 0) {
- create_metatable<ACLMetaTable>(L, name, index, false, s->object_acl);
+ create_metatable<ACLMetaTable>(L, name, index, false, &s->object_acl);
} else if (strcasecmp(index, "Environment") == 0) {
create_metatable<StringMapMetaTable<rgw::IAM::Environment>>(L, name, index, false, &(s->env));
} else if (strcasecmp(index, "Policy") == 0) {
diff --git a/src/rgw/rgw_main.cc b/src/rgw/rgw_main.cc
index 27b02f84195..57407b43481 100644
--- a/src/rgw/rgw_main.cc
+++ b/src/rgw/rgw_main.cc
@@ -157,7 +157,12 @@ int main(int argc, char *argv[])
main.init_opslog();
main.init_tracepoints();
main.init_lua();
- main.init_frontends2(nullptr /* RGWLib */);
+ r = main.init_frontends2(nullptr /* RGWLib */);
+ if (r != 0) {
+ derr << "ERROR: initialize frontend fail, r = " << r << dendl;
+ main.shutdown();
+ return r;
+ }
main.init_notification_endpoints();
#if defined(HAVE_SYS_PRCTL_H)
diff --git a/src/rgw/rgw_mdlog.h b/src/rgw/rgw_mdlog.h
index 152126890b7..6c78fd38408 100644
--- a/src/rgw/rgw_mdlog.h
+++ b/src/rgw/rgw_mdlog.h
@@ -16,12 +16,13 @@
#pragma once
+#include "include/rados/librados.hpp"
+
#include "common/RWLock.h"
#include "rgw_metadata.h"
#include "rgw_mdlog_types.h"
-
-#include "services/svc_rados.h"
+#include "rgw_tools.h"
#define META_LOG_OBJ_PREFIX "meta.log."
@@ -40,7 +41,7 @@ class RGWMetadataLogInfoCompletion : public RefCountedObject {
using info_callback_t = std::function<void(int, const cls_log_header&)>;
private:
cls_log_header header;
- RGWSI_RADOS::Obj io_obj;
+ rgw_rados_ref io_obj;
librados::AioCompletion *completion;
std::mutex mutex; //< protects callback between cancel/complete
boost::optional<info_callback_t> callback; //< cleared on cancel
@@ -48,7 +49,7 @@ class RGWMetadataLogInfoCompletion : public RefCountedObject {
explicit RGWMetadataLogInfoCompletion(info_callback_t callback);
~RGWMetadataLogInfoCompletion() override;
- RGWSI_RADOS::Obj& get_io_obj() { return io_obj; }
+ rgw_rados_ref& get_io_obj() { return io_obj; }
cls_log_header& get_header() { return header; }
librados::AioCompletion* get_completion() { return completion; }
diff --git a/src/rgw/rgw_multipart_meta_filter.cc b/src/rgw/rgw_multipart_meta_filter.cc
index c616cd480f7..aeefc731f73 100644
--- a/src/rgw/rgw_multipart_meta_filter.cc
+++ b/src/rgw/rgw_multipart_meta_filter.cc
@@ -3,11 +3,9 @@
#include "svc_tier_rados.h"
-using namespace std;
-
const std::string MP_META_SUFFIX = ".meta";
-bool MultipartMetaFilter::filter(const string& name, string& key) {
+bool MultipartMetaFilter(const std::string& name, std::string& key) {
// the length of the suffix so we can skip past it
static const size_t MP_META_SUFFIX_LEN = MP_META_SUFFIX.length();
@@ -19,11 +17,11 @@ bool MultipartMetaFilter::filter(const string& name, string& key) {
return false;
size_t pos = name.find(MP_META_SUFFIX, len - MP_META_SUFFIX_LEN);
- if (pos == string::npos)
+ if (pos == std::string::npos)
return false;
pos = name.rfind('.', pos - 1);
- if (pos == string::npos)
+ if (pos == std::string::npos)
return false;
key = name.substr(0, pos);
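MultipartMetaFilter is now a free function instead of a filter object, so callers such as handle_multipart_expiration() assign it directly to access_list_filter. A minimal sketch, assuming the filter slot is a std::function-like callable (the ListParams shape here is simplified, not the real rgw::sal type):

#include <functional>
#include <string>

bool MultipartMetaFilter(const std::string& name, std::string& key);

struct ListParams {
  // simplified stand-in for rgw::sal::Bucket::ListParams
  std::function<bool(const std::string&, std::string&)> access_list_filter;
};

void configure(ListParams& params) {
  // No local functor needed anymore; the function converts to std::function.
  params.access_list_filter = MultipartMetaFilter;
}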
diff --git a/src/rgw/rgw_op.cc b/src/rgw/rgw_op.cc
index 7363db728a4..750ad7cb773 100644
--- a/src/rgw/rgw_op.cc
+++ b/src/rgw/rgw_op.cc
@@ -102,8 +102,6 @@ static string shadow_ns = RGW_OBJ_NS_SHADOW;
static void forward_req_info(const DoutPrefixProvider *dpp, CephContext *cct, req_info& info, const std::string& bucket_name);
-static MultipartMetaFilter mp_filter;
-
// this probably should belong in the rgw_iam_policy_keywords, I'll get it to it
// at some point
static constexpr auto S3_EXISTING_OBJTAG = "s3:ExistingObjectTag";
@@ -221,19 +219,18 @@ done:
static int decode_policy(const DoutPrefixProvider *dpp,
CephContext *cct,
bufferlist& bl,
- RGWAccessControlPolicy *policy)
+ RGWAccessControlPolicy& policy)
{
auto iter = bl.cbegin();
try {
- policy->decode(iter);
+ policy.decode(iter);
} catch (buffer::error& err) {
ldpp_dout(dpp, 0) << "ERROR: could not decode policy, caught buffer::error" << dendl;
return -EIO;
}
if (cct->_conf->subsys.should_gather<ceph_subsys_rgw, 15>()) {
ldpp_dout(dpp, 15) << __func__ << " Read AccessControlPolicy";
- RGWAccessControlPolicy_S3 *s3policy = static_cast<RGWAccessControlPolicy_S3 *>(policy);
- s3policy->to_xml(*_dout);
+ rgw::s3::write_policy_xml(policy, *_dout);
*_dout << dendl;
}
return 0;
@@ -245,17 +242,11 @@ static int get_user_policy_from_attr(const DoutPrefixProvider *dpp,
map<string, bufferlist>& attrs,
RGWAccessControlPolicy& policy /* out */)
{
- auto aiter = attrs.find(RGW_ATTR_ACL);
- if (aiter != attrs.end()) {
- int ret = decode_policy(dpp, cct, aiter->second, &policy);
- if (ret < 0) {
- return ret;
- }
- } else {
+ auto i = attrs.find(RGW_ATTR_ACL);
+ if (i == attrs.end()) {
return -ENOENT;
}
-
- return 0;
+ return decode_policy(dpp, cct, i->second, policy);
}
/**
@@ -270,7 +261,7 @@ int rgw_op_get_bucket_policy_from_attr(const DoutPrefixProvider *dpp,
rgw::sal::Driver* driver,
const rgw_user& bucket_owner,
map<string, bufferlist>& bucket_attrs,
- RGWAccessControlPolicy *policy,
+ RGWAccessControlPolicy& policy,
optional_yield y)
{
map<string, bufferlist>::iterator aiter = bucket_attrs.find(RGW_ATTR_ACL);
@@ -287,7 +278,7 @@ int rgw_op_get_bucket_policy_from_attr(const DoutPrefixProvider *dpp,
if (r < 0)
return r;
- policy->create_default(user->get_id(), user->get_display_name());
+ policy.create_default(user->get_id(), user->get_display_name());
}
return 0;
}
@@ -297,7 +288,7 @@ static int get_obj_policy_from_attr(const DoutPrefixProvider *dpp,
rgw::sal::Driver* driver,
RGWBucketInfo& bucket_info,
map<string, bufferlist>& bucket_attrs,
- RGWAccessControlPolicy *policy,
+ RGWAccessControlPolicy& policy,
string *storage_class,
rgw::sal::Object* obj,
optional_yield y)
@@ -320,7 +311,7 @@ static int get_obj_policy_from_attr(const DoutPrefixProvider *dpp,
if (ret < 0)
return ret;
- policy->create_default(bucket_info.owner, user->get_display_name());
+ policy.create_default(bucket_info.owner, user->get_display_name());
}
if (storage_class) {
@@ -387,7 +378,7 @@ static int read_bucket_policy(const DoutPrefixProvider *dpp,
req_state *s,
RGWBucketInfo& bucket_info,
map<string, bufferlist>& bucket_attrs,
- RGWAccessControlPolicy *policy,
+ RGWAccessControlPolicy& policy,
rgw_bucket& bucket,
optional_yield y)
{
@@ -404,7 +395,7 @@ static int read_bucket_policy(const DoutPrefixProvider *dpp,
int ret = rgw_op_get_bucket_policy_from_attr(dpp, s->cct, driver, bucket_info.owner,
bucket_attrs, policy, y);
if (ret == -ENOENT) {
- ret = -ERR_NO_SUCH_BUCKET;
+ ret = -ERR_NO_SUCH_BUCKET;
}
return ret;
@@ -415,7 +406,7 @@ static int read_obj_policy(const DoutPrefixProvider *dpp,
req_state *s,
RGWBucketInfo& bucket_info,
map<string, bufferlist>& bucket_attrs,
- RGWAccessControlPolicy* acl,
+ RGWAccessControlPolicy& acl,
string *storage_class,
boost::optional<Policy>& policy,
rgw::sal::Bucket* bucket,
@@ -452,14 +443,14 @@ static int read_obj_policy(const DoutPrefixProvider *dpp,
if (ret == -ENOENT) {
/* object does not exist checking the bucket's ACL to make sure
that we send a proper error code */
- RGWAccessControlPolicy bucket_policy(s->cct);
+ RGWAccessControlPolicy bucket_policy;
ret = rgw_op_get_bucket_policy_from_attr(dpp, s->cct, driver, bucket_info.owner,
- bucket_attrs, &bucket_policy, y);
+ bucket_attrs, bucket_policy, y);
if (ret < 0) {
return ret;
}
- const rgw_user& bucket_owner = bucket_policy.get_owner().get_id();
- if (bucket_owner.compare(s->user->get_id()) != 0 &&
+ const rgw_user& bucket_owner = bucket_policy.get_owner().id;
+ if (bucket_owner != s->user->get_id() &&
! s->auth.identity->is_admin_of(bucket_owner)) {
auto r = eval_identity_or_session_policies(dpp, s->iam_user_policies, s->env,
rgw::IAM::s3ListBucket, ARN(bucket->get_key()));
@@ -514,20 +505,6 @@ int rgw_build_bucket_policies(const DoutPrefixProvider *dpp, rgw::sal::Driver* d
}
}
- if(s->dialect.compare("s3") == 0) {
- s->bucket_acl = std::make_unique<RGWAccessControlPolicy_S3>(s->cct);
- } else if(s->dialect.compare("swift") == 0) {
- /* We aren't allocating the account policy for those operations using
- * the Swift's infrastructure that don't really need req_state::user.
- * Typical example here is the implementation of /info. */
- if (!s->user->get_id().empty()) {
- s->user_acl = std::make_unique<RGWAccessControlPolicy_SWIFTAcct>(s->cct);
- }
- s->bucket_acl = std::make_unique<RGWAccessControlPolicy_SWIFT>(s->cct);
- } else {
- s->bucket_acl = std::make_unique<RGWAccessControlPolicy>(s->cct);
- }
-
const RGWZoneGroup& zonegroup = s->penv.site->get_zonegroup();
/* check if copy source is within the current domain */
@@ -576,13 +553,13 @@ int rgw_build_bucket_policies(const DoutPrefixProvider *dpp, rgw::sal::Driver* d
s->bucket_attrs = s->bucket->get_attrs();
ret = read_bucket_policy(dpp, driver, s, s->bucket->get_info(),
s->bucket->get_attrs(),
- s->bucket_acl.get(), s->bucket->get_key(), y);
+ s->bucket_acl, s->bucket->get_key(), y);
acct_acl_user = {
s->bucket->get_info().owner,
- s->bucket_acl->get_owner().get_display_name(),
+ s->bucket_acl.get_owner().display_name,
};
- s->bucket_owner = s->bucket_acl->get_owner();
+ s->bucket_owner = s->bucket_acl.get_owner();
s->zonegroup_endpoint = rgw::get_zonegroup_endpoint(zonegroup);
s->zonegroup_name = zonegroup.get_name();
@@ -618,12 +595,12 @@ int rgw_build_bucket_policies(const DoutPrefixProvider *dpp, rgw::sal::Driver* d
}
/* handle user ACL only for those APIs which support it */
- if (s->user_acl) {
+ if (s->dialect == "swift" && !s->user->get_id().empty()) {
std::unique_ptr<rgw::sal::User> acl_user = driver->get_user(acct_acl_user.uid);
ret = acl_user->read_attrs(dpp, y);
if (!ret) {
- ret = get_user_policy_from_attr(dpp, s->cct, acl_user->get_attrs(), *s->user_acl);
+ ret = get_user_policy_from_attr(dpp, s->cct, acl_user->get_attrs(), s->user_acl);
}
if (-ENOENT == ret) {
/* In already existing clusters users won't have ACL. In such case
@@ -633,8 +610,8 @@ int rgw_build_bucket_policies(const DoutPrefixProvider *dpp, rgw::sal::Driver* d
* 1. if we try to reach an existing bucket, its owner is considered
* as account owner.
* 2. otherwise account owner is identity stored in s->user->user_id. */
- s->user_acl->create_default(acct_acl_user.uid,
- acct_acl_user.display_name);
+ s->user_acl.create_default(acct_acl_user.uid,
+ acct_acl_user.display_name);
ret = 0;
} else if (ret < 0) {
ldpp_dout(dpp, 0) << "NOTICE: couldn't get user attrs for handling ACL "
@@ -661,18 +638,29 @@ int rgw_build_bucket_policies(const DoutPrefixProvider *dpp, rgw::sal::Driver* d
}
} catch (const std::exception& e) {
ldpp_dout(dpp, -1) << "Error reading IAM User Policy: " << e.what() << dendl;
- ret = -EACCES;
+ if (!s->system_request) {
+ ret = -EACCES;
+ }
}
}
try {
s->iam_policy = get_iam_policy_from_attr(s->cct, s->bucket_attrs, s->bucket_tenant);
} catch (const std::exception& e) {
- // Really this is a can't happen condition. We parse the policy
- // when it's given to us, so perhaps we should abort or otherwise
- // raise bloody murder.
ldpp_dout(dpp, 0) << "Error reading IAM Policy: " << e.what() << dendl;
- ret = -EACCES;
+
+ // This really shouldn't happen. We parse the policy when it's given to us,
+ // so a parsing failure here means we broke backward compatibility. The only
+ // sensible thing to do in this case is to deny access, because the policy
+ // may have denied access.
+ //
+ // However, the only way for an administrator to repair such a bucket is to
+ // send a PutBucketPolicy or DeleteBucketPolicy request as an admin/system
+ // user. We can allow such requests, because even if the policy denied
+ // access, admin/system users override that error from verify_permission().
+ if (!s->system_request) {
+ ret = -EACCES;
+ }
}
bool success = driver->get_zone()->get_redirect_endpoint(&s->redirect_zone_endpoint);
@@ -692,24 +680,21 @@ int rgw_build_bucket_policies(const DoutPrefixProvider *dpp, rgw::sal::Driver* d
int rgw_build_object_policies(const DoutPrefixProvider *dpp, rgw::sal::Driver* driver,
req_state *s, bool prefetch_data, optional_yield y)
{
- int ret = 0;
-
- if (!rgw::sal::Object::empty(s->object.get())) {
- if (!s->bucket_exists) {
- return -ERR_NO_SUCH_BUCKET;
- }
- s->object_acl = std::make_unique<RGWAccessControlPolicy>(s->cct);
+ if (rgw::sal::Object::empty(s->object)) {
+ return 0;
+ }
+ if (!s->bucket_exists) {
+ return -ERR_NO_SUCH_BUCKET;
+ }
- s->object->set_atomic();
- if (prefetch_data) {
- s->object->set_prefetch_data();
- }
- ret = read_obj_policy(dpp, driver, s, s->bucket->get_info(), s->bucket_attrs,
- s->object_acl.get(), nullptr, s->iam_policy, s->bucket.get(),
- s->object.get(), y);
+ s->object->set_atomic();
+ if (prefetch_data) {
+ s->object->set_prefetch_data();
}
- return ret;
+ return read_obj_policy(dpp, driver, s, s->bucket->get_info(), s->bucket_attrs,
+ s->object_acl, nullptr, s->iam_policy, s->bucket.get(),
+ s->object.get(), y);
}
static int rgw_iam_remove_objtags(const DoutPrefixProvider *dpp, req_state* s, rgw::sal::Object* object, bool has_existing_obj_tag, bool has_resource_tag) {
@@ -1451,7 +1436,7 @@ int RGWOp::init_quota()
driver->get_user(s->bucket->get_info().owner);
rgw::sal::User* user;
- if (s->user->get_id() == s->bucket_owner.get_id()) {
+ if (s->user->get_id() == s->bucket_owner.id) {
user = s->user.get();
} else {
int r = owner_user->load_user(this, s->yield);
@@ -1642,9 +1627,8 @@ int rgw_policy_from_attrset(const DoutPrefixProvider *dpp, CephContext *cct, map
return -EIO;
}
if (cct->_conf->subsys.should_gather<ceph_subsys_rgw, 15>()) {
- RGWAccessControlPolicy_S3 *s3policy = static_cast<RGWAccessControlPolicy_S3 *>(policy);
ldpp_dout(dpp, 15) << __func__ << " Read AccessControlPolicy";
- s3policy->to_xml(*_dout);
+ rgw::s3::write_policy_xml(*policy, *_dout);
*_dout << dendl;
}
return 0;
@@ -1652,7 +1636,7 @@ int rgw_policy_from_attrset(const DoutPrefixProvider *dpp, CephContext *cct, map
int RGWGetObj::read_user_manifest_part(rgw::sal::Bucket* bucket,
const rgw_bucket_dir_entry& ent,
- RGWAccessControlPolicy * const bucket_acl,
+ const RGWAccessControlPolicy& bucket_acl,
const boost::optional<Policy>& bucket_policy,
const off_t start_ofs,
const off_t end_ofs,
@@ -1669,7 +1653,7 @@ int RGWGetObj::read_user_manifest_part(rgw::sal::Bucket* bucket,
std::unique_ptr<rgw::sal::Object> part = bucket->get_object(ent.key);
- RGWAccessControlPolicy obj_policy(s->cct);
+ RGWAccessControlPolicy obj_policy;
ldpp_dout(this, 20) << "reading obj=" << part << " ofs=" << cur_ofs
<< " end=" << cur_end << dendl;
@@ -1728,8 +1712,8 @@ int RGWGetObj::read_user_manifest_part(rgw::sal::Bucket* bucket,
ldpp_dout(this, 2) << "overriding permissions due to system operation" << dendl;
} else if (s->auth.identity->is_admin_of(s->user->get_id())) {
ldpp_dout(this, 2) << "overriding permissions due to admin operation" << dendl;
- } else if (!verify_object_permission(this, s, part->get_obj(), s->user_acl.get(),
- bucket_acl, &obj_policy, bucket_policy,
+ } else if (!verify_object_permission(this, s, part->get_obj(), s->user_acl,
+ bucket_acl, obj_policy, bucket_policy,
s->iam_user_policies, s->session_policies, action)) {
return -EPERM;
}
@@ -1753,14 +1737,14 @@ static int iterate_user_manifest_parts(const DoutPrefixProvider *dpp,
const off_t end,
rgw::sal::Bucket* bucket,
const string& obj_prefix,
- RGWAccessControlPolicy * const bucket_acl,
+ const RGWAccessControlPolicy& bucket_acl,
const boost::optional<Policy>& bucket_policy,
uint64_t * const ptotal_len,
uint64_t * const pobj_size,
string * const pobj_sum,
int (*cb)(rgw::sal::Bucket* bucket,
const rgw_bucket_dir_entry& ent,
- RGWAccessControlPolicy * const bucket_acl,
+ const RGWAccessControlPolicy& bucket_acl,
const boost::optional<Policy>& bucket_policy,
off_t start_ofs,
off_t end_ofs,
@@ -1862,7 +1846,7 @@ static int iterate_slo_parts(const DoutPrefixProvider *dpp,
map<uint64_t, rgw_slo_part>& slo_parts,
int (*cb)(rgw::sal::Bucket* bucket,
const rgw_bucket_dir_entry& ent,
- RGWAccessControlPolicy *bucket_acl,
+ const RGWAccessControlPolicy& bucket_acl,
const boost::optional<Policy>& bucket_policy,
off_t start_ofs,
off_t end_ofs,
@@ -1921,7 +1905,7 @@ static int iterate_slo_parts(const DoutPrefixProvider *dpp,
<< dendl;
// SLO is a Swift thing, and Swift has no knowledge of S3 Policies.
- int r = cb(part.bucket, ent, part.bucket_acl,
+ int r = cb(part.bucket, ent, *part.bucket_acl,
(part.bucket_policy ?
boost::optional<Policy>(*part.bucket_policy) : none),
start_ofs, end_ofs, cb_param, true /* swift_slo */);
@@ -1938,7 +1922,7 @@ static int iterate_slo_parts(const DoutPrefixProvider *dpp,
static int get_obj_user_manifest_iterate_cb(rgw::sal::Bucket* bucket,
const rgw_bucket_dir_entry& ent,
- RGWAccessControlPolicy * const bucket_acl,
+ const RGWAccessControlPolicy& bucket_acl,
const boost::optional<Policy>& bucket_policy,
const off_t start_ofs,
const off_t end_ofs,
@@ -1964,7 +1948,7 @@ int RGWGetObj::handle_user_manifest(const char *prefix, optional_yield y)
const std::string bucket_name = url_decode(prefix_view.substr(0, pos));
const std::string obj_prefix = url_decode(prefix_view.substr(pos + 1));
- RGWAccessControlPolicy _bucket_acl(s->cct);
+ RGWAccessControlPolicy _bucket_acl;
RGWAccessControlPolicy *bucket_acl;
boost::optional<Policy> _bucket_policy;
boost::optional<Policy>* bucket_policy;
@@ -1983,7 +1967,7 @@ int RGWGetObj::handle_user_manifest(const char *prefix, optional_yield y)
return r;
}
bucket_acl = &_bucket_acl;
- r = read_bucket_policy(this, driver, s, ubucket->get_info(), bucket_attrs, bucket_acl, ubucket->get_key(), y);
+ r = read_bucket_policy(this, driver, s, ubucket->get_info(), bucket_attrs, *bucket_acl, ubucket->get_key(), y);
if (r < 0) {
ldpp_dout(this, 0) << "failed to read bucket policy" << dendl;
return r;
@@ -1993,7 +1977,7 @@ int RGWGetObj::handle_user_manifest(const char *prefix, optional_yield y)
pbucket = ubucket.get();
} else {
pbucket = s->bucket.get();
- bucket_acl = s->bucket_acl.get();
+ bucket_acl = &s->bucket_acl;
bucket_policy = &s->iam_policy;
}
@@ -2002,7 +1986,7 @@ int RGWGetObj::handle_user_manifest(const char *prefix, optional_yield y)
* - overall DLO's content size,
* - md5 sum of overall DLO's content (for etag of Swift API). */
r = iterate_user_manifest_parts(this, s->cct, driver, ofs, end,
- pbucket, obj_prefix, bucket_acl, *bucket_policy,
+ pbucket, obj_prefix, *bucket_acl, *bucket_policy,
nullptr, &s->obj_size, &lo_etag,
nullptr /* cb */, nullptr /* cb arg */, y);
if (r < 0) {
@@ -2016,7 +2000,7 @@ int RGWGetObj::handle_user_manifest(const char *prefix, optional_yield y)
}
r = iterate_user_manifest_parts(this, s->cct, driver, ofs, end,
- pbucket, obj_prefix, bucket_acl, *bucket_policy,
+ pbucket, obj_prefix, *bucket_acl, *bucket_policy,
&total_len, nullptr, nullptr,
nullptr, nullptr, y);
if (r < 0) {
@@ -2030,7 +2014,7 @@ int RGWGetObj::handle_user_manifest(const char *prefix, optional_yield y)
}
r = iterate_user_manifest_parts(this, s->cct, driver, ofs, end,
- pbucket, obj_prefix, bucket_acl, *bucket_policy,
+ pbucket, obj_prefix, *bucket_acl, *bucket_policy,
nullptr, nullptr, nullptr,
get_obj_user_manifest_iterate_cb, (void *)this, y);
if (r < 0) {
@@ -2102,8 +2086,7 @@ int RGWGetObj::handle_slo_manifest(bufferlist& bl, optional_yield y)
bucket_policy = piter->second.second.get_ptr();
bucket = buckets[bucket_name].get();
} else {
- allocated_acls.push_back(RGWAccessControlPolicy(s->cct));
- RGWAccessControlPolicy& _bucket_acl = allocated_acls.back();
+ RGWAccessControlPolicy& _bucket_acl = allocated_acls.emplace_back();
std::unique_ptr<rgw::sal::Bucket> tmp_bucket;
int r = driver->load_bucket(this, rgw_bucket(s->user->get_tenant(),
@@ -2116,7 +2099,7 @@ int RGWGetObj::handle_slo_manifest(bufferlist& bl, optional_yield y)
}
bucket = tmp_bucket.get();
bucket_acl = &_bucket_acl;
- r = read_bucket_policy(this, driver, s, tmp_bucket->get_info(), tmp_bucket->get_attrs(), bucket_acl,
+ r = read_bucket_policy(this, driver, s, tmp_bucket->get_info(), tmp_bucket->get_attrs(), *bucket_acl,
tmp_bucket->get_key(), y);
if (r < 0) {
ldpp_dout(this, 0) << "failed to read bucket ACL for bucket "
@@ -2131,7 +2114,7 @@ int RGWGetObj::handle_slo_manifest(bufferlist& bl, optional_yield y)
}
} else {
bucket = s->bucket.get();
- bucket_acl = s->bucket_acl.get();
+ bucket_acl = &s->bucket_acl;
bucket_policy = s->iam_policy.get_ptr();
}
@@ -3518,10 +3501,10 @@ void RGWCreateBucket::execute(optional_yield y)
}
// don't allow changes to the acl policy
- RGWAccessControlPolicy old_policy(get_cct());
+ RGWAccessControlPolicy old_policy;
int r = rgw_op_get_bucket_policy_from_attr(this, s->cct, driver, info.owner,
s->bucket->get_attrs(),
- &old_policy, y);
+ old_policy, y);
if (r >= 0 && old_policy != policy) {
s->err.message = "Cannot modify existing access control policy";
op_ret = -EEXIST;
@@ -3529,8 +3512,8 @@ void RGWCreateBucket::execute(optional_yield y)
}
}
- s->bucket_owner.set_id(s->user->get_id());
- s->bucket_owner.set_name(s->user->get_display_name());
+ s->bucket_owner.id = s->user->get_id();
+ s->bucket_owner.display_name = s->user->get_display_name();
createparams.owner = s->user->get_id();
buffer::list aclbl;
@@ -3845,7 +3828,7 @@ int RGWPutObj::verify_permission(optional_yield y)
{
if (! copy_source.empty()) {
- RGWAccessControlPolicy cs_acl(s->cct);
+ RGWAccessControlPolicy cs_acl;
boost::optional<Policy> policy;
map<string, bufferlist> cs_attrs;
auto cs_bucket = driver->get_bucket(copy_source_bucket_info);
@@ -3855,14 +3838,14 @@ int RGWPutObj::verify_permission(optional_yield y)
cs_object->set_prefetch_data();
/* check source object permissions */
- int ret = read_obj_policy(this, driver, s, copy_source_bucket_info, cs_attrs, &cs_acl, nullptr,
+ int ret = read_obj_policy(this, driver, s, copy_source_bucket_info, cs_attrs, cs_acl, nullptr,
policy, cs_bucket.get(), cs_object.get(), y, true);
if (ret < 0) {
return ret;
}
/* admin request overrides permission checks */
- if (! s->auth.identity->is_admin_of(cs_acl.get_owner().get_id())) {
+ if (! s->auth.identity->is_admin_of(cs_acl.get_owner().id)) {
if (policy || ! s->iam_user_policies.empty() || !s->session_policies.empty()) {
//add source object tags for permission evaluation
auto [has_s3_existing_tag, has_s3_resource_tag] = rgw_check_policy_condition(this, policy, s->iam_user_policies, s->session_policies);
@@ -4271,7 +4254,7 @@ void RGWPutObj::execute(optional_yield y)
return;
}
processor = driver->get_append_writer(this, s->yield, s->object.get(),
- s->bucket_owner.get_id(),
+ s->bucket_owner.id,
pdest_placement, s->req_id, position,
&cur_accounted_size);
} else {
@@ -4284,7 +4267,7 @@ void RGWPutObj::execute(optional_yield y)
}
}
processor = driver->get_atomic_writer(this, s->yield, s->object.get(),
- s->bucket_owner.get_id(),
+ s->bucket_owner.id,
pdest_placement, olh_epoch, s->req_id);
}
@@ -4690,7 +4673,7 @@ void RGWPostObj::execute(optional_yield y)
std::unique_ptr<rgw::sal::Writer> processor;
processor = driver->get_atomic_writer(this, s->yield, obj.get(),
- s->bucket_owner.get_id(),
+ s->bucket_owner.id,
&s->dest_placement, 0, s->req_id);
op_ret = processor->prepare(s->yield);
if (op_ret < 0) {
@@ -4991,11 +4974,7 @@ void RGWPutMetadataBucket::execute(optional_yield y)
* contain such keys yet. */
if (has_policy) {
if (s->dialect.compare("swift") == 0) {
- auto old_policy = \
- static_cast<RGWAccessControlPolicy_SWIFT*>(s->bucket_acl.get());
- auto new_policy = static_cast<RGWAccessControlPolicy_SWIFT*>(&policy);
- new_policy->filter_merge(policy_rw_mask, old_policy);
- policy = *new_policy;
+ rgw::swift::merge_policy(policy_rw_mask, s->bucket_acl, policy);
}
buffer::list bl;
policy.encode(bl);
@@ -5486,7 +5465,7 @@ int RGWCopyObj::init_processing(optional_yield y)
int RGWCopyObj::verify_permission(optional_yield y)
{
- RGWAccessControlPolicy src_acl(s->cct);
+ RGWAccessControlPolicy src_acl;
boost::optional<Policy> src_policy;
/* get buckets info (source and dest) */
@@ -5497,7 +5476,7 @@ int RGWCopyObj::verify_permission(optional_yield y)
rgw_placement_rule src_placement;
/* check source object permissions */
- op_ret = read_obj_policy(this, driver, s, src_bucket->get_info(), src_bucket->get_attrs(), &src_acl, &src_placement.storage_class,
+ op_ret = read_obj_policy(this, driver, s, src_bucket->get_info(), src_bucket->get_attrs(), src_acl, &src_placement.storage_class,
src_policy, src_bucket.get(), s->src_object.get(), y);
if (op_ret < 0) {
return op_ret;
@@ -5514,7 +5493,7 @@ int RGWCopyObj::verify_permission(optional_yield y)
}
/* admin request overrides permission checks */
- if (!s->auth.identity->is_admin_of(src_acl.get_owner().get_id())) {
+ if (!s->auth.identity->is_admin_of(src_acl.get_owner().id)) {
if (src_policy || ! s->iam_user_policies.empty() || !s->session_policies.empty()) {
auto [has_s3_existing_tag, has_s3_resource_tag] = rgw_check_policy_condition(this, src_policy, s->iam_user_policies, s->session_policies);
if (has_s3_existing_tag || has_s3_resource_tag)
@@ -5585,20 +5564,20 @@ int RGWCopyObj::verify_permission(optional_yield y)
}
}
- RGWAccessControlPolicy dest_bucket_policy(s->cct);
+ RGWAccessControlPolicy dest_bucket_policy;
s->object->set_atomic();
/* check dest bucket permissions */
op_ret = read_bucket_policy(this, driver, s, s->bucket->get_info(),
s->bucket->get_attrs(),
- &dest_bucket_policy, s->bucket->get_key(), y);
+ dest_bucket_policy, s->bucket->get_key(), y);
if (op_ret < 0) {
return op_ret;
}
auto dest_iam_policy = get_iam_policy_from_attr(s->cct, s->bucket->get_attrs(), s->bucket->get_tenant());
/* admin request overrides permission checks */
- if (! s->auth.identity->is_admin_of(dest_policy.get_owner().get_id())){
+ if (! s->auth.identity->is_admin_of(dest_policy.get_owner().id)){
if (dest_iam_policy != boost::none || ! s->iam_user_policies.empty() || !s->session_policies.empty()) {
//Add destination bucket tags for authorization
auto [has_s3_existing_tag, has_s3_resource_tag] = rgw_check_policy_condition(this, dest_iam_policy, s->iam_user_policies, s->session_policies);
@@ -5896,11 +5875,11 @@ void RGWGetACLs::pre_exec()
void RGWGetACLs::execute(optional_yield y)
{
stringstream ss;
- RGWAccessControlPolicy* const acl = \
- (!rgw::sal::Object::empty(s->object.get()) ? s->object_acl.get() : s->bucket_acl.get());
- RGWAccessControlPolicy_S3* const s3policy = \
- static_cast<RGWAccessControlPolicy_S3*>(acl);
- s3policy->to_xml(ss);
+ if (rgw::sal::Object::empty(s->object.get())) {
+ rgw::s3::write_policy_xml(s->bucket_acl, ss);
+ } else {
+ rgw::s3::write_policy_xml(s->object_acl, ss);
+ }
acls = ss.str();
}
@@ -5991,25 +5970,10 @@ void RGWDeleteLC::pre_exec()
void RGWPutACLs::execute(optional_yield y)
{
- bufferlist bl;
+ const RGWAccessControlPolicy& existing_policy = \
+ (rgw::sal::Object::empty(s->object.get()) ? s->bucket_acl : s->object_acl);
- RGWAccessControlPolicy_S3 *policy = NULL;
- RGWACLXMLParser_S3 parser(s->cct);
- RGWAccessControlPolicy_S3 new_policy(s->cct);
- stringstream ss;
-
- op_ret = 0; /* XXX redundant? */
-
- if (!parser.init()) {
- op_ret = -EINVAL;
- return;
- }
-
-
- RGWAccessControlPolicy* const existing_policy = \
- (rgw::sal::Object::empty(s->object.get()) ? s->bucket_acl.get() : s->object_acl.get());
-
- owner = existing_policy->get_owner();
+ const ACLOwner& existing_owner = existing_policy.get_owner();
op_ret = get_params(y);
if (op_ret < 0) {
@@ -6032,26 +5996,24 @@ void RGWPutACLs::execute(optional_yield y)
return;
}
+ RGWAccessControlPolicy new_policy;
if (!s->canned_acl.empty() || s->has_acl_header) {
- op_ret = get_policy_from_state(driver, s, ss);
- if (op_ret < 0)
- return;
-
- data.clear();
- data.append(ss.str());
+ op_ret = get_policy_from_state(existing_owner, new_policy);
+ } else {
+ op_ret = rgw::s3::parse_policy(this, y, driver, {data.c_str(), data.length()},
+ new_policy, s->err.message);
}
-
- if (!parser.parse(data.c_str(), data.length(), 1)) {
- op_ret = -EINVAL;
+ if (op_ret < 0)
return;
- }
- policy = static_cast<RGWAccessControlPolicy_S3 *>(parser.find_first("AccessControlPolicy"));
- if (!policy) {
- op_ret = -EINVAL;
+
+ if (!existing_owner.id.empty() &&
+ existing_owner.id != new_policy.get_owner().id) {
+ s->err.message = "Cannot modify ACL Owner";
+ op_ret = -EPERM;
return;
}
- const RGWAccessControlList& req_acl = policy->get_acl();
+ const RGWAccessControlList& req_acl = new_policy.get_acl();
const multimap<string, ACLGrant>& req_grant_map = req_acl.get_grant_map();
#define ACL_GRANTS_MAX_NUM 100
int max_num = s->cct->_conf->rgw_acl_grants_max_num;
@@ -6072,13 +6034,8 @@ void RGWPutACLs::execute(optional_yield y)
// forward bucket acl requests to meta master zone
if ((rgw::sal::Object::empty(s->object.get()))) {
- bufferlist in_data;
- // include acl data unless it was generated from a canned_acl
- if (s->canned_acl.empty()) {
- in_data.append(data);
- }
op_ret = rgw_forward_request_to_master(this, *s->penv.site, s->user->get_id(),
- &in_data, nullptr, s->info, y);
+ &data, nullptr, s->info, y);
if (op_ret < 0) {
ldpp_dout(this, 0) << "forward_request_to_master returned ret=" << op_ret << dendl;
return;
@@ -6087,17 +6044,11 @@ void RGWPutACLs::execute(optional_yield y)
if (s->cct->_conf->subsys.should_gather<ceph_subsys_rgw, 15>()) {
ldpp_dout(this, 15) << "Old AccessControlPolicy";
- policy->to_xml(*_dout);
+ rgw::s3::write_policy_xml(existing_policy, *_dout);
*_dout << dendl;
- }
-
- op_ret = policy->rebuild(this, driver, &owner, new_policy, s->err.message);
- if (op_ret < 0)
- return;
- if (s->cct->_conf->subsys.should_gather<ceph_subsys_rgw, 15>()) {
ldpp_dout(this, 15) << "New AccessControlPolicy:";
- new_policy.to_xml(*_dout);
+ rgw::s3::write_policy_xml(new_policy, *_dout);
*_dout << dendl;
}
@@ -6107,6 +6058,8 @@ void RGWPutACLs::execute(optional_yield y)
op_ret = -EACCES;
return;
}
+
+ bufferlist bl;
new_policy.encode(bl);
map<string, bufferlist> attrs;
@@ -7410,8 +7363,8 @@ bool RGWBulkDelete::Deleter::verify_permission(RGWBucketInfo& binfo,
ACLOwner& bucket_owner /* out */,
optional_yield y)
{
- RGWAccessControlPolicy bacl(driver->ctx());
- int ret = read_bucket_policy(dpp, driver, s, binfo, battrs, &bacl, binfo.bucket, y);
+ RGWAccessControlPolicy bacl;
+ int ret = read_bucket_policy(dpp, driver, s, binfo, battrs, bacl, binfo.bucket, y);
if (ret < 0) {
return false;
}
@@ -7422,8 +7375,8 @@ bool RGWBulkDelete::Deleter::verify_permission(RGWBucketInfo& binfo,
/* We can use global user_acl because each BulkDelete request is allowed
* to work on entities from a single account only. */
- return verify_bucket_permission(dpp, s, binfo.bucket, s->user_acl.get(),
- &bacl, policy, s->iam_user_policies, s->session_policies, rgw::IAM::s3DeleteBucket);
+ return verify_bucket_permission(dpp, s, binfo.bucket, s->user_acl,
+ bacl, policy, s->iam_user_policies, s->session_policies, rgw::IAM::s3DeleteBucket);
}
bool RGWBulkDelete::Deleter::delete_single(const acct_path_t& path, optional_yield y)
@@ -7447,7 +7400,7 @@ bool RGWBulkDelete::Deleter::delete_single(const acct_path_t& path, optional_yie
if (!path.obj_key.empty()) { // object deletion
ACLOwner bucket_owner;
- bucket_owner.set_id(bucket->get_info().owner);
+ bucket_owner.id = bucket->get_info().owner;
std::unique_ptr<rgw::sal::Object> obj = bucket->get_object(path.obj_key);
obj->set_atomic();
@@ -7749,8 +7702,8 @@ bool RGWBulkUploadOp::handle_file_verify_permission(RGWBucketInfo& binfo,
ACLOwner& bucket_owner /* out */,
optional_yield y)
{
- RGWAccessControlPolicy bacl(driver->ctx());
- op_ret = read_bucket_policy(this, driver, s, binfo, battrs, &bacl, binfo.bucket, y);
+ RGWAccessControlPolicy bacl;
+ op_ret = read_bucket_policy(this, driver, s, binfo, battrs, bacl, binfo.bucket, y);
if (op_ret < 0) {
ldpp_dout(this, 20) << "cannot read_policy() for bucket" << dendl;
return false;
@@ -7803,8 +7756,8 @@ bool RGWBulkUploadOp::handle_file_verify_permission(RGWBucketInfo& binfo,
}
}
- return verify_bucket_permission_no_policy(this, s, s->user_acl.get(),
- &bacl, RGW_PERM_WRITE);
+ return verify_bucket_permission_no_policy(this, s, s->user_acl,
+ bacl, RGW_PERM_WRITE);
}
int RGWBulkUploadOp::handle_file(const std::string_view path,
@@ -7860,7 +7813,7 @@ int RGWBulkUploadOp::handle_file(const std::string_view path,
std::unique_ptr<rgw::sal::Writer> processor;
processor = driver->get_atomic_writer(this, s->yield, obj.get(),
- bowner.get_id(),
+ bowner.id,
&s->dest_placement, 0, s->req_id);
op_ret = processor->prepare(s->yield);
if (op_ret < 0) {
@@ -8245,7 +8198,7 @@ void RGWGetObjLayout::execute(optional_yield y)
int RGWConfigBucketMetaSearch::verify_permission(optional_yield y)
{
- if (!s->auth.identity->is_owner_of(s->bucket_owner.get_id())) {
+ if (!s->auth.identity->is_owner_of(s->bucket_owner.id)) {
return -EACCES;
}
@@ -8278,7 +8231,7 @@ void RGWConfigBucketMetaSearch::execute(optional_yield y)
int RGWGetBucketMetaSearch::verify_permission(optional_yield y)
{
- if (!s->auth.identity->is_owner_of(s->bucket_owner.get_id())) {
+ if (!s->auth.identity->is_owner_of(s->bucket_owner.id)) {
return -EACCES;
}
@@ -8292,7 +8245,7 @@ void RGWGetBucketMetaSearch::pre_exec()
int RGWDelBucketMetaSearch::verify_permission(optional_yield y)
{
- if (!s->auth.identity->is_owner_of(s->bucket_owner.get_id())) {
+ if (!s->auth.identity->is_owner_of(s->bucket_owner.id)) {
return -EACCES;
}
@@ -8915,7 +8868,7 @@ int RGWGetBucketPolicyStatus::verify_permission(optional_yield y)
void RGWGetBucketPolicyStatus::execute(optional_yield y)
{
- isPublic = (s->iam_policy && rgw::IAM::is_public(*s->iam_policy)) || s->bucket_acl->is_public(this);
+ isPublic = (s->iam_policy && rgw::IAM::is_public(*s->iam_policy)) || s->bucket_acl.is_public(this);
}
int RGWPutBucketPublicAccessBlock::verify_permission(optional_yield y)
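The rgw_op.cc hunks above consistently replace ACLOwner's get_id()/set_id()/get_display_name() accessors with direct access to its id and display_name members, and hand RGWAccessControlPolicy to read_obj_policy()/read_bucket_policy() by reference instead of pointer. The following is a minimal standalone sketch of that calling style; the structs are simplified stand-ins for the real RGW types and the sketch is not part of the patch.

    // Simplified stand-ins: only the member shape matches the diff.
    #include <iostream>
    #include <string>

    struct rgw_user { std::string tenant, ns, id; };

    struct ACLOwner {
      rgw_user id;               // accessed directly; no more set_id()/get_id()
      std::string display_name;  // replaces get_display_name()/set_name()
    };

    // Hypothetical admin-override check in the new style: the caller compares
    // against owner.id rather than owner.get_id().
    bool admin_overrides(const ACLOwner& owner, const rgw_user& acting_as) {
      return owner.id.tenant == acting_as.tenant && owner.id.id == acting_as.id;
    }

    int main() {
      ACLOwner owner;
      owner.id = {"tenant1", "", "alice"};  // was owner.set_id(...)
      owner.display_name = "Alice";         // was owner.set_name(...)
      std::cout << admin_overrides(owner, owner.id) << "\n";  // prints 1
    }

Treating ACLOwner as a plain aggregate removes a layer of trivial accessors, so the owner can be copied, compared, and printed without helper calls.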
diff --git a/src/rgw/rgw_op.h b/src/rgw/rgw_op.h
index be7232ae392..9314d454c79 100644
--- a/src/rgw/rgw_op.h
+++ b/src/rgw/rgw_op.h
@@ -79,7 +79,7 @@ int rgw_op_get_bucket_policy_from_attr(const DoutPrefixProvider *dpp,
rgw::sal::Driver* driver,
const rgw_user& bucket_owner,
std::map<std::string, bufferlist>& bucket_attrs,
- RGWAccessControlPolicy *policy,
+ RGWAccessControlPolicy& policy,
optional_yield y);
class RGWHandler {
@@ -420,7 +420,7 @@ public:
int read_user_manifest_part(
rgw::sal::Bucket* bucket,
const rgw_bucket_dir_entry& ent,
- RGWAccessControlPolicy * const bucket_acl,
+ const RGWAccessControlPolicy& bucket_acl,
const boost::optional<rgw::IAM::Policy>& bucket_policy,
const off_t start_ofs,
const off_t end_ofs,
@@ -1098,7 +1098,6 @@ class RGWCreateBucket : public RGWOp {
void execute(optional_yield y) override;
void init(rgw::sal::Driver* driver, req_state *s, RGWHandler *h) override {
RGWOp::init(driver, s, h);
- policy.set_ctx(s->cct);
relaxed_region_enforcement =
s->cct->_conf.get_val<bool>("rgw_relaxed_region_enforcement");
}
@@ -1249,11 +1248,6 @@ public:
delete obj_legal_hold;
}
- void init(rgw::sal::Driver* driver, req_state *s, RGWHandler *h) override {
- RGWOp::init(driver, s, h);
- policy.set_ctx(s->cct);
- }
-
virtual int init_processing(optional_yield y) override;
void emplace_attr(std::string&& key, buffer::list&& bl) {
@@ -1328,11 +1322,6 @@ public:
attrs.emplace(std::move(key), std::move(bl)); /* key and bl are r-value refs */
}
- void init(rgw::sal::Driver* driver, req_state *s, RGWHandler *h) override {
- RGWOp::init(driver, s, h);
- policy.set_ctx(s->cct);
- }
-
int verify_permission(optional_yield y) override;
void pre_exec() override;
void execute(optional_yield y) override;
@@ -1367,10 +1356,6 @@ public:
has_policy(false) {
}
- void init(rgw::sal::Driver* driver, req_state *s, RGWHandler *h) override {
- RGWOp::init(driver, s, h);
- policy.set_ctx(s->cct);
- }
int init_processing(optional_yield y) override;
int verify_permission(optional_yield y) override;
void pre_exec() override { }
@@ -1406,11 +1391,6 @@ public:
attrs.emplace(std::move(key), std::move(bl)); /* key and bl are r-value refs */
}
- void init(rgw::sal::Driver* driver, req_state *s, RGWHandler *h) override {
- RGWOp::init(driver, s, h);
- policy.set_ctx(s->cct);
- }
-
int verify_permission(optional_yield y) override;
void pre_exec() override;
void execute(optional_yield y) override;
@@ -1433,10 +1413,6 @@ public:
: dlo_manifest(NULL)
{}
- void init(rgw::sal::Driver* driver, req_state *s, RGWHandler *h) override {
- RGWOp::init(driver, s, h);
- policy.set_ctx(s->cct);
- }
int verify_permission(optional_yield y) override;
void pre_exec() override;
void execute(optional_yield y) override;
@@ -1560,10 +1536,6 @@ public:
attrs.emplace(std::move(key), std::move(bl));
}
- void init(rgw::sal::Driver* driver, req_state *s, RGWHandler *h) override {
- RGWOp::init(driver, s, h);
- dest_policy.set_ctx(s->cct);
- }
int init_processing(optional_yield y) override;
int verify_permission(optional_yield y) override;
void pre_exec() override;
@@ -1604,7 +1576,6 @@ public:
class RGWPutACLs : public RGWOp {
protected:
bufferlist data;
- ACLOwner owner;
public:
RGWPutACLs() {}
@@ -1614,7 +1585,8 @@ public:
void pre_exec() override;
void execute(optional_yield y) override;
- virtual int get_policy_from_state(rgw::sal::Driver* driver, req_state *s, std::stringstream& ss) { return 0; }
+ virtual int get_policy_from_state(const ACLOwner& owner,
+ RGWAccessControlPolicy& p) { return 0; }
virtual int get_params(optional_yield y) = 0;
void send_response() override = 0;
const char* name() const override { return "put_acls"; }
@@ -1664,7 +1636,6 @@ public:
void pre_exec() override;
void execute(optional_yield y) override;
-// virtual int get_policy_from_state(RGWRados* driver, req_state *s, std::stringstream& ss) { return 0; }
virtual int get_params(optional_yield y) = 0;
void send_response() override = 0;
const char* name() const override { return "put_lifecycle"; }
@@ -1844,10 +1815,6 @@ protected:
public:
RGWInitMultipart() {}
- void init(rgw::sal::Driver* driver, req_state *s, RGWHandler *h) override {
- RGWOp::init(driver, s, h);
- policy.set_ctx(s->cct);
- }
int verify_permission(optional_yield y) override;
void pre_exec() override;
void execute(optional_yield y) override;
@@ -1919,10 +1886,6 @@ public:
truncated = false;
}
- void init(rgw::sal::Driver* driver, req_state *s, RGWHandler *h) override {
- RGWOp::init(driver, s, h);
- policy = RGWAccessControlPolicy(s->cct);
- }
int verify_permission(optional_yield y) override;
void pre_exec() override;
void execute(optional_yield y) override;
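rgw_op.h narrows RGWPutACLs::get_policy_from_state() from a (driver, req_state, stringstream) signature to one that receives the existing owner and fills a caller-supplied policy, reporting failure through the returned int. Below is a sketch of what an override now looks like; the types are placeholders, not the real RGW classes, and the sketch is not part of the patch.

    #include <iostream>
    #include <string>

    struct ACLOwner { std::string id, display_name; };
    struct AclPolicy { ACLOwner owner; std::string canned; };

    struct PutACLsOp {
      virtual ~PutACLsOp() = default;
      // base default mirrors the diff: a no-op returning 0
      virtual int get_policy_from_state(const ACLOwner&, AclPolicy&) { return 0; }
    };

    struct PutACLsS3Like : PutACLsOp {
      std::string canned_acl = "private";
      int get_policy_from_state(const ACLOwner& owner, AclPolicy& policy) override {
        // hypothetical: build the policy in place instead of serializing to XML
        policy.owner = owner;
        policy.canned = canned_acl;
        return 0;
      }
    };

    int main() {
      PutACLsS3Like op;
      AclPolicy p;
      std::cout << op.get_policy_from_state({"alice", "Alice"}, p)
                << " " << p.canned << "\n";  // prints 0 private
    }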
diff --git a/src/rgw/rgw_quota.cc b/src/rgw/rgw_quota.cc
index c2f300b1fdb..836086b5be2 100644
--- a/src/rgw/rgw_quota.cc
+++ b/src/rgw/rgw_quota.cc
@@ -86,24 +86,15 @@ public:
const DoutPrefixProvider* dpp);
void adjust_stats(const rgw_user& user, rgw_bucket& bucket, int objs_delta, uint64_t added_bytes, uint64_t removed_bytes);
- void set_stats(const rgw_user& user, const rgw_bucket& bucket, RGWQuotaCacheStats& qs, RGWStorageStats& stats);
+ void set_stats(const rgw_user& user, const rgw_bucket& bucket, RGWQuotaCacheStats& qs, const RGWStorageStats& stats);
int async_refresh(const rgw_user& user, const rgw_bucket& bucket, RGWQuotaCacheStats& qs);
- void async_refresh_response(const rgw_user& user, rgw_bucket& bucket, RGWStorageStats& stats);
+ void async_refresh_response(const rgw_user& user, rgw_bucket& bucket, const RGWStorageStats& stats);
void async_refresh_fail(const rgw_user& user, rgw_bucket& bucket);
- class AsyncRefreshHandler {
- protected:
- rgw::sal::Driver* driver;
- RGWQuotaCache<T> *cache;
- public:
- AsyncRefreshHandler(rgw::sal::Driver* _driver, RGWQuotaCache<T> *_cache) : driver(_driver), cache(_cache) {}
- virtual ~AsyncRefreshHandler() {}
-
- virtual int init_fetch() = 0;
- virtual void drop_reference() = 0;
- };
-
- virtual AsyncRefreshHandler *allocate_refresh_handler(const rgw_user& user, const rgw_bucket& bucket) = 0;
+ /// Start an async refresh that will eventually call async_refresh_response() or
+ /// async_refresh_fail(). A reference to the waiter is held until completion.
+ virtual int init_refresh(const rgw_user& user, const rgw_bucket& bucket,
+ boost::intrusive_ptr<RefCountedWaitObject> waiter) = 0;
};
template<class T>
@@ -116,31 +107,17 @@ int RGWQuotaCache<T>::async_refresh(const rgw_user& user, const rgw_bucket& buck
return 0;
}
- async_refcount->get();
-
-
- AsyncRefreshHandler *handler = allocate_refresh_handler(user, bucket);
-
- int ret = handler->init_fetch();
- if (ret < 0) {
- async_refcount->put();
- handler->drop_reference();
- return ret;
- }
-
- return 0;
+ return init_refresh(user, bucket, async_refcount);
}
template<class T>
void RGWQuotaCache<T>::async_refresh_fail(const rgw_user& user, rgw_bucket& bucket)
{
ldout(driver->ctx(), 20) << "async stats refresh response for bucket=" << bucket << dendl;
-
- async_refcount->put();
}
template<class T>
-void RGWQuotaCache<T>::async_refresh_response(const rgw_user& user, rgw_bucket& bucket, RGWStorageStats& stats)
+void RGWQuotaCache<T>::async_refresh_response(const rgw_user& user, rgw_bucket& bucket, const RGWStorageStats& stats)
{
ldout(driver->ctx(), 20) << "async stats refresh response for bucket=" << bucket << dendl;
@@ -149,12 +126,10 @@ void RGWQuotaCache<T>::async_refresh_response(const rgw_user& user, rgw_bucket&
map_find(user, bucket, qs);
set_stats(user, bucket, qs, stats);
-
- async_refcount->put();
}
template<class T>
-void RGWQuotaCache<T>::set_stats(const rgw_user& user, const rgw_bucket& bucket, RGWQuotaCacheStats& qs, RGWStorageStats& stats)
+void RGWQuotaCache<T>::set_stats(const rgw_user& user, const rgw_bucket& bucket, RGWQuotaCacheStats& qs, const RGWStorageStats& stats)
{
qs.stats = stats;
qs.expiration = ceph_clock_now();
@@ -246,70 +221,6 @@ void RGWQuotaCache<T>::adjust_stats(const rgw_user& user, rgw_bucket& bucket, in
data_modified(user, bucket);
}
-class BucketAsyncRefreshHandler : public RGWQuotaCache<rgw_bucket>::AsyncRefreshHandler,
- public RGWGetBucketStats_CB {
- rgw_user user;
-public:
- BucketAsyncRefreshHandler(rgw::sal::Driver* _driver, RGWQuotaCache<rgw_bucket> *_cache,
- const rgw_user& _user, const rgw_bucket& _bucket) :
- RGWQuotaCache<rgw_bucket>::AsyncRefreshHandler(_driver, _cache),
- RGWGetBucketStats_CB(_bucket), user(_user) {}
-
- void drop_reference() override { put(); }
- void handle_response(int r) override;
- int init_fetch() override;
-};
-
-int BucketAsyncRefreshHandler::init_fetch()
-{
- std::unique_ptr<rgw::sal::Bucket> rbucket;
-
- const DoutPrefix dp(driver->ctx(), dout_subsys, "rgw bucket async refresh handler: ");
- int r = driver->load_bucket(&dp, bucket, &rbucket, null_yield);
- if (r < 0) {
- ldpp_dout(&dp, 0) << "could not get bucket info for bucket=" << bucket << " r=" << r << dendl;
- return r;
- }
-
- ldpp_dout(&dp, 20) << "initiating async quota refresh for bucket=" << bucket << dendl;
-
- const auto& index = rbucket->get_info().get_current_index();
- if (is_layout_indexless(index)) {
- return 0;
- }
-
- r = rbucket->read_stats_async(&dp, index, RGW_NO_SHARD, this);
- if (r < 0) {
- ldpp_dout(&dp, 0) << "could not get bucket info for bucket=" << bucket.name << dendl;
-
- /* read_stats_async() dropped our reference already */
- return r;
- }
-
- return 0;
-}
-
-void BucketAsyncRefreshHandler::handle_response(const int r)
-{
- if (r < 0) {
- ldout(driver->ctx(), 20) << "AsyncRefreshHandler::handle_response() r=" << r << dendl;
- cache->async_refresh_fail(user, bucket);
- return;
- }
-
- RGWStorageStats bs;
-
- for (const auto& pair : *stats) {
- const RGWStorageStats& s = pair.second;
-
- bs.size += s.size;
- bs.size_rounded += s.size_rounded;
- bs.num_objects += s.num_objects;
- }
-
- cache->async_refresh_response(user, bucket, bs);
-}
-
class RGWBucketStatsCache : public RGWQuotaCache<rgw_bucket> {
protected:
bool map_find(const rgw_user& user, const rgw_bucket& bucket, RGWQuotaCacheStats& qs) override {
@@ -330,9 +241,8 @@ public:
explicit RGWBucketStatsCache(rgw::sal::Driver* _driver) : RGWQuotaCache<rgw_bucket>(_driver, _driver->ctx()->_conf->rgw_bucket_quota_cache_size) {
}
- AsyncRefreshHandler *allocate_refresh_handler(const rgw_user& user, const rgw_bucket& bucket) override {
- return new BucketAsyncRefreshHandler(driver, this, user, bucket);
- }
+ int init_refresh(const rgw_user& user, const rgw_bucket& bucket,
+ boost::intrusive_ptr<RefCountedWaitObject> waiter) override;
};
int RGWBucketStatsCache::fetch_stats_from_storage(const rgw_user& _u, const rgw_bucket& _b, RGWStorageStats& stats, optional_yield y, const DoutPrefixProvider *dpp)
@@ -376,48 +286,57 @@ int RGWBucketStatsCache::fetch_stats_from_storage(const rgw_user& _u, const rgw_
return 0;
}
-class UserAsyncRefreshHandler : public RGWQuotaCache<rgw_user>::AsyncRefreshHandler,
- public RGWGetUserStats_CB {
- const DoutPrefixProvider *dpp;
+class BucketAsyncRefreshHandler : public rgw::sal::ReadStatsCB {
+ RGWBucketStatsCache* cache;
+ boost::intrusive_ptr<RefCountedWaitObject> waiter;
+ rgw_user user;
rgw_bucket bucket;
public:
- UserAsyncRefreshHandler(const DoutPrefixProvider *_dpp, rgw::sal::Driver* _driver, RGWQuotaCache<rgw_user> *_cache,
- const rgw_user& _user, const rgw_bucket& _bucket) :
- RGWQuotaCache<rgw_user>::AsyncRefreshHandler(_driver, _cache),
- RGWGetUserStats_CB(_user),
- dpp(_dpp),
- bucket(_bucket) {}
-
- void drop_reference() override { put(); }
- int init_fetch() override;
- void handle_response(int r) override;
+ BucketAsyncRefreshHandler(RGWBucketStatsCache* cache,
+ boost::intrusive_ptr<RefCountedWaitObject> waiter,
+ const rgw_user& user, const rgw_bucket& bucket)
+ : cache(cache), waiter(std::move(waiter)), user(user), bucket(bucket) {}
+
+ void handle_response(int r, const RGWStorageStats& stats) override {
+ if (r < 0) {
+ cache->async_refresh_fail(user, bucket);
+ return;
+ }
+
+ cache->async_refresh_response(user, bucket, stats);
+ }
};
-int UserAsyncRefreshHandler::init_fetch()
+
+int RGWBucketStatsCache::init_refresh(const rgw_user& user, const rgw_bucket& bucket,
+ boost::intrusive_ptr<RefCountedWaitObject> waiter)
{
- std::unique_ptr<rgw::sal::User> ruser = driver->get_user(user);
+ std::unique_ptr<rgw::sal::Bucket> rbucket;
- ldpp_dout(dpp, 20) << "initiating async quota refresh for user=" << user << dendl;
- int r = ruser->read_stats_async(dpp, this);
+ const DoutPrefix dp(driver->ctx(), dout_subsys, "rgw bucket async refresh handler: ");
+ int r = driver->load_bucket(&dp, bucket, &rbucket, null_yield);
if (r < 0) {
- ldpp_dout(dpp, 0) << "could not get bucket info for user=" << user << dendl;
-
- /* get_bucket_stats_async() dropped our reference already */
+ ldpp_dout(&dp, 0) << "could not get bucket info for bucket=" << bucket << " r=" << r << dendl;
return r;
}
- return 0;
-}
+ ldpp_dout(&dp, 20) << "initiating async quota refresh for bucket=" << bucket << dendl;
-void UserAsyncRefreshHandler::handle_response(int r)
-{
+ const auto& index = rbucket->get_info().get_current_index();
+ if (is_layout_indexless(index)) {
+ return 0;
+ }
+
+ boost::intrusive_ptr handler = new BucketAsyncRefreshHandler(
+ this, std::move(waiter), user, bucket);
+
+ r = rbucket->read_stats_async(&dp, index, RGW_NO_SHARD, std::move(handler));
if (r < 0) {
- ldout(driver->ctx(), 20) << "AsyncRefreshHandler::handle_response() r=" << r << dendl;
- cache->async_refresh_fail(user, bucket);
- return;
+ ldpp_dout(&dp, 0) << "could not get bucket stats for bucket=" << bucket.name << dendl;
+ return r;
}
- cache->async_refresh_response(user, bucket, stats);
+ return 0;
}
class RGWUserStatsCache : public RGWQuotaCache<rgw_user> {
@@ -574,9 +493,8 @@ public:
stop();
}
- AsyncRefreshHandler *allocate_refresh_handler(const rgw_user& user, const rgw_bucket& bucket) override {
- return new UserAsyncRefreshHandler(dpp, driver, this, user, bucket);
- }
+ int init_refresh(const rgw_user& user, const rgw_bucket& bucket,
+ boost::intrusive_ptr<RefCountedWaitObject> waiter) override;
bool going_down() {
return down_flag;
@@ -592,6 +510,49 @@ public:
}
};
+class UserAsyncRefreshHandler : public rgw::sal::ReadStatsCB {
+ RGWUserStatsCache* cache;
+ boost::intrusive_ptr<RefCountedWaitObject> waiter;
+ rgw_bucket bucket;
+ rgw_user user;
+ public:
+ UserAsyncRefreshHandler(RGWUserStatsCache* cache,
+ boost::intrusive_ptr<RefCountedWaitObject> waiter,
+ const rgw_user& user, const rgw_bucket& bucket)
+ : cache(cache), waiter(std::move(waiter)), bucket(bucket), user(user)
+ {}
+
+ void handle_response(int r, const RGWStorageStats& stats) override;
+};
+
+int RGWUserStatsCache::init_refresh(const rgw_user& user, const rgw_bucket& bucket,
+ boost::intrusive_ptr<RefCountedWaitObject> waiter)
+{
+ boost::intrusive_ptr handler = new UserAsyncRefreshHandler(
+ this, std::move(waiter), user, bucket);
+
+ std::unique_ptr<rgw::sal::User> ruser = driver->get_user(user);
+
+ ldpp_dout(dpp, 20) << "initiating async quota refresh for user=" << user << dendl;
+ int r = ruser->read_stats_async(dpp, std::move(handler));
+ if (r < 0) {
+ ldpp_dout(dpp, 0) << "could not get bucket info for user=" << user << dendl;
+ return r;
+ }
+
+ return 0;
+}
+
+void UserAsyncRefreshHandler::handle_response(int r, const RGWStorageStats& stats)
+{
+ if (r < 0) {
+ cache->async_refresh_fail(user, bucket);
+ return;
+ }
+
+ cache->async_refresh_response(user, bucket, stats);
+}
+
int RGWUserStatsCache::fetch_stats_from_storage(const rgw_user& _u,
const rgw_bucket& _b,
RGWStorageStats& stats,
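In rgw_quota.cc the per-cache AsyncRefreshHandler hierarchy is replaced by init_refresh(), which hands the ref-counted waiter to a small ReadStatsCB completion object; the callback reports back through async_refresh_response() or async_refresh_fail() and releases the waiter automatically when it is destroyed, replacing the explicit async_refcount->get()/put() pairing. The following standalone sketch shows that completion-object pattern with simplified stand-ins for the cache, stats, and waiter types (not the real SAL interfaces, and not part of the patch).

    #include <boost/intrusive_ptr.hpp>
    #include <boost/smart_ptr/intrusive_ref_counter.hpp>
    #include <cstdint>
    #include <iostream>

    struct Waiter : boost::intrusive_ref_counter<Waiter> {};
    struct StorageStats { std::uint64_t size = 0, num_objects = 0; };

    struct Cache {
      void async_refresh_response(const StorageStats& s) {
        std::cout << "refreshed: " << s.num_objects << " objects\n";
      }
      void async_refresh_fail() { std::cout << "refresh failed\n"; }
    };

    // Mirrors the shape of the new callbacks: one handle_response(r, stats)
    // entry point; the waiter reference is dropped when the callback dies.
    struct RefreshCB : boost::intrusive_ref_counter<RefreshCB> {
      Cache* cache;
      boost::intrusive_ptr<Waiter> waiter;
      RefreshCB(Cache* c, boost::intrusive_ptr<Waiter> w)
          : cache(c), waiter(std::move(w)) {}
      void handle_response(int r, const StorageStats& stats) {
        if (r < 0) { cache->async_refresh_fail(); return; }
        cache->async_refresh_response(stats);
      }
    };

    int main() {
      Cache cache;
      boost::intrusive_ptr<Waiter> waiter(new Waiter);
      boost::intrusive_ptr<RefreshCB> cb(new RefreshCB(&cache, waiter));
      cb->handle_response(0, StorageStats{4096, 3});  // simulate async completion
    }

Keeping the waiter inside the callback is what lets the cache's shutdown path wait for in-flight refreshes without manual reference bookkeeping.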
diff --git a/src/rgw/rgw_rest_client.cc b/src/rgw/rgw_rest_client.cc
index 1ccb813a109..b1ee1a9340e 100644
--- a/src/rgw/rgw_rest_client.cc
+++ b/src/rgw/rgw_rest_client.cc
@@ -3,6 +3,7 @@
#include "rgw_common.h"
#include "rgw_rest_client.h"
+#include "rgw_acl_s3.h"
#include "rgw_auth_s3.h"
#include "rgw_http_errors.h"
@@ -497,28 +498,22 @@ RGWRESTStreamS3PutObj::~RGWRESTStreamS3PutObj()
delete out_cb;
}
-static void grants_by_type_add_one_grant(map<int, string>& grants_by_type, int perm, ACLGrant& grant)
+static void grants_by_type_add_one_grant(map<int, string>& grants_by_type, int perm, const ACLGrant& grant)
{
string& s = grants_by_type[perm];
if (!s.empty())
s.append(", ");
- string id_type_str;
- ACLGranteeType& type = grant.get_type();
- switch (type.get_type()) {
- case ACL_TYPE_GROUP:
- id_type_str = "uri";
- break;
- case ACL_TYPE_EMAIL_USER:
- id_type_str = "emailAddress";
- break;
- default:
- id_type_str = "id";
- }
- rgw_user id;
- grant.get_id(id);
- s.append(id_type_str + "=\"" + id.to_str() + "\"");
+ if (const auto user = grant.get_user(); user) {
+ s.append("id=\"" + user->id.to_str() + "\"");
+ } else if (const auto email = grant.get_email(); email) {
+ s.append("emailAddress=\"" + email->address + "\"");
+ } else if (const auto group = grant.get_group(); group) {
+ std::string uri;
+ rgw::s3::acl_group_to_uri(group->type, uri);
+ s.append("uri=\"" + uri + "\"");
+ }
}
struct grant_type_to_header {
@@ -535,7 +530,7 @@ struct grant_type_to_header grants_headers_def[] = {
{ 0, NULL}
};
-static bool grants_by_type_check_perm(map<int, string>& grants_by_type, int perm, ACLGrant& grant, int check_perm)
+static bool grants_by_type_check_perm(map<int, string>& grants_by_type, int perm, const ACLGrant& grant, int check_perm)
{
if ((perm & check_perm) == check_perm) {
grants_by_type_add_one_grant(grants_by_type, check_perm, grant);
@@ -544,7 +539,7 @@ static bool grants_by_type_check_perm(map<int, string>& grants_by_type, int perm
return false;
}
-static void grants_by_type_add_perm(map<int, string>& grants_by_type, int perm, ACLGrant& grant)
+static void grants_by_type_add_perm(map<int, string>& grants_by_type, int perm, const ACLGrant& grant)
{
struct grant_type_to_header *t;
@@ -669,16 +664,13 @@ void RGWRESTGenerateHTTPHeaders::set_http_attrs(const map<string, string>& http_
}
}
-void RGWRESTGenerateHTTPHeaders::set_policy(RGWAccessControlPolicy& policy)
+void RGWRESTGenerateHTTPHeaders::set_policy(const RGWAccessControlPolicy& policy)
{
/* update acl headers */
- RGWAccessControlList& acl = policy.get_acl();
- multimap<string, ACLGrant>& grant_map = acl.get_grant_map();
- multimap<string, ACLGrant>::iterator giter;
+ const RGWAccessControlList& acl = policy.get_acl();
map<int, string> grants_by_type;
- for (giter = grant_map.begin(); giter != grant_map.end(); ++giter) {
- ACLGrant& grant = giter->second;
- ACLPermission& perm = grant.get_permission();
+ for (const auto& [id, grant] : acl.get_grant_map()) {
+ ACLPermission perm = grant.get_permission();
grants_by_type_add_perm(grants_by_type, perm.get_permissions(), grant);
}
add_grants_headers(grants_by_type, *new_env, new_info->x_meta_map);
diff --git a/src/rgw/rgw_rest_client.h b/src/rgw/rgw_rest_client.h
index 6e27576c676..923f8cc1783 100644
--- a/src/rgw/rgw_rest_client.h
+++ b/src/rgw/rgw_rest_client.h
@@ -94,7 +94,7 @@ public:
void set_extra_headers(const std::map<std::string, std::string>& extra_headers);
int set_obj_attrs(const DoutPrefixProvider *dpp, std::map<std::string, bufferlist>& rgw_attrs);
void set_http_attrs(const std::map<std::string, std::string>& http_attrs);
- void set_policy(RGWAccessControlPolicy& policy);
+ void set_policy(const RGWAccessControlPolicy& policy);
int sign(const DoutPrefixProvider *dpp, RGWAccessKey& key, const bufferlist *opt_content);
const std::string& get_url() { return url; }
diff --git a/src/rgw/rgw_rest_metadata.cc b/src/rgw/rgw_rest_metadata.cc
index 23f78819c64..d7881c22890 100644
--- a/src/rgw/rgw_rest_metadata.cc
+++ b/src/rgw/rgw_rest_metadata.cc
@@ -69,9 +69,7 @@ void RGWOp_Metadata_Get::execute(optional_yield y) {
}
void RGWOp_Metadata_Get_Myself::execute(optional_yield y) {
- string owner_id;
-
- owner_id = s->owner.get_id().to_str();
+ const std::string owner_id = s->owner.id.to_str();
s->info.args.append("key", owner_id);
return RGWOp_Metadata_Get::execute(y);
diff --git a/src/rgw/rgw_rest_pubsub.cc b/src/rgw/rgw_rest_pubsub.cc
index 843b7eb7d08..f43a9ef2722 100644
--- a/src/rgw/rgw_rest_pubsub.cc
+++ b/src/rgw/rgw_rest_pubsub.cc
@@ -72,7 +72,7 @@ std::optional<rgw::IAM::Policy> get_policy_from_text(req_state* const s,
const auto bl = bufferlist::static_from_string(policy_text);
try {
return rgw::IAM::Policy(
- s->cct, s->owner.get_id().tenant, bl,
+ s->cct, s->owner.id.tenant, bl,
s->cct->_conf.get_val<bool>("rgw_policy_reject_invalid_principals"));
} catch (rgw::IAM::PolicyParseException& e) {
ldout(s->cct, 1) << "failed to parse policy:' " << policy_text
@@ -86,7 +86,7 @@ int verify_topic_owner_or_policy(req_state* const s,
const rgw_pubsub_topic& topic,
const std::string& zonegroup_name,
const uint64_t op) {
- if (topic.user == s->owner.get_id()) {
+ if (topic.user == s->owner.id) {
return 0;
}
// no policy set.
@@ -187,7 +187,7 @@ class RGWPSCreateTopicOp : public RGWOp {
return ret;
}
- const RGWPubSub ps(driver, s->owner.get_id().tenant);
+ const RGWPubSub ps(driver, s->owner.id.tenant);
rgw_pubsub_topic result;
ret = ps.get_topic(this, topic_name, result, y);
if (ret == -ENOENT) {
@@ -195,7 +195,7 @@ class RGWPSCreateTopicOp : public RGWOp {
return 0;
}
if (ret == 0) {
- if (result.user == s->owner.get_id() ||
+ if (result.user == s->owner.id ||
!s->cct->_conf->mandatory_topic_permissions) {
return 0;
}
@@ -251,9 +251,9 @@ void RGWPSCreateTopicOp::execute(optional_yield y) {
return;
}
}
- const RGWPubSub ps(driver, s->owner.get_id().tenant);
+ const RGWPubSub ps(driver, s->owner.id.tenant);
op_ret = ps.create_topic(this, topic_name, dest, topic_arn, opaque_data,
- s->owner.get_id(), policy_text, y);
+ s->owner.id, policy_text, y);
if (op_ret < 0) {
ldpp_dout(this, 1) << "failed to create topic '" << topic_name << "', ret=" << op_ret << dendl;
return;
@@ -306,7 +306,7 @@ public:
};
void RGWPSListTopicsOp::execute(optional_yield y) {
- const RGWPubSub ps(driver, s->owner.get_id().tenant);
+ const RGWPubSub ps(driver, s->owner.id.tenant);
op_ret = ps.get_topics(this, result, y);
// if there are no topics it is not considered an error
op_ret = op_ret == -ENOENT ? 0 : op_ret;
@@ -393,7 +393,7 @@ void RGWPSGetTopicOp::execute(optional_yield y) {
if (op_ret < 0) {
return;
}
- const RGWPubSub ps(driver, s->owner.get_id().tenant);
+ const RGWPubSub ps(driver, s->owner.id.tenant);
op_ret = ps.get_topic(this, topic_name, result, y);
if (op_ret < 0) {
ldpp_dout(this, 1) << "failed to get topic '" << topic_name << "', ret=" << op_ret << dendl;
@@ -477,7 +477,7 @@ void RGWPSGetTopicAttributesOp::execute(optional_yield y) {
if (op_ret < 0) {
return;
}
- const RGWPubSub ps(driver, s->owner.get_id().tenant);
+ const RGWPubSub ps(driver, s->owner.id.tenant);
op_ret = ps.get_topic(this, topic_name, result, y);
if (op_ret < 0) {
ldpp_dout(this, 1) << "failed to get topic '" << topic_name << "', ret=" << op_ret << dendl;
@@ -605,7 +605,7 @@ class RGWPSSetTopicAttributesOp : public RGWOp {
return ret;
}
rgw_pubsub_topic result;
- const RGWPubSub ps(driver, s->owner.get_id().tenant);
+ const RGWPubSub ps(driver, s->owner.id.tenant);
ret = ps.get_topic(this, topic_name, result, y);
if (ret < 0) {
ldpp_dout(this, 1) << "failed to get topic '" << topic_name
@@ -672,7 +672,7 @@ void RGWPSSetTopicAttributesOp::execute(optional_yield y) {
return;
}
}
- const RGWPubSub ps(driver, s->owner.get_id().tenant);
+ const RGWPubSub ps(driver, s->owner.id.tenant);
op_ret = ps.create_topic(this, topic_name, dest, topic_arn, opaque_data,
topic_owner, policy_text, y);
if (op_ret < 0) {
@@ -742,7 +742,7 @@ void RGWPSDeleteTopicOp::execute(optional_yield y) {
if (op_ret < 0) {
return;
}
- const RGWPubSub ps(driver, s->owner.get_id().tenant);
+ const RGWPubSub ps(driver, s->owner.id.tenant);
rgw_pubsub_topic result;
op_ret = ps.get_topic(this, topic_name, result, y);
if (op_ret == 0) {
@@ -970,7 +970,7 @@ void RGWPSCreateNotifOp::execute(optional_yield y) {
return;
}
- const RGWPubSub ps(driver, s->owner.get_id().tenant);
+ const RGWPubSub ps(driver, s->owner.id.tenant);
const RGWPubSub::Bucket b(ps, bucket.get());
if(configurations.list.empty()) {
@@ -1042,7 +1042,7 @@ void RGWPSCreateNotifOp::execute(optional_yield y) {
// ARN is cached to make the "GET" method faster
op_ret = ps.create_topic(this, unique_topic_name, topic_info.dest,
topic_info.arn, topic_info.opaque_data,
- s->owner.get_id(), topic_info.policy_text, y);
+ s->owner.id, topic_info.policy_text, y);
if (op_ret < 0) {
ldpp_dout(this, 1) << "failed to auto-generate unique topic '" << unique_topic_name <<
"', ret=" << op_ret << dendl;
@@ -1118,7 +1118,7 @@ void RGWPSDeleteNotifOp::execute(optional_yield y) {
return;
}
- const RGWPubSub ps(driver, s->owner.get_id().tenant);
+ const RGWPubSub ps(driver, s->owner.id.tenant);
const RGWPubSub::Bucket b(ps, bucket.get());
// get all topics on a bucket
@@ -1215,7 +1215,7 @@ void RGWPSListNotifsOp::execute(optional_yield y) {
return;
}
- const RGWPubSub ps(driver, s->owner.get_id().tenant);
+ const RGWPubSub ps(driver, s->owner.id.tenant);
const RGWPubSub::Bucket b(ps, bucket.get());
// get all topics on a bucket
diff --git a/src/rgw/rgw_rest_s3.cc b/src/rgw/rgw_rest_s3.cc
index f12d5e73300..8e0e5a9924c 100644
--- a/src/rgw/rgw_rest_s3.cc
+++ b/src/rgw/rgw_rest_s3.cc
@@ -2383,17 +2383,19 @@ void RGWStatBucket_ObjStore_S3::send_response()
}
static int create_s3_policy(req_state *s, rgw::sal::Driver* driver,
- RGWAccessControlPolicy_S3& s3policy,
- ACLOwner& owner)
+ RGWAccessControlPolicy& policy,
+ const ACLOwner& owner)
{
if (s->has_acl_header) {
if (!s->canned_acl.empty())
return -ERR_INVALID_REQUEST;
- return s3policy.create_from_headers(s, driver, s->info.env, owner);
+ return rgw::s3::create_policy_from_headers(s, driver, owner,
+ *s->info.env, policy);
}
- return s3policy.create_canned(owner, s->bucket_owner, s->canned_acl);
+ return rgw::s3::create_canned_acl(owner, s->bucket_owner,
+ s->canned_acl, policy);
}
class RGWLocationConstraint : public XMLObj
@@ -2447,7 +2449,6 @@ public:
int RGWCreateBucket_ObjStore_S3::get_params(optional_yield y)
{
- RGWAccessControlPolicy_S3 s3policy(s->cct);
bool relaxed_names = s->cct->_conf->rgw_relaxed_s3_bucket_names;
int r;
@@ -2456,12 +2457,10 @@ int RGWCreateBucket_ObjStore_S3::get_params(optional_yield y)
if (r) return r;
}
- r = create_s3_policy(s, driver, s3policy, s->owner);
+ r = create_s3_policy(s, driver, policy, s->owner);
if (r < 0)
return r;
- policy = s3policy;
-
const auto max_size = s->cct->_conf->rgw_max_put_param_size;
int op_ret = 0;
@@ -2589,13 +2588,10 @@ int RGWPutObj_ObjStore_S3::get_params(optional_yield y)
return ret;
}
- RGWAccessControlPolicy_S3 s3policy(s->cct);
- ret = create_s3_policy(s, driver, s3policy, s->owner);
+ ret = create_s3_policy(s, driver, policy, s->owner);
if (ret < 0)
return ret;
- policy = s3policy;
-
if_match = s->info.env->get("HTTP_IF_MATCH");
if_nomatch = s->info.env->get("HTTP_IF_NONE_MATCH");
@@ -3162,8 +3158,8 @@ int RGWPostObj_ObjStore_S3::get_policy(optional_yield y)
return -EACCES;
} else {
/* Populate the owner info. */
- s->owner.set_id(s->user->get_id());
- s->owner.set_name(s->user->get_display_name());
+ s->owner.id = s->user->get_id();
+ s->owner.display_name = s->user->get_display_name();
ldpp_dout(this, 20) << "Successful Signature Verification!" << dendl;
}
@@ -3215,15 +3211,14 @@ int RGWPostObj_ObjStore_S3::get_policy(optional_yield y)
string canned_acl;
part_str(parts, "acl", &canned_acl);
- RGWAccessControlPolicy_S3 s3policy(s->cct);
ldpp_dout(this, 20) << "canned_acl=" << canned_acl << dendl;
- if (s3policy.create_canned(s->owner, s->bucket_owner, canned_acl) < 0) {
+ int r = rgw::s3::create_canned_acl(s->owner, s->bucket_owner,
+ canned_acl, policy);
+ if (r < 0) {
err_msg = "Bad canned ACLs";
- return -EINVAL;
+ return r;
}
- policy = s3policy;
-
return 0;
}
@@ -3383,6 +3378,9 @@ done:
if (op_ret >= 0) {
dump_content_length(s, s->formatter->get_len());
}
+ if (op_ret == STATUS_NO_CONTENT) {
+ dump_etag(s, etag);
+ }
end_header(s, this);
if (op_ret != STATUS_CREATED)
return;
@@ -3450,16 +3448,8 @@ void RGWDeleteObj_ObjStore_S3::send_response()
int RGWCopyObj_ObjStore_S3::init_dest_policy()
{
- RGWAccessControlPolicy_S3 s3policy(s->cct);
-
/* build a policy for the target object */
- int r = create_s3_policy(s, driver, s3policy, s->owner);
- if (r < 0)
- return r;
-
- dest_policy = s3policy;
-
- return 0;
+ return create_s3_policy(s, driver, dest_policy, s->owner);
}
int RGWCopyObj_ObjStore_S3::get_params(optional_yield y)
@@ -3621,25 +3611,16 @@ int RGWPutACLs_ObjStore_S3::get_params(optional_yield y)
return ret;
}
-int RGWPutACLs_ObjStore_S3::get_policy_from_state(rgw::sal::Driver* driver,
- req_state *s,
- stringstream& ss)
+int RGWPutACLs_ObjStore_S3::get_policy_from_state(const ACLOwner& owner,
+ RGWAccessControlPolicy& policy)
{
- RGWAccessControlPolicy_S3 s3policy(s->cct);
-
// bucket-* canned acls do not apply to bucket
if (rgw::sal::Object::empty(s->object.get())) {
if (s->canned_acl.find("bucket") != string::npos)
s->canned_acl.clear();
}
- int r = create_s3_policy(s, driver, s3policy, owner);
- if (r < 0)
- return r;
-
- s3policy.to_xml(ss);
-
- return 0;
+ return create_s3_policy(s, driver, policy, owner);
}
void RGWPutACLs_ObjStore_S3::send_response()
@@ -3972,14 +3953,7 @@ int RGWInitMultipart_ObjStore_S3::get_params(optional_yield y)
return ret;
}
- RGWAccessControlPolicy_S3 s3policy(s->cct);
- ret = create_s3_policy(s, driver, s3policy, s->owner);
- if (ret < 0)
- return ret;
-
- policy = s3policy;
-
- return 0;
+ return create_s3_policy(s, driver, policy, s->owner);
}
void RGWInitMultipart_ObjStore_S3::send_response()
@@ -4107,7 +4081,7 @@ void RGWListMultipart_ObjStore_S3::send_response()
s->formatter->dump_string("IsTruncated", (truncated ? "true" : "false"));
ACLOwner& owner = policy.get_owner();
- dump_owner(s, owner.get_id(), owner.get_display_name());
+ dump_owner(s, owner.id, owner.display_name);
for (; iter != upload->get_parts().end(); ++iter) {
rgw::sal::MultipartPart* part = iter->second.get();
@@ -4170,8 +4144,8 @@ void RGWListBucketMultiparts_ObjStore_S3::send_response()
}
s->formatter->dump_string("UploadId", upload->get_upload_id());
const ACLOwner& owner = upload->get_owner();
- dump_owner(s, owner.get_id(), owner.get_display_name(), "Initiator");
- dump_owner(s, owner.get_id(), owner.get_display_name()); // Owner
+ dump_owner(s, owner.id, owner.display_name, "Initiator");
+ dump_owner(s, owner.id, owner.display_name); // Owner
s->formatter->dump_string("StorageClass", "STANDARD");
dump_time(s, "Initiated", upload->get_mtime());
s->formatter->close_section();
@@ -5098,8 +5072,8 @@ int RGW_Auth_S3::authorize(const DoutPrefixProvider *dpp,
const auto ret = rgw::auth::Strategy::apply(dpp, auth_registry.get_s3_main(), s, y);
if (ret == 0) {
/* Populate the owner info. */
- s->owner.set_id(s->user->get_id());
- s->owner.set_name(s->user->get_display_name());
+ s->owner.id = s->user->get_id();
+ s->owner.display_name = s->user->get_display_name();
}
return ret;
}
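rgw_rest_s3.cc stops building an RGWAccessControlPolicy_S3 temporary and copy-assigning it into the op; the helpers named in the hunks (create_policy_from_headers, create_canned_acl) now write into the caller's policy and signal failure through the returned int. A small sketch of that out-parameter convention follows; the helper below is hypothetical and the types are simplified, only the calling shape matches the diff.

    #include <iostream>
    #include <string>

    struct ACLOwner { std::string id, display_name; };
    struct AclPolicy { ACLOwner owner; std::string canned; };

    // Hypothetical stand-in for a canned-ACL helper: fill 'out' or return < 0.
    int create_canned_acl(const ACLOwner& owner, const ACLOwner& /*bucket_owner*/,
                          const std::string& canned, AclPolicy& out) {
      if (canned == "no-such-acl") return -22;  // -EINVAL-style failure
      out.owner = owner;
      out.canned = canned.empty() ? "private" : canned;
      return 0;
    }

    int main() {
      AclPolicy policy;                       // the op's member, filled in place
      ACLOwner owner{"alice", "Alice"};
      int r = create_canned_acl(owner, owner, "public-read", policy);
      std::cout << r << " " << policy.canned << "\n";  // prints 0 public-read
    }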
diff --git a/src/rgw/rgw_rest_s3.h b/src/rgw/rgw_rest_s3.h
index c73971a5fe6..d15ddaba35a 100644
--- a/src/rgw/rgw_rest_s3.h
+++ b/src/rgw/rgw_rest_s3.h
@@ -355,7 +355,8 @@ public:
RGWPutACLs_ObjStore_S3() {}
~RGWPutACLs_ObjStore_S3() override {}
- int get_policy_from_state(rgw::sal::Driver* driver, req_state *s, std::stringstream& ss) override;
+ int get_policy_from_state(const ACLOwner& owner,
+ RGWAccessControlPolicy& p) override;
void send_response() override;
int get_params(optional_yield y) override;
};
diff --git a/src/rgw/rgw_rest_swift.cc b/src/rgw/rgw_rest_swift.cc
index 25ba6184161..46f02805cd1 100644
--- a/src/rgw/rgw_rest_swift.cc
+++ b/src/rgw/rgw_rest_swift.cc
@@ -92,7 +92,7 @@ static void dump_account_metadata(req_state * const s,
/* const */map<string, bufferlist>& attrs,
const RGWQuotaInfo& quota,
int32_t max_buckets,
- const RGWAccessControlPolicy_SWIFTAcct &policy)
+ const RGWAccessControlPolicy& policy)
{
/* Adding X-Timestamp to keep align with Swift API */
dump_header(s, "X-Timestamp", ceph_clock_now());
@@ -164,8 +164,8 @@ static void dump_account_metadata(req_state * const s,
}
}
- /* Dump account ACLs */
- auto account_acls = policy.to_str();
+ /* Dump account ACLs, if any */
+ auto account_acls = rgw::swift::format_account_acl(policy);
if (account_acls) {
dump_header(s, "X-Account-Access-Control", std::move(*account_acls));
}
@@ -188,7 +188,7 @@ void RGWListBuckets_ObjStore_SWIFT::send_response_begin(bool has_buckets)
s->user->get_attrs(),
s->user->get_info().quota.user_quota,
s->user->get_max_buckets(),
- static_cast<RGWAccessControlPolicy_SWIFTAcct&>(*s->user_acl));
+ s->user_acl);
dump_errno(s);
dump_header(s, "Accept-Ranges", "bytes");
end_header(s, NULL, NULL, NO_CONTENT_LENGTH, true);
@@ -287,7 +287,7 @@ void RGWListBuckets_ObjStore_SWIFT::send_response_end()
s->user->get_attrs(),
s->user->get_info().quota.user_quota,
s->user->get_max_buckets(),
- static_cast<RGWAccessControlPolicy_SWIFTAcct&>(*s->user_acl));
+ s->user_acl);
dump_errno(s);
end_header(s, nullptr, nullptr, s->formatter->get_len(), true);
}
@@ -470,10 +470,8 @@ static void dump_container_metadata(req_state *s,
}
if (rgw::sal::Object::empty(s->object.get())) {
- auto swift_policy = \
- static_cast<RGWAccessControlPolicy_SWIFT*>(s->bucket_acl.get());
std::string read_acl, write_acl;
- swift_policy->to_str(read_acl, write_acl);
+ rgw::swift::format_container_acls(s->bucket_acl, read_acl, write_acl);
if (read_acl.size()) {
dump_header(s, "X-Container-Read", read_acl);
@@ -567,7 +565,7 @@ void RGWStatAccount_ObjStore_SWIFT::send_response()
attrs,
s->user->get_info().quota.user_quota,
s->user->get_max_buckets(),
- static_cast<RGWAccessControlPolicy_SWIFTAcct&>(*s->user_acl));
+ s->user_acl);
}
set_req_state_err(s, op_ret);
@@ -595,7 +593,7 @@ void RGWStatBucket_ObjStore_SWIFT::send_response()
static int get_swift_container_settings(req_state * const s,
rgw::sal::Driver* const driver,
- RGWAccessControlPolicy * const policy,
+ RGWAccessControlPolicy& policy,
bool * const has_policy,
uint32_t * rw_mask,
RGWCORSConfiguration * const cors_config,
@@ -607,18 +605,17 @@ static int get_swift_container_settings(req_state * const s,
*has_policy = false;
if (read_list || write_list) {
- RGWAccessControlPolicy_SWIFT swift_policy(s->cct);
- const auto r = swift_policy.create(s, driver,
- s->user->get_id(),
- s->user->get_display_name(),
- read_list,
- write_list,
- *rw_mask);
+ int r = rgw::swift::create_container_policy(s, driver,
+ s->user->get_id(),
+ s->user->get_display_name(),
+ read_list,
+ write_list,
+ *rw_mask,
+ policy);
if (r < 0) {
return r;
}
- *policy = swift_policy;
*has_policy = true;
}
@@ -716,7 +713,7 @@ int RGWCreateBucket_ObjStore_SWIFT::get_params(optional_yield y)
bool has_policy;
uint32_t policy_rw_mask = 0;
- int r = get_swift_container_settings(s, driver, &policy, &has_policy,
+ int r = get_swift_container_settings(s, driver, policy, &has_policy,
&policy_rw_mask, &cors_config, &has_cors);
if (r < 0) {
return r;
@@ -1060,23 +1057,21 @@ void RGWPutObj_ObjStore_SWIFT::send_response()
static int get_swift_account_settings(req_state * const s,
rgw::sal::Driver* const driver,
- RGWAccessControlPolicy_SWIFTAcct* const policy,
+ RGWAccessControlPolicy& policy,
bool * const has_policy)
{
*has_policy = false;
const char * const acl_attr = s->info.env->get("HTTP_X_ACCOUNT_ACCESS_CONTROL");
if (acl_attr) {
- RGWAccessControlPolicy_SWIFTAcct swift_acct_policy(s->cct);
- const bool r = swift_acct_policy.create(s, driver,
- s->user->get_id(),
- s->user->get_display_name(),
- string(acl_attr));
- if (r != true) {
- return -EINVAL;
+ int r = rgw::swift::create_account_policy(s, driver,
+ s->user->get_id(),
+ s->user->get_display_name(),
+ acl_attr, policy);
+ if (r < 0) {
+ return r;
}
- *policy = swift_acct_policy;
*has_policy = true;
}
@@ -1089,12 +1084,7 @@ int RGWPutMetadataAccount_ObjStore_SWIFT::get_params(optional_yield y)
return -EINVAL;
}
- int ret = get_swift_account_settings(s,
- driver,
- // FIXME: we need to carry unique_ptr in generic class
- // and allocate appropriate ACL class in the ctor
- static_cast<RGWAccessControlPolicy_SWIFTAcct *>(&policy),
- &has_policy);
+ int ret = get_swift_account_settings(s, driver, policy, &has_policy);
if (ret < 0) {
return ret;
}
@@ -1127,7 +1117,7 @@ int RGWPutMetadataBucket_ObjStore_SWIFT::get_params(optional_yield y)
return -EINVAL;
}
- int r = get_swift_container_settings(s, driver, &policy, &has_policy,
+ int r = get_swift_container_settings(s, driver, policy, &has_policy,
&policy_rw_mask, &cors_config, &has_cors);
if (r < 0) {
return r;
diff --git a/src/rgw/rgw_sal.cc b/src/rgw/rgw_sal.cc
index 9aada70899f..6c0212dae81 100644
--- a/src/rgw/rgw_sal.cc
+++ b/src/rgw/rgw_sal.cc
@@ -270,17 +270,18 @@ rgw::sal::Driver* DriverManager::init_raw_storage_provider(const DoutPrefixProvi
rados->set_context(cct);
- int ret = rados->init_svc(true, dpp);
- if (ret < 0) {
- ldout(cct, 0) << "ERROR: failed to init services (ret=" << cpp_strerror(-ret) << ")" << dendl;
+ if (rados->init_rados() < 0) {
delete driver;
return nullptr;
}
- if (rados->init_rados() < 0) {
+ int ret = rados->init_svc(true, dpp);
+ if (ret < 0) {
+ ldout(cct, 0) << "ERROR: failed to init services (ret=" << cpp_strerror(-ret) << ")" << dendl;
delete driver;
return nullptr;
}
+
if (driver->initialize(cct, dpp) < 0) {
delete driver;
return nullptr;
diff --git a/src/rgw/rgw_sal.h b/src/rgw/rgw_sal.h
index cc45d5f3a2c..8771b74c87a 100644
--- a/src/rgw/rgw_sal.h
+++ b/src/rgw/rgw_sal.h
@@ -15,6 +15,9 @@
#pragma once
+#include <boost/intrusive_ptr.hpp>
+#include <boost/smart_ptr/intrusive_ref_counter.hpp>
+
#include "common/tracer.h"
#include "rgw_sal_fwd.h"
#include "rgw_lua.h"
@@ -28,7 +31,6 @@
struct RGWBucketEnt;
class RGWRESTMgr;
-class RGWAccessListFilter;
class RGWLC;
struct rgw_user_bucket;
class RGWUsageBatch;
@@ -81,32 +83,6 @@ struct RGWClusterStat {
uint64_t num_objects;
};
-class RGWGetBucketStats_CB : public RefCountedObject {
-protected:
- rgw_bucket bucket;
- std::map<RGWObjCategory, RGWStorageStats>* stats;
-public:
- explicit RGWGetBucketStats_CB(const rgw_bucket& _bucket) : bucket(_bucket), stats(NULL) {}
- ~RGWGetBucketStats_CB() override {}
- virtual void handle_response(int r) = 0;
- virtual void set_response(std::map<RGWObjCategory, RGWStorageStats>* _stats) {
- stats = _stats;
- }
-};
-
-class RGWGetUserStats_CB : public RefCountedObject {
-protected:
- rgw_user user;
- RGWStorageStats stats;
-public:
- explicit RGWGetUserStats_CB(const rgw_user& _user) : user(_user) {}
- ~RGWGetUserStats_CB() override {}
- virtual void handle_response(int r) = 0;
- virtual void set_response(RGWStorageStats& _stats) {
- stats = _stats;
- }
-};
-
struct RGWObjState {
rgw_obj obj;
bool is_atomic{false};
@@ -449,6 +425,14 @@ class Driver {
virtual void register_admin_apis(RGWRESTMgr* mgr) = 0;
};
+
+/// \brief Ref-counted callback object for User/Bucket read_stats_async().
+class ReadStatsCB : public boost::intrusive_ref_counter<ReadStatsCB> {
+ public:
+ virtual ~ReadStatsCB() {}
+ virtual void handle_response(int r, const RGWStorageStats& stats) = 0;
+};
+
/**
* @brief A list of buckets
*
@@ -530,7 +514,8 @@ class User {
ceph::real_time* last_stats_sync = nullptr,
ceph::real_time* last_stats_update = nullptr) = 0;
/** Read the User stats from the backing Store, asynchronous */
- virtual int read_stats_async(const DoutPrefixProvider *dpp, RGWGetUserStats_CB* cb) = 0;
+ virtual int read_stats_async(const DoutPrefixProvider *dpp,
+ boost::intrusive_ptr<ReadStatsCB> cb) = 0;
/** Flush accumulated stat changes for this User to the backing store */
virtual int complete_flush_stats(const DoutPrefixProvider *dpp, optional_yield y) = 0;
/** Read detailed usage stats for this User from the backing store */
@@ -596,7 +581,7 @@ class Bucket {
rgw_obj_key end_marker;
std::string ns;
bool enforce_ns{true};
- RGWAccessListFilter* access_list_filter{nullptr};
+ rgw::AccessListFilter access_list_filter{};
RGWBucketListNameFilter force_check_filter;
bool list_versions{false};
bool allow_unordered{false};
@@ -683,7 +668,7 @@ class Bucket {
/** Read the bucket stats from the backing Store, asynchronous */
virtual int read_stats_async(const DoutPrefixProvider *dpp,
const bucket_index_layout_generation& idx_layout,
- int shard_id, RGWGetBucketStats_CB* ctx) = 0;
+ int shard_id, boost::intrusive_ptr<ReadStatsCB> cb) = 0;
/** Sync this bucket's stats to the owning user's stats in the backing store */
virtual int sync_user_stats(const DoutPrefixProvider *dpp, optional_yield y,
RGWBucketEnt* optional_ent) = 0;
diff --git a/src/rgw/rgw_sal_dbstore.cc b/src/rgw/rgw_sal_dbstore.cc
index 1a28a7724f4..308404c87b8 100644
--- a/src/rgw/rgw_sal_dbstore.cc
+++ b/src/rgw/rgw_sal_dbstore.cc
@@ -90,7 +90,7 @@ namespace rgw::sal {
}
/* stats - Not for first pass */
- int DBUser::read_stats_async(const DoutPrefixProvider *dpp, RGWGetUserStats_CB *cb)
+ int DBUser::read_stats_async(const DoutPrefixProvider *dpp, boost::intrusive_ptr<ReadStatsCB> cb)
{
return 0;
}
@@ -221,7 +221,7 @@ namespace rgw::sal {
return 0;
}
- int DBBucket::read_stats_async(const DoutPrefixProvider *dpp, const bucket_index_layout_generation& idx_layout, int shard_id, RGWGetBucketStats_CB *ctx)
+ int DBBucket::read_stats_async(const DoutPrefixProvider *dpp, const bucket_index_layout_generation& idx_layout, int shard_id, boost::intrusive_ptr<ReadStatsCB> ctx)
{
return 0;
}
@@ -354,7 +354,7 @@ namespace rgw::sal {
Attrs attrs = get_attrs();
attrs[RGW_ATTR_ACL] = aclbl;
- ret = store->getDB()->update_bucket(dpp, "attrs", info, false, &(acl.get_owner().get_id()), &attrs, nullptr, nullptr);
+ ret = store->getDB()->update_bucket(dpp, "attrs", info, false, &acl.get_owner().id, &attrs, nullptr, nullptr);
return ret;
}
@@ -701,7 +701,7 @@ namespace rgw::sal {
int DBObject::DBDeleteOp::delete_obj(const DoutPrefixProvider* dpp, optional_yield y)
{
- parent_op.params.bucket_owner = params.bucket_owner.get_id();
+ parent_op.params.bucket_owner = params.bucket_owner.id;
parent_op.params.versioning_status = params.versioning_status;
parent_op.params.obj_owner = params.obj_owner;
parent_op.params.olh_epoch = params.olh_epoch;
@@ -793,7 +793,7 @@ namespace rgw::sal {
int ret;
std::unique_ptr<rgw::sal::Object::DeleteOp> del_op = meta_obj->get_delete_op();
- del_op->params.bucket_owner = bucket->get_info().owner;
+ del_op->params.bucket_owner.id = bucket->get_info().owner;
del_op->params.versioning_status = 0;
// Since the data objects are associated with meta obj till
@@ -833,7 +833,7 @@ namespace rgw::sal {
DB::Object::Write obj_op(&op_target);
/* Create meta object */
- obj_op.meta.owner = owner.get_id();
+ obj_op.meta.owner = owner.id;
obj_op.meta.category = RGWObjCategory::MultiMeta;
obj_op.meta.flags = PUT_OBJ_CREATE_EXCL;
obj_op.meta.mtime = &mtime;
@@ -1012,7 +1012,7 @@ namespace rgw::sal {
DB::Object::Write obj_op(&op_target);
ret = obj_op.prepare(dpp);
- obj_op.meta.owner = owner.get_id();
+ obj_op.meta.owner = owner.id;
obj_op.meta.flags = PUT_OBJ_CREATE;
obj_op.meta.category = RGWObjCategory::Main;
obj_op.meta.modify_tail = true;
diff --git a/src/rgw/rgw_sal_dbstore.h b/src/rgw/rgw_sal_dbstore.h
index 646ee23f887..bfa424e269f 100644
--- a/src/rgw/rgw_sal_dbstore.h
+++ b/src/rgw/rgw_sal_dbstore.h
@@ -93,7 +93,7 @@ protected:
optional_yield y, RGWStorageStats* stats,
ceph::real_time *last_stats_sync = nullptr,
ceph::real_time *last_stats_update = nullptr) override;
- virtual int read_stats_async(const DoutPrefixProvider *dpp, RGWGetUserStats_CB* cb) override;
+ virtual int read_stats_async(const DoutPrefixProvider *dpp, boost::intrusive_ptr<ReadStatsCB> cb) override;
virtual int complete_flush_stats(const DoutPrefixProvider *dpp, optional_yield y) override;
virtual int read_usage(const DoutPrefixProvider *dpp, uint64_t start_epoch, uint64_t end_epoch, uint32_t max_entries,
bool* is_truncated, RGWUsageIter& usage_iter,
@@ -155,7 +155,7 @@ protected:
std::map<RGWObjCategory, RGWStorageStats>& stats,
std::string *max_marker = nullptr,
bool *syncstopped = nullptr) override;
- virtual int read_stats_async(const DoutPrefixProvider *dpp, const bucket_index_layout_generation& idx_layout, int shard_id, RGWGetBucketStats_CB* ctx) override;
+ virtual int read_stats_async(const DoutPrefixProvider *dpp, const bucket_index_layout_generation& idx_layout, int shard_id, boost::intrusive_ptr<ReadStatsCB> ctx) override;
int sync_user_stats(const DoutPrefixProvider *dpp, optional_yield y,
RGWBucketEnt* ent) override;
int check_bucket_shards(const DoutPrefixProvider *dpp,
diff --git a/src/rgw/rgw_sal_filter.cc b/src/rgw/rgw_sal_filter.cc
index b49c6887b62..decd648adb0 100644
--- a/src/rgw/rgw_sal_filter.cc
+++ b/src/rgw/rgw_sal_filter.cc
@@ -508,7 +508,7 @@ int FilterUser::read_stats(const DoutPrefixProvider *dpp,
return next->read_stats(dpp, y, stats, last_stats_sync, last_stats_update);
}
-int FilterUser::read_stats_async(const DoutPrefixProvider *dpp, RGWGetUserStats_CB* cb)
+int FilterUser::read_stats_async(const DoutPrefixProvider *dpp, boost::intrusive_ptr<ReadStatsCB> cb)
{
return next->read_stats_async(dpp, cb);
}
@@ -613,7 +613,7 @@ int FilterBucket::read_stats(const DoutPrefixProvider *dpp,
int FilterBucket::read_stats_async(const DoutPrefixProvider *dpp,
const bucket_index_layout_generation& idx_layout,
- int shard_id, RGWGetBucketStats_CB* ctx)
+ int shard_id, boost::intrusive_ptr<ReadStatsCB> ctx)
{
return next->read_stats_async(dpp, idx_layout, shard_id, ctx);
}
diff --git a/src/rgw/rgw_sal_filter.h b/src/rgw/rgw_sal_filter.h
index 239a80a6465..aebbe383714 100644
--- a/src/rgw/rgw_sal_filter.h
+++ b/src/rgw/rgw_sal_filter.h
@@ -338,7 +338,7 @@ public:
ceph::real_time* last_stats_sync = nullptr,
ceph::real_time* last_stats_update = nullptr) override;
virtual int read_stats_async(const DoutPrefixProvider *dpp,
- RGWGetUserStats_CB* cb) override;
+ boost::intrusive_ptr<ReadStatsCB> cb) override;
virtual int complete_flush_stats(const DoutPrefixProvider *dpp, optional_yield y) override;
virtual int read_usage(const DoutPrefixProvider *dpp, uint64_t start_epoch,
uint64_t end_epoch, uint32_t max_entries,
@@ -397,7 +397,7 @@ public:
bool* syncstopped = nullptr) override;
virtual int read_stats_async(const DoutPrefixProvider *dpp,
const bucket_index_layout_generation& idx_layout,
- int shard_id, RGWGetBucketStats_CB* ctx) override;
+ int shard_id, boost::intrusive_ptr<ReadStatsCB> ctx) override;
int sync_user_stats(const DoutPrefixProvider *dpp, optional_yield y,
RGWBucketEnt* ent) override;
int check_bucket_shards(const DoutPrefixProvider* dpp,
diff --git a/src/rgw/rgw_sal_fwd.h b/src/rgw/rgw_sal_fwd.h
index 08866c2bea2..123d17a5162 100644
--- a/src/rgw/rgw_sal_fwd.h
+++ b/src/rgw/rgw_sal_fwd.h
@@ -15,8 +15,21 @@
#pragma once
+#include <functional>
+#include <string>
-namespace rgw { namespace sal {
+namespace rgw {
+using AccessListFilter =
+ std::function<bool(const std::string&, std::string&)>;
+
+inline auto AccessListFilterPrefix(std::string prefix) {
+ return [prefix = std::move(prefix)](const std::string& name,
+ std::string& key) {
+ return (prefix.compare(key.substr(0, prefix.size())) == 0);
+ };
+}
+
+namespace sal {
class Driver;
class User;
@@ -32,6 +45,9 @@ namespace rgw { namespace sal {
class Zone;
class LuaManager;
struct RGWRoleInfo;
+ class DataProcessor;
+ class ObjectProcessor;
+ class ReadStatsCB;
class ConfigStore;
class RealmWriter;
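rgw_sal_fwd.h replaces the old RGWAccessListFilter class with a plain std::function alias and a prefix-filter factory. The sketch below copies that shape into a standalone program to show how a filter is constructed and called; the main() usage is illustrative only and not part of the patch.

    #include <functional>
    #include <iostream>
    #include <string>

    using AccessListFilter =
        std::function<bool(const std::string&, std::string&)>;

    inline auto AccessListFilterPrefix(std::string prefix) {
      return [prefix = std::move(prefix)](const std::string& /*name*/,
                                          std::string& key) {
        // true when 'key' starts with the captured prefix
        return prefix.compare(key.substr(0, prefix.size())) == 0;
      };
    }

    int main() {
      AccessListFilter filter = AccessListFilterPrefix("photos/");
      std::string k1 = "photos/cat.jpg", k2 = "docs/readme.txt";
      std::cout << filter("", k1) << " " << filter("", k2) << "\n";  // prints 1 0
    }

Because the filter is just a callable, list parameters can default-construct it, with an empty std::function playing the role the null pointer did before.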
diff --git a/src/rgw/rgw_user_types.h b/src/rgw/rgw_user_types.h
index c9a1a46ade1..1aaf4cfa5d3 100644
--- a/src/rgw/rgw_user_types.h
+++ b/src/rgw/rgw_user_types.h
@@ -26,9 +26,10 @@
#include "common/Formatter.h"
struct rgw_user {
+ // note: order of member variables matches the sort order of operator<=>
std::string tenant;
- std::string id;
std::string ns;
+ std::string id;
rgw_user() {}
explicit rgw_user(const std::string& s) {
@@ -36,13 +37,13 @@ struct rgw_user {
}
rgw_user(const std::string& tenant, const std::string& id, const std::string& ns="")
: tenant(tenant),
- id(id),
- ns(ns) {
+ ns(ns),
+ id(id) {
}
rgw_user(std::string&& tenant, std::string&& id, std::string&& ns="")
: tenant(std::move(tenant)),
- id(std::move(id)),
- ns(std::move(ns)) {
+ ns(std::move(ns)),
+ id(std::move(id)) {
}
void encode(ceph::buffer::list& bl) const {
@@ -118,40 +119,8 @@ struct rgw_user {
return *this;
}
- int compare(const rgw_user& u) const {
- int r = tenant.compare(u.tenant);
- if (r != 0)
- return r;
- r = ns.compare(u.ns);
- if (r != 0) {
- return r;
- }
- return id.compare(u.id);
- }
- int compare(const std::string& str) const {
- rgw_user u(str);
- return compare(u);
- }
+ friend auto operator<=>(const rgw_user&, const rgw_user&) = default;
- bool operator!=(const rgw_user& rhs) const {
- return (compare(rhs) != 0);
- }
- bool operator==(const rgw_user& rhs) const {
- return (compare(rhs) == 0);
- }
- bool operator<(const rgw_user& rhs) const {
- if (tenant < rhs.tenant) {
- return true;
- } else if (tenant > rhs.tenant) {
- return false;
- }
- if (ns < rhs.ns) {
- return true;
- } else if (ns > rhs.ns) {
- return false;
- }
- return (id < rhs.id);
- }
void dump(ceph::Formatter *f) const;
static void generate_test_instances(std::list<rgw_user*>& o);
};
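
Reordering the members (tenant, ns, id) matters because a defaulted operator<=> compares members in declaration order; with the new order it reproduces exactly what the removed compare()/operator< did. A small C++20 sketch with a simplified stand-in struct:

#include <cassert>
#include <compare>
#include <string>

// Simplified stand-in for rgw_user: declaration order drives the comparison.
struct user_key {
  std::string tenant;
  std::string ns;
  std::string id;
  // a defaulted <=> also implicitly provides a defaulted ==
  friend auto operator<=>(const user_key&, const user_key&) = default;
};

int main() {
  user_key a{"t1", "", "zed"};
  user_key b{"t1", "ns1", "aaron"};
  assert(a < b);            // tenant ties, "" < "ns1", id never consulted
  user_key c{"t1", "", "zed"};
  assert(a == c);
}
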
diff --git a/src/rgw/rgw_zone.cc b/src/rgw/rgw_zone.cc
index f41d987ec54..aeb58e2f48f 100644
--- a/src/rgw/rgw_zone.cc
+++ b/src/rgw/rgw_zone.cc
@@ -33,7 +33,6 @@ std::string zonegroup_names_oid_prefix = "zonegroups_names.";
std::string RGW_DEFAULT_ZONE_ROOT_POOL = "rgw.root";
std::string RGW_DEFAULT_ZONEGROUP_ROOT_POOL = "rgw.root";
std::string RGW_DEFAULT_PERIOD_ROOT_POOL = "rgw.root";
-std::string avail_pools = ".pools.avail";
std::string default_storage_pool_suffix = "rgw.buckets.data";
}
@@ -412,22 +411,14 @@ int RGWZoneParams::set_as_default(const DoutPrefixProvider *dpp, optional_yield
int RGWZoneParams::create(const DoutPrefixProvider *dpp, optional_yield y, bool exclusive)
{
- /* check for old pools config */
- rgw_raw_obj obj(domain_root, avail_pools);
- auto sysobj = sysobj_svc->get_obj(obj);
- int r = sysobj.rop().stat(y, dpp);
- if (r < 0) {
- ldpp_dout(dpp, 10) << "couldn't find old data placement pools config, setting up new ones for the zone" << dendl;
- /* a new system, let's set new placement info */
- RGWZonePlacementInfo default_placement;
- default_placement.index_pool = name + "." + default_bucket_index_pool_suffix;
- rgw_pool pool = name + "." + default_storage_pool_suffix;
- default_placement.storage_classes.set_storage_class(RGW_STORAGE_CLASS_STANDARD, &pool, nullptr);
- default_placement.data_extra_pool = name + "." + default_storage_extra_pool_suffix;
- placement_pools["default-placement"] = default_placement;
- }
+ RGWZonePlacementInfo default_placement;
+ default_placement.index_pool = name + "." + default_bucket_index_pool_suffix;
+ rgw_pool pool = name + "." + default_storage_pool_suffix;
+ default_placement.storage_classes.set_storage_class(RGW_STORAGE_CLASS_STANDARD, &pool, nullptr);
+ default_placement.data_extra_pool = name + "." + default_storage_extra_pool_suffix;
+ placement_pools["default-placement"] = default_placement;
- r = fix_pool_names(dpp, y);
+ int r = fix_pool_names(dpp, y);
if (r < 0) {
ldpp_dout(dpp, 0) << "ERROR: fix_pool_names returned r=" << r << dendl;
return r;
diff --git a/src/rgw/services/svc_bi_rados.cc b/src/rgw/services/svc_bi_rados.cc
index 6002b986f59..20c842c3805 100644
--- a/src/rgw/services/svc_bi_rados.cc
+++ b/src/rgw/services/svc_bi_rados.cc
@@ -22,29 +22,27 @@ RGWSI_BucketIndex_RADOS::RGWSI_BucketIndex_RADOS(CephContext *cct) : RGWSI_Bucke
}
void RGWSI_BucketIndex_RADOS::init(RGWSI_Zone *zone_svc,
- RGWSI_RADOS *rados_svc,
- RGWSI_BILog_RADOS *bilog_svc,
- RGWDataChangesLog *datalog_rados_svc)
+ librados::Rados* rados_,
+ RGWSI_BILog_RADOS *bilog_svc,
+ RGWDataChangesLog *datalog_rados_svc)
{
svc.zone = zone_svc;
- svc.rados = rados_svc;
+ rados = rados_;
svc.bilog = bilog_svc;
svc.datalog_rados = datalog_rados_svc;
}
int RGWSI_BucketIndex_RADOS::open_pool(const DoutPrefixProvider *dpp,
const rgw_pool& pool,
- RGWSI_RADOS::Pool *index_pool,
+ librados::IoCtx* index_pool,
bool mostly_omap)
{
- *index_pool = svc.rados->pool(pool);
- return index_pool->open(dpp, RGWSI_RADOS::OpenParams()
- .set_mostly_omap(mostly_omap));
+ return rgw_init_ioctx(dpp, rados, pool, *index_pool, true, mostly_omap);
}
int RGWSI_BucketIndex_RADOS::open_bucket_index_pool(const DoutPrefixProvider *dpp,
const RGWBucketInfo& bucket_info,
- RGWSI_RADOS::Pool *index_pool)
+ librados::IoCtx* index_pool)
{
const rgw_pool& explicit_pool = bucket_info.bucket.explicit_placement.index_pool;
@@ -74,7 +72,7 @@ int RGWSI_BucketIndex_RADOS::open_bucket_index_pool(const DoutPrefixProvider *dp
int RGWSI_BucketIndex_RADOS::open_bucket_index_base(const DoutPrefixProvider *dpp,
const RGWBucketInfo& bucket_info,
- RGWSI_RADOS::Pool *index_pool,
+ librados::IoCtx* index_pool,
string *bucket_oid_base)
{
const rgw_bucket& bucket = bucket_info.bucket;
@@ -96,7 +94,7 @@ int RGWSI_BucketIndex_RADOS::open_bucket_index_base(const DoutPrefixProvider *dp
int RGWSI_BucketIndex_RADOS::open_bucket_index(const DoutPrefixProvider *dpp,
const RGWBucketInfo& bucket_info,
- RGWSI_RADOS::Pool *index_pool,
+ librados::IoCtx* index_pool,
string *bucket_oid)
{
const rgw_bucket& bucket = bucket_info.bucket;
@@ -194,7 +192,7 @@ int RGWSI_BucketIndex_RADOS::open_bucket_index(const DoutPrefixProvider *dpp,
const RGWBucketInfo& bucket_info,
std::optional<int> _shard_id,
const rgw::bucket_index_layout_generation& idx_layout,
- RGWSI_RADOS::Pool *index_pool,
+ librados::IoCtx* index_pool,
map<int, string> *bucket_objs,
map<int, string> *bucket_instance_ids)
{
@@ -277,32 +275,27 @@ int RGWSI_BucketIndex_RADOS::get_bucket_index_object(
int RGWSI_BucketIndex_RADOS::open_bucket_index_shard(const DoutPrefixProvider *dpp,
const RGWBucketInfo& bucket_info,
const string& obj_key,
- RGWSI_RADOS::Obj *bucket_obj,
+ rgw_rados_ref* bucket_obj,
int *shard_id)
{
string bucket_oid_base;
- RGWSI_RADOS::Pool pool;
-
- int ret = open_bucket_index_base(dpp, bucket_info, &pool, &bucket_oid_base);
+ int ret = open_bucket_index_base(dpp, bucket_info, &bucket_obj->ioctx, &bucket_oid_base);
if (ret < 0) {
ldpp_dout(dpp, 20) << __func__ << ": open_bucket_index_pool() returned "
<< ret << dendl;
return ret;
}
- string oid;
-
const auto& current_index = bucket_info.layout.current_index;
ret = get_bucket_index_object(bucket_oid_base, current_index.layout.normal,
- current_index.gen, obj_key, &oid, shard_id);
+ current_index.gen, obj_key,
+ &bucket_obj->obj.oid, shard_id);
if (ret < 0) {
ldpp_dout(dpp, 10) << "get_bucket_index_object() returned ret=" << ret << dendl;
return ret;
}
- *bucket_obj = svc.rados->obj(pool, oid);
-
return 0;
}
@@ -310,23 +303,19 @@ int RGWSI_BucketIndex_RADOS::open_bucket_index_shard(const DoutPrefixProvider *d
const RGWBucketInfo& bucket_info,
const rgw::bucket_index_layout_generation& index,
int shard_id,
- RGWSI_RADOS::Obj *bucket_obj)
+ rgw_rados_ref* bucket_obj)
{
- RGWSI_RADOS::Pool index_pool;
string bucket_oid_base;
- int ret = open_bucket_index_base(dpp, bucket_info, &index_pool, &bucket_oid_base);
+ int ret = open_bucket_index_base(dpp, bucket_info, &bucket_obj->ioctx,
+ &bucket_oid_base);
if (ret < 0) {
ldpp_dout(dpp, 20) << __func__ << ": open_bucket_index_pool() returned "
<< ret << dendl;
return ret;
}
- string oid;
-
get_bucket_index_object(bucket_oid_base, index.layout.normal,
- index.gen, shard_id, &oid);
-
- *bucket_obj = svc.rados->obj(index_pool, oid);
+ index.gen, shard_id, &bucket_obj->obj.oid);
return 0;
}
@@ -339,7 +328,7 @@ int RGWSI_BucketIndex_RADOS::cls_bucket_head(const DoutPrefixProvider *dpp,
map<int, string> *bucket_instance_ids,
optional_yield y)
{
- RGWSI_RADOS::Pool index_pool;
+ librados::IoCtx index_pool;
map<int, string> oids;
int r = open_bucket_index(dpp, bucket_info, shard_id, idx_layout, &index_pool, &oids, bucket_instance_ids);
if (r < 0)
@@ -350,7 +339,8 @@ int RGWSI_BucketIndex_RADOS::cls_bucket_head(const DoutPrefixProvider *dpp,
list_results.emplace(iter.first, rgw_cls_list_ret());
}
- r = CLSRGWIssueGetDirHeader(index_pool.ioctx(), oids, list_results, cct->_conf->rgw_bucket_index_max_aio)();
+ r = CLSRGWIssueGetDirHeader(index_pool, oids, list_results,
+ cct->_conf->rgw_bucket_index_max_aio)();
if (r < 0)
return r;
@@ -361,9 +351,9 @@ int RGWSI_BucketIndex_RADOS::cls_bucket_head(const DoutPrefixProvider *dpp,
return 0;
}
-int RGWSI_BucketIndex_RADOS::init_index(const DoutPrefixProvider *dpp, RGWBucketInfo& bucket_info, const rgw::bucket_index_layout_generation& idx_layout)
+int RGWSI_BucketIndex_RADOS::init_index(const DoutPrefixProvider *dpp,RGWBucketInfo& bucket_info, const rgw::bucket_index_layout_generation& idx_layout)
{
- RGWSI_RADOS::Pool index_pool;
+ librados::IoCtx index_pool;
string dir_oid = dir_oid_prefix;
int r = open_bucket_index_pool(dpp, bucket_info, &index_pool);
@@ -376,14 +366,14 @@ int RGWSI_BucketIndex_RADOS::init_index(const DoutPrefixProvider *dpp, RGWBucket
map<int, string> bucket_objs;
get_bucket_index_objects(dir_oid, idx_layout.layout.normal.num_shards, idx_layout.gen, &bucket_objs);
- return CLSRGWIssueBucketIndexInit(index_pool.ioctx(),
+ return CLSRGWIssueBucketIndexInit(index_pool,
bucket_objs,
cct->_conf->rgw_bucket_index_max_aio)();
}
int RGWSI_BucketIndex_RADOS::clean_index(const DoutPrefixProvider *dpp, RGWBucketInfo& bucket_info, const rgw::bucket_index_layout_generation& idx_layout)
{
- RGWSI_RADOS::Pool index_pool;
+ librados::IoCtx index_pool;
std::string dir_oid = dir_oid_prefix;
int r = open_bucket_index_pool(dpp, bucket_info, &index_pool);
@@ -397,7 +387,7 @@ int RGWSI_BucketIndex_RADOS::clean_index(const DoutPrefixProvider *dpp, RGWBucke
get_bucket_index_objects(dir_oid, idx_layout.layout.normal.num_shards,
idx_layout.gen, &bucket_objs);
- return CLSRGWIssueBucketIndexClean(index_pool.ioctx(),
+ return CLSRGWIssueBucketIndexClean(index_pool,
bucket_objs,
cct->_conf->rgw_bucket_index_max_aio)();
}
@@ -440,7 +430,7 @@ int RGWSI_BucketIndex_RADOS::get_reshard_status(const DoutPrefixProvider *dpp, c
{
map<int, string> bucket_objs;
- RGWSI_RADOS::Pool index_pool;
+ librados::IoCtx index_pool;
int r = open_bucket_index(dpp, bucket_info,
std::nullopt,
@@ -455,7 +445,7 @@ int RGWSI_BucketIndex_RADOS::get_reshard_status(const DoutPrefixProvider *dpp, c
for (auto i : bucket_objs) {
cls_rgw_bucket_instance_entry entry;
- int ret = cls_rgw_get_bucket_resharding(index_pool.ioctx(), i.second, &entry);
+ int ret = cls_rgw_get_bucket_resharding(index_pool, i.second, &entry);
if (ret < 0 && ret != -ENOENT) {
ldpp_dout(dpp, -1) << "ERROR: " << __func__ << ": cls_rgw_get_bucket_resharding() returned ret=" << ret << dendl;
return ret;
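
Throughout this file the RGWSI_RADOS::Pool wrapper is replaced by a librados::IoCtx opened through rgw_init_ioctx() on the shared librados::Rados handle (rgw_init_ioctx additionally handles pool creation and the mostly_omap hint). Outside the RGW tree the underlying librados calls look roughly like this; client id and pool name are made up for illustration:

#include <rados/librados.hpp>
#include <iostream>

int main() {
  librados::Rados cluster;
  if (cluster.init("admin") < 0) return 1;          // illustrative client id
  if (cluster.conf_read_file(nullptr) < 0) return 1; // default ceph.conf search
  if (cluster.connect() < 0) return 1;

  // What Pool::open() hid: bind an IoCtx to a pool on the shared handle.
  librados::IoCtx index_pool;
  int r = cluster.ioctx_create("default.rgw.buckets.index", index_pool);
  if (r < 0) {
    std::cerr << "open pool failed: " << r << std::endl;
    return 1;
  }
  cluster.shutdown();
  return 0;
}
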
diff --git a/src/rgw/services/svc_bi_rados.h b/src/rgw/services/svc_bi_rados.h
index feba0cfcd19..c6c11f8bc00 100644
--- a/src/rgw/services/svc_bi_rados.h
+++ b/src/rgw/services/svc_bi_rados.h
@@ -21,7 +21,6 @@
#include "rgw_tools.h"
#include "svc_bi.h"
-#include "svc_rados.h"
#include "svc_tier_rados.h"
struct rgw_bucket_dir_header;
@@ -45,15 +44,15 @@ class RGWSI_BucketIndex_RADOS : public RGWSI_BucketIndex
int open_pool(const DoutPrefixProvider *dpp,
const rgw_pool& pool,
- RGWSI_RADOS::Pool *index_pool,
+ librados::IoCtx* index_pool,
bool mostly_omap);
int open_bucket_index_pool(const DoutPrefixProvider *dpp,
- const RGWBucketInfo& bucket_info,
- RGWSI_RADOS::Pool *index_pool);
+ const RGWBucketInfo& bucket_info,
+ librados::IoCtx* index_pool);
int open_bucket_index_base(const DoutPrefixProvider *dpp,
const RGWBucketInfo& bucket_info,
- RGWSI_RADOS::Pool *index_pool,
+ librados::IoCtx* index_pool,
std::string *bucket_oid_base);
// return the index oid for the given shard id
@@ -77,9 +76,10 @@ class RGWSI_BucketIndex_RADOS : public RGWSI_BucketIndex
public:
+ librados::Rados* rados{nullptr};
+
struct Svc {
RGWSI_Zone *zone{nullptr};
- RGWSI_RADOS *rados{nullptr};
RGWSI_BILog_RADOS *bilog{nullptr};
RGWDataChangesLog *datalog_rados{nullptr};
} svc;
@@ -87,7 +87,7 @@ public:
RGWSI_BucketIndex_RADOS(CephContext *cct);
void init(RGWSI_Zone *zone_svc,
- RGWSI_RADOS *rados_svc,
+ librados::Rados* rados_,
RGWSI_BILog_RADOS *bilog_svc,
RGWDataChangesLog *datalog_rados_svc);
@@ -141,26 +141,24 @@ public:
int open_bucket_index_shard(const DoutPrefixProvider *dpp,
const RGWBucketInfo& bucket_info,
const std::string& obj_key,
- RGWSI_RADOS::Obj *bucket_obj,
+ rgw_rados_ref* bucket_obj,
int *shard_id);
int open_bucket_index_shard(const DoutPrefixProvider *dpp,
const RGWBucketInfo& bucket_info,
const rgw::bucket_index_layout_generation& index,
- int shard_id, RGWSI_RADOS::Obj *bucket_obj);
+ int shard_id, rgw_rados_ref* bucket_obj);
int open_bucket_index(const DoutPrefixProvider *dpp,
const RGWBucketInfo& bucket_info,
- RGWSI_RADOS::Pool *index_pool,
+ librados::IoCtx* index_pool,
std::string *bucket_oid);
int open_bucket_index(const DoutPrefixProvider *dpp,
const RGWBucketInfo& bucket_info,
std::optional<int> shard_id,
const rgw::bucket_index_layout_generation& idx_layout,
- RGWSI_RADOS::Pool *index_pool,
+ librados::IoCtx* index_pool,
std::map<int, std::string> *bucket_objs,
std::map<int, std::string> *bucket_instance_ids);
};
-
-
diff --git a/src/rgw/services/svc_bilog_rados.cc b/src/rgw/services/svc_bilog_rados.cc
index f4bb13ec1f0..a33eb508848 100644
--- a/src/rgw/services/svc_bilog_rados.cc
+++ b/src/rgw/services/svc_bilog_rados.cc
@@ -26,7 +26,7 @@ int RGWSI_BILog_RADOS::log_trim(const DoutPrefixProvider *dpp,
std::string_view start_marker,
std::string_view end_marker)
{
- RGWSI_RADOS::Pool index_pool;
+ librados::IoCtx index_pool;
map<int, string> bucket_objs;
BucketIndexShardsManager start_marker_mgr;
@@ -48,32 +48,32 @@ int RGWSI_BILog_RADOS::log_trim(const DoutPrefixProvider *dpp,
return r;
}
- return CLSRGWIssueBILogTrim(index_pool.ioctx(), start_marker_mgr, end_marker_mgr, bucket_objs,
+ return CLSRGWIssueBILogTrim(index_pool, start_marker_mgr, end_marker_mgr, bucket_objs,
cct->_conf->rgw_bucket_index_max_aio)();
}
int RGWSI_BILog_RADOS::log_start(const DoutPrefixProvider *dpp, const RGWBucketInfo& bucket_info, const rgw::bucket_log_layout_generation& log_layout, int shard_id)
{
- RGWSI_RADOS::Pool index_pool;
+ librados::IoCtx index_pool;
map<int, string> bucket_objs;
const auto& current_index = rgw::log_to_index_layout(log_layout);
int r = svc.bi->open_bucket_index(dpp, bucket_info, shard_id, current_index, &index_pool, &bucket_objs, nullptr);
if (r < 0)
return r;
- return CLSRGWIssueResyncBucketBILog(index_pool.ioctx(), bucket_objs, cct->_conf->rgw_bucket_index_max_aio)();
+ return CLSRGWIssueResyncBucketBILog(index_pool, bucket_objs, cct->_conf->rgw_bucket_index_max_aio)();
}
int RGWSI_BILog_RADOS::log_stop(const DoutPrefixProvider *dpp, const RGWBucketInfo& bucket_info, const rgw::bucket_log_layout_generation& log_layout, int shard_id)
{
- RGWSI_RADOS::Pool index_pool;
+ librados::IoCtx index_pool;
map<int, string> bucket_objs;
const auto& current_index = rgw::log_to_index_layout(log_layout);
int r = svc.bi->open_bucket_index(dpp, bucket_info, shard_id, current_index, &index_pool, &bucket_objs, nullptr);
if (r < 0)
return r;
- return CLSRGWIssueBucketBILogStop(index_pool.ioctx(), bucket_objs, cct->_conf->rgw_bucket_index_max_aio)();
+ return CLSRGWIssueBucketBILogStop(index_pool, bucket_objs, cct->_conf->rgw_bucket_index_max_aio)();
}
static void build_bucket_index_marker(const string& shard_id_str,
@@ -95,7 +95,7 @@ int RGWSI_BILog_RADOS::log_list(const DoutPrefixProvider *dpp,
ldpp_dout(dpp, 20) << __func__ << ": " << bucket_info.bucket << " marker " << marker << " shard_id=" << shard_id << " max " << max << dendl;
result.clear();
- RGWSI_RADOS::Pool index_pool;
+ librados::IoCtx index_pool;
map<int, string> oids;
map<int, cls_rgw_bi_log_list_ret> bi_log_lists;
const auto& current_index = rgw::log_to_index_layout(log_layout);
@@ -112,8 +112,8 @@ int RGWSI_BILog_RADOS::log_list(const DoutPrefixProvider *dpp,
r = marker_mgr.from_string(marker, shard_id);
if (r < 0)
return r;
-
- r = CLSRGWIssueBILogList(index_pool.ioctx(), marker_mgr, max, oids, bi_log_lists, cct->_conf->rgw_bucket_index_max_aio)();
+
+ r = CLSRGWIssueBILogList(index_pool, marker_mgr, max, oids, bi_log_lists, cct->_conf->rgw_bucket_index_max_aio)();
if (r < 0)
return r;
diff --git a/src/rgw/services/svc_bilog_rados.h b/src/rgw/services/svc_bilog_rados.h
index e9d5dbb5c0e..e9c948d3fa2 100644
--- a/src/rgw/services/svc_bilog_rados.h
+++ b/src/rgw/services/svc_bilog_rados.h
@@ -1,4 +1,3 @@
-
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
// vim: ts=8 sw=2 smarttab ft=cpp
@@ -19,11 +18,6 @@
#include "rgw_service.h"
-#include "svc_rados.h"
-
-
-
-
class RGWSI_BILog_RADOS : public RGWServiceInstance
{
public:
diff --git a/src/rgw/services/svc_cls.cc b/src/rgw/services/svc_cls.cc
index 342146bfefa..740d9ab0307 100644
--- a/src/rgw/services/svc_cls.cc
+++ b/src/rgw/services/svc_cls.cc
@@ -3,7 +3,6 @@
#include "svc_cls.h"
-#include "svc_rados.h"
#include "svc_zone.h"
#include "rgw_zone.h"
@@ -30,13 +29,12 @@ int RGWSI_Cls::do_start(optional_yield y, const DoutPrefixProvider *dpp)
return 0;
}
-int RGWSI_Cls::MFA::get_mfa_obj(const DoutPrefixProvider *dpp, const rgw_user& user, std::optional<RGWSI_RADOS::Obj> *obj)
+int RGWSI_Cls::MFA::get_mfa_ref(const DoutPrefixProvider *dpp, const rgw_user& user, rgw_rados_ref *ref)
{
string oid = get_mfa_oid(user);
- rgw_raw_obj o(zone_svc->get_zone_params().otp_pool, oid);
+ rgw_raw_obj o(cls->zone_svc->get_zone_params().otp_pool, oid);
- obj->emplace(rados_svc->obj(o));
- int r = (*obj)->open(dpp);
+ auto r = rgw_get_rados_ref(dpp, cls->rados, o, ref);
if (r < 0) {
ldpp_dout(dpp, 4) << "failed to open rados context for " << o << dendl;
return r;
@@ -45,17 +43,6 @@ int RGWSI_Cls::MFA::get_mfa_obj(const DoutPrefixProvider *dpp, const rgw_user& u
return 0;
}
-int RGWSI_Cls::MFA::get_mfa_ref(const DoutPrefixProvider *dpp, const rgw_user& user, rgw_rados_ref *ref)
-{
- std::optional<RGWSI_RADOS::Obj> obj;
- int r = get_mfa_obj(dpp, user, &obj);
- if (r < 0) {
- return r;
- }
- *ref = obj->get_ref();
- return 0;
-}
-
int RGWSI_Cls::MFA::check_mfa(const DoutPrefixProvider *dpp, const rgw_user& user, const string& otp_id, const string& pin, optional_yield y)
{
rgw_rados_ref ref;
@@ -66,7 +53,7 @@ int RGWSI_Cls::MFA::check_mfa(const DoutPrefixProvider *dpp, const rgw_user& use
rados::cls::otp::otp_check_t result;
- r = rados::cls::otp::OTP::check(cct, ref.pool.ioctx(), ref.obj.oid, otp_id, pin, &result);
+ r = rados::cls::otp::OTP::check(cct, ref.ioctx, ref.obj.oid, otp_id, pin, &result);
if (r < 0)
return r;
@@ -102,8 +89,8 @@ void RGWSI_Cls::MFA::prepare_mfa_write(librados::ObjectWriteOperation *op,
int RGWSI_Cls::MFA::create_mfa(const DoutPrefixProvider *dpp, const rgw_user& user, const rados::cls::otp::otp_info_t& config,
RGWObjVersionTracker *objv_tracker, const ceph::real_time& mtime, optional_yield y)
{
- std::optional<RGWSI_RADOS::Obj> obj;
- int r = get_mfa_obj(dpp, user, &obj);
+ rgw_rados_ref obj;
+ int r = get_mfa_ref(dpp, user, &obj);
if (r < 0) {
return r;
}
@@ -111,7 +98,7 @@ int RGWSI_Cls::MFA::create_mfa(const DoutPrefixProvider *dpp, const rgw_user& us
librados::ObjectWriteOperation op;
prepare_mfa_write(&op, objv_tracker, mtime);
rados::cls::otp::OTP::create(&op, config);
- r = obj->operate(dpp, &op, y);
+ r = obj.operate(dpp, &op, y);
if (r < 0) {
ldpp_dout(dpp, 20) << "OTP create, otp_id=" << config.id << " result=" << (int)r << dendl;
return r;
@@ -126,8 +113,8 @@ int RGWSI_Cls::MFA::remove_mfa(const DoutPrefixProvider *dpp,
const ceph::real_time& mtime,
optional_yield y)
{
- std::optional<RGWSI_RADOS::Obj> obj;
- int r = get_mfa_obj(dpp, user, &obj);
+ rgw_rados_ref obj;
+ int r = get_mfa_ref(dpp, user, &obj);
if (r < 0) {
return r;
}
@@ -135,7 +122,7 @@ int RGWSI_Cls::MFA::remove_mfa(const DoutPrefixProvider *dpp,
librados::ObjectWriteOperation op;
prepare_mfa_write(&op, objv_tracker, mtime);
rados::cls::otp::OTP::remove(&op, id);
- r = obj->operate(dpp, &op, y);
+ r = obj.operate(dpp, &op, y);
if (r < 0) {
ldpp_dout(dpp, 20) << "OTP remove, otp_id=" << id << " result=" << (int)r << dendl;
return r;
@@ -154,7 +141,7 @@ int RGWSI_Cls::MFA::get_mfa(const DoutPrefixProvider *dpp, const rgw_user& user,
return r;
}
- r = rados::cls::otp::OTP::get(nullptr, ref.pool.ioctx(), ref.obj.oid, id, result);
+ r = rados::cls::otp::OTP::get(nullptr, ref.ioctx, ref.obj.oid, id, result);
if (r < 0) {
return r;
}
@@ -172,7 +159,7 @@ int RGWSI_Cls::MFA::list_mfa(const DoutPrefixProvider *dpp, const rgw_user& user
return r;
}
- r = rados::cls::otp::OTP::get_all(nullptr, ref.pool.ioctx(), ref.obj.oid, result);
+ r = rados::cls::otp::OTP::get_all(nullptr, ref.ioctx, ref.obj.oid, result);
if (r < 0) {
return r;
}
@@ -190,7 +177,7 @@ int RGWSI_Cls::MFA::otp_get_current_time(const DoutPrefixProvider *dpp, const rg
return r;
}
- r = rados::cls::otp::OTP::get_current_time(ref.pool.ioctx(), ref.obj.oid, result);
+ r = rados::cls::otp::OTP::get_current_time(ref.ioctx, ref.obj.oid, result);
if (r < 0) {
return r;
}
@@ -203,11 +190,12 @@ int RGWSI_Cls::MFA::set_mfa(const DoutPrefixProvider *dpp, const string& oid, co
const real_time& mtime,
optional_yield y)
{
- rgw_raw_obj o(zone_svc->get_zone_params().otp_pool, oid);
- auto obj = rados_svc->obj(o);
- int r = obj.open(dpp);
+ rgw_rados_ref obj;
+ int r = rgw_get_rados_ref(dpp, cls->rados,
+ { cls->zone_svc->get_zone_params().otp_pool, oid },
+ &obj);
if (r < 0) {
- ldpp_dout(dpp, 4) << "failed to open rados context for " << o << dendl;
+ ldpp_dout(dpp, 4) << "failed to open rados context for " << oid << dendl;
return r;
}
librados::ObjectWriteOperation op;
@@ -231,21 +219,21 @@ int RGWSI_Cls::MFA::list_mfa(const DoutPrefixProvider *dpp, const string& oid, l
RGWObjVersionTracker *objv_tracker, ceph::real_time *pmtime,
optional_yield y)
{
- rgw_raw_obj o(zone_svc->get_zone_params().otp_pool, oid);
- auto obj = rados_svc->obj(o);
- int r = obj.open(dpp);
+ rgw_rados_ref ref;
+ int r = rgw_get_rados_ref(dpp, cls->rados,
+ { cls->zone_svc->get_zone_params().otp_pool, oid },
+ &ref);
if (r < 0) {
- ldpp_dout(dpp, 4) << "failed to open rados context for " << o << dendl;
+ ldpp_dout(dpp, 4) << "failed to open rados context for " << oid << dendl;
return r;
}
- auto& ref = obj.get_ref();
librados::ObjectReadOperation op;
struct timespec mtime_ts;
if (pmtime) {
op.stat2(nullptr, &mtime_ts, nullptr);
}
objv_tracker->prepare_op_for_read(&op);
- r = rados::cls::otp::OTP::get_all(&op, ref.pool.ioctx(), ref.obj.oid, result);
+ r = rados::cls::otp::OTP::get_all(&op, ref.ioctx, ref.obj.oid, result);
if (r < 0) {
return r;
}
@@ -265,14 +253,13 @@ void RGWSI_Cls::TimeLog::prepare_entry(cls_log_entry& entry,
cls_log_add_prepare_entry(entry, utime_t(ut), section, key, bl);
}
-int RGWSI_Cls::TimeLog::init_obj(const DoutPrefixProvider *dpp, const string& oid, RGWSI_RADOS::Obj& obj)
+int RGWSI_Cls::TimeLog::init_obj(const DoutPrefixProvider *dpp, const string& oid, rgw_rados_ref& obj)
{
- rgw_raw_obj o(zone_svc->get_zone_params().log_pool, oid);
- obj = rados_svc->obj(o);
- return obj.open(dpp);
-
+ rgw_raw_obj o(cls->zone_svc->get_zone_params().log_pool, oid);
+ return rgw_get_rados_ref(dpp, cls->rados, o, &obj);
}
-int RGWSI_Cls::TimeLog::add(const DoutPrefixProvider *dpp,
+
+int RGWSI_Cls::TimeLog::add(const DoutPrefixProvider *dpp,
const string& oid,
const real_time& ut,
const string& section,
@@ -280,8 +267,7 @@ int RGWSI_Cls::TimeLog::add(const DoutPrefixProvider *dpp,
bufferlist& bl,
optional_yield y)
{
- RGWSI_RADOS::Obj obj;
-
+ rgw_rados_ref obj;
int r = init_obj(dpp, oid, obj);
if (r < 0) {
return r;
@@ -301,7 +287,7 @@ int RGWSI_Cls::TimeLog::add(const DoutPrefixProvider *dpp,
bool monotonic_inc,
optional_yield y)
{
- RGWSI_RADOS::Obj obj;
+ rgw_rados_ref obj;
int r = init_obj(dpp, oid, obj);
if (r < 0) {
@@ -329,7 +315,7 @@ int RGWSI_Cls::TimeLog::list(const DoutPrefixProvider *dpp,
bool *truncated,
optional_yield y)
{
- RGWSI_RADOS::Obj obj;
+ rgw_rados_ref obj;
int r = init_obj(dpp, oid, obj);
if (r < 0) {
@@ -358,7 +344,7 @@ int RGWSI_Cls::TimeLog::info(const DoutPrefixProvider *dpp,
cls_log_header *header,
optional_yield y)
{
- RGWSI_RADOS::Obj obj;
+ rgw_rados_ref obj;
int r = init_obj(dpp, oid, obj);
if (r < 0) {
@@ -379,7 +365,7 @@ int RGWSI_Cls::TimeLog::info(const DoutPrefixProvider *dpp,
}
int RGWSI_Cls::TimeLog::info_async(const DoutPrefixProvider *dpp,
- RGWSI_RADOS::Obj& obj,
+ rgw_rados_ref& obj,
const string& oid,
cls_log_header *header,
librados::AioCompletion *completion)
@@ -409,7 +395,7 @@ int RGWSI_Cls::TimeLog::trim(const DoutPrefixProvider *dpp,
librados::AioCompletion *completion,
optional_yield y)
{
- RGWSI_RADOS::Obj obj;
+ rgw_rados_ref obj;
int r = init_obj(dpp, oid, obj);
if (r < 0) {
@@ -438,22 +424,23 @@ int RGWSI_Cls::Lock::lock_exclusive(const DoutPrefixProvider *dpp,
string& owner_id,
std::optional<string> lock_name)
{
- auto p = rados_svc->pool(pool);
- int r = p.open(dpp);
+
+ librados::IoCtx p;
+ int r = rgw_init_ioctx(dpp, cls->rados, pool, p, true, false);
if (r < 0) {
return r;
}
uint64_t msec = std::chrono::duration_cast<std::chrono::milliseconds>(duration).count();
utime_t ut(msec / 1000, msec % 1000);
-
+
rados::cls::lock::Lock l(lock_name.value_or(log_lock_name));
l.set_duration(ut);
l.set_cookie(owner_id);
l.set_tag(zone_id);
l.set_may_renew(true);
-
- return l.lock_exclusive(&p.ioctx(), oid);
+
+ return l.lock_exclusive(&p, oid);
}
int RGWSI_Cls::Lock::unlock(const DoutPrefixProvider *dpp,
@@ -463,16 +450,15 @@ int RGWSI_Cls::Lock::unlock(const DoutPrefixProvider *dpp,
string& owner_id,
std::optional<string> lock_name)
{
- auto p = rados_svc->pool(pool);
- int r = p.open(dpp);
+ librados::IoCtx p;
+ int r = rgw_init_ioctx(dpp, cls->rados, pool, p, true, false);
if (r < 0) {
return r;
}
-
+
rados::cls::lock::Lock l(lock_name.value_or(log_lock_name));
l.set_tag(zone_id);
l.set_cookie(owner_id);
-
- return l.unlock(&p.ioctx(), oid);
-}
+ return l.unlock(&p, oid);
+}
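
The MFA/TimeLog/Lock helpers now pass around rgw_rados_ref (an open IoCtx plus a raw object location, obtained via rgw_get_rados_ref()) instead of RGWSI_RADOS::Obj. A toy sketch of the shape of that pattern; these structs are stand-ins for illustration, not the real definitions in driver/rados/rgw_tools.h:

#include <rados/librados.hpp>
#include <string>

// Toy stand-ins: one struct bundles the open IoCtx with the raw object
// location, and callers operate on it directly.
struct raw_obj { std::string pool, oid, loc; };
struct rados_ref {
  librados::IoCtx ioctx;
  raw_obj obj;
  int operate(librados::ObjectWriteOperation* op) {
    return ioctx.operate(obj.oid, op);   // what obj.operate(dpp, &op, y) boils down to
  }
};

// Rough analogue of rgw_get_rados_ref(): open the pool and remember the object.
int get_ref(librados::Rados& cluster, const raw_obj& o, rados_ref* ref) {
  int r = cluster.ioctx_create(o.pool.c_str(), ref->ioctx);
  if (r < 0) return r;
  ref->ioctx.locator_set_key(o.loc);
  ref->obj = o;
  return 0;
}
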
diff --git a/src/rgw/services/svc_cls.h b/src/rgw/services/svc_cls.h
index d1d1d659be8..6648714dbc8 100644
--- a/src/rgw/services/svc_cls.h
+++ b/src/rgw/services/svc_cls.h
@@ -21,25 +21,21 @@
#include "rgw_service.h"
-#include "svc_rados.h"
+#include "driver/rados/rgw_tools.h"
class RGWSI_Cls : public RGWServiceInstance
{
RGWSI_Zone *zone_svc{nullptr};
- RGWSI_RADOS *rados_svc{nullptr};
+ librados::Rados* rados{nullptr};
class ClsSubService : public RGWServiceInstance {
friend class RGWSI_Cls;
- RGWSI_Cls *cls_svc{nullptr};
- RGWSI_Zone *zone_svc{nullptr};
- RGWSI_RADOS *rados_svc{nullptr};
+ RGWSI_Cls *cls{nullptr};
- void init(RGWSI_Cls *_cls_svc, RGWSI_Zone *_zone_svc, RGWSI_RADOS *_rados_svc) {
- cls_svc = _cls_svc;
- zone_svc = _cls_svc->zone_svc;
- rados_svc = _cls_svc->rados_svc;
+ void init(RGWSI_Cls *cls_) {
+ cls = cls_;
}
public:
@@ -48,7 +44,6 @@ class RGWSI_Cls : public RGWServiceInstance
public:
class MFA : public ClsSubService {
- int get_mfa_obj(const DoutPrefixProvider *dpp, const rgw_user& user, std::optional<RGWSI_RADOS::Obj> *obj);
int get_mfa_ref(const DoutPrefixProvider *dpp, const rgw_user& user, rgw_rados_ref *ref);
void prepare_mfa_write(librados::ObjectWriteOperation *op,
@@ -81,7 +76,7 @@ public:
} mfa;
class TimeLog : public ClsSubService {
- int init_obj(const DoutPrefixProvider *dpp, const std::string& oid, RGWSI_RADOS::Obj& obj);
+ int init_obj(const DoutPrefixProvider *dpp, const std::string& oid, rgw_rados_ref& obj);
public:
TimeLog(CephContext *cct): ClsSubService(cct) {}
@@ -117,7 +112,7 @@ public:
cls_log_header *header,
optional_yield y);
int info_async(const DoutPrefixProvider *dpp,
- RGWSI_RADOS::Obj& obj,
+ rgw_rados_ref& obj,
const std::string& oid,
cls_log_header *header,
librados::AioCompletion *completion);
@@ -132,7 +127,7 @@ public:
} timelog;
class Lock : public ClsSubService {
- int init_obj(const std::string& oid, RGWSI_RADOS::Obj& obj);
+ int init_obj(const std::string& oid, rgw_rados_ref& obj);
public:
Lock(CephContext *cct): ClsSubService(cct) {}
int lock_exclusive(const DoutPrefixProvider *dpp,
@@ -152,15 +147,14 @@ public:
RGWSI_Cls(CephContext *cct): RGWServiceInstance(cct), mfa(cct), timelog(cct), lock(cct) {}
- void init(RGWSI_Zone *_zone_svc, RGWSI_RADOS *_rados_svc) {
- rados_svc = _rados_svc;
+ void init(RGWSI_Zone *_zone_svc, librados::Rados* rados_) {
+ rados = rados_;
zone_svc = _zone_svc;
- mfa.init(this, zone_svc, rados_svc);
- timelog.init(this, zone_svc, rados_svc);
- lock.init(this, zone_svc, rados_svc);
+ mfa.init(this);
+ timelog.init(this);
+ lock.init(this);
}
int do_start(optional_yield, const DoutPrefixProvider *dpp) override;
};
-
diff --git a/src/rgw/services/svc_config_key_rados.cc b/src/rgw/services/svc_config_key_rados.cc
index 5edb02ea7f3..c17139af292 100644
--- a/src/rgw/services/svc_config_key_rados.cc
+++ b/src/rgw/services/svc_config_key_rados.cc
@@ -1,14 +1,17 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab ft=cpp
-#include "svc_rados.h"
#include "svc_config_key_rados.h"
-using namespace std;
+#include "rgw_tools.h"
+
+using std::string;
RGWSI_ConfigKey_RADOS::~RGWSI_ConfigKey_RADOS(){}
int RGWSI_ConfigKey_RADOS::do_start(optional_yield, const DoutPrefixProvider *dpp)
{
- maybe_insecure_mon_conn = !svc.rados->check_secure_mon_conn(dpp);
+ maybe_insecure_mon_conn = !rgw_check_secure_mon_conn(dpp);
return 0;
}
@@ -20,14 +23,17 @@ void RGWSI_ConfigKey_RADOS::warn_if_insecure()
return;
}
- string s = "rgw is configured to optionally allow insecure connections to the monitors (auth_supported, ms_mon_client_mode), ssl certificates stored at the monitor configuration could leak";
+ string s = ("rgw is configured to optionally allow insecure connections to "
+ "the monitors (auth_supported, ms_mon_client_mode), ssl "
+ "certificates stored at the monitor configuration could leak");
- svc.rados->clog_warn(s);
+ rgw_clog_warn(rados, s);
lderr(ctx()) << __func__ << "(): WARNING: " << s << dendl;
}
-int RGWSI_ConfigKey_RADOS::get(const string& key, bool secure, bufferlist *result)
+int RGWSI_ConfigKey_RADOS::get(const string& key, bool secure,
+ bufferlist *result)
{
string cmd =
"{"
@@ -36,8 +42,7 @@ int RGWSI_ConfigKey_RADOS::get(const string& key, bool secure, bufferlist *resul
"}";
bufferlist inbl;
- auto handle = svc.rados->handle();
- int ret = handle.mon_command(cmd, inbl, result, nullptr);
+ int ret = rados->mon_command(cmd, inbl, result, nullptr);
if (ret < 0) {
return ret;
}
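
With the Handle wrapper gone, the config-key service issues mon_command() directly on the librados::Rados handle. An illustrative helper along the same lines; the exact JSON built by get() is elided above, so treat the command string here as an example only:

#include <rados/librados.hpp>
#include <string>

// Illustrative only: the same direct mon_command() call the service now makes.
int config_key_get(librados::Rados& cluster, const std::string& key,
                   librados::bufferlist* result) {
  std::string cmd =
    "{"
      "\"prefix\": \"config-key get\", "
      "\"key\": \"" + key + "\""
    "}";
  librados::bufferlist inbl;
  return cluster.mon_command(cmd, inbl, result, nullptr);
}
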
diff --git a/src/rgw/services/svc_config_key_rados.h b/src/rgw/services/svc_config_key_rados.h
index b3b995ac76d..344350278af 100644
--- a/src/rgw/services/svc_config_key_rados.h
+++ b/src/rgw/services/svc_config_key_rados.h
@@ -1,5 +1,3 @@
-
-
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
// vim: ts=8 sw=2 smarttab ft=cpp
@@ -24,8 +22,6 @@
#include "svc_config_key.h"
-class RGWSI_RADOS;
-
class RGWSI_ConfigKey_RADOS : public RGWSI_ConfigKey
{
bool maybe_insecure_mon_conn{false};
@@ -36,12 +32,10 @@ class RGWSI_ConfigKey_RADOS : public RGWSI_ConfigKey
void warn_if_insecure();
public:
- struct Svc {
- RGWSI_RADOS *rados{nullptr};
- } svc;
+ librados::Rados* rados{nullptr};
- void init(RGWSI_RADOS *rados_svc) {
- svc.rados = rados_svc;
+ void init(librados::Rados* rados_) {
+ rados = rados_;
}
RGWSI_ConfigKey_RADOS(CephContext *cct) : RGWSI_ConfigKey(cct) {}
@@ -50,5 +44,3 @@ public:
int get(const std::string& key, bool secure, bufferlist *result) override;
};
-
-
diff --git a/src/rgw/services/svc_mdlog.cc b/src/rgw/services/svc_mdlog.cc
index da723f3e129..46a158ceb77 100644
--- a/src/rgw/services/svc_mdlog.cc
+++ b/src/rgw/services/svc_mdlog.cc
@@ -2,7 +2,6 @@
// vim: ts=8 sw=2 smarttab ft=cpp
#include "svc_mdlog.h"
-#include "svc_rados.h"
#include "svc_zone.h"
#include "svc_sys_obj.h"
@@ -30,13 +29,16 @@ RGWSI_MDLog::RGWSI_MDLog(CephContext *cct, bool _run_sync) : RGWServiceInstance(
RGWSI_MDLog::~RGWSI_MDLog() {
}
-int RGWSI_MDLog::init(RGWSI_RADOS *_rados_svc, RGWSI_Zone *_zone_svc, RGWSI_SysObj *_sysobj_svc, RGWSI_Cls *_cls_svc)
+int RGWSI_MDLog::init(librados::Rados* rados_, RGWSI_Zone *_zone_svc,
+ RGWSI_SysObj *_sysobj_svc, RGWSI_Cls *_cls_svc,
+ RGWAsyncRadosProcessor* async_processor_)
{
svc.zone = _zone_svc;
svc.sysobj = _sysobj_svc;
svc.mdlog = this;
- svc.rados = _rados_svc;
+ rados = rados_;
svc.cls = _cls_svc;
+ async_processor = async_processor_;
return 0;
}
@@ -262,11 +264,12 @@ class ReadHistoryCR : public RGWCoroutine {
ReadHistoryCR(const DoutPrefixProvider *dpp,
const Svc& svc,
Cursor *cursor,
- RGWObjVersionTracker *objv_tracker)
+ RGWObjVersionTracker *objv_tracker,
+ RGWAsyncRadosProcessor* async_processor)
: RGWCoroutine(svc.zone->ctx()), dpp(dpp), svc(svc),
cursor(cursor),
objv_tracker(objv_tracker),
- async_processor(svc.rados->get_async_processor())
+ async_processor(async_processor)
{}
int operate(const DoutPrefixProvider *dpp) {
@@ -312,10 +315,11 @@ class WriteHistoryCR : public RGWCoroutine {
WriteHistoryCR(const DoutPrefixProvider *dpp,
Svc& svc,
const Cursor& cursor,
- RGWObjVersionTracker *objv)
+ RGWObjVersionTracker *objv,
+ RGWAsyncRadosProcessor* async_processor)
: RGWCoroutine(svc.zone->ctx()), dpp(dpp), svc(svc),
cursor(cursor), objv(objv),
- async_processor(svc.rados->get_async_processor())
+ async_processor(async_processor)
{}
int operate(const DoutPrefixProvider *dpp) {
@@ -353,18 +357,22 @@ class TrimHistoryCR : public RGWCoroutine {
RGWObjVersionTracker *objv; //< to prevent racing updates
Cursor next; //< target cursor for oldest log period
Cursor existing; //< existing cursor read from disk
+ RGWAsyncRadosProcessor* async_processor;
public:
- TrimHistoryCR(const DoutPrefixProvider *dpp, const Svc& svc, Cursor cursor, RGWObjVersionTracker *objv)
+ TrimHistoryCR(const DoutPrefixProvider *dpp, const Svc& svc, Cursor cursor,
+ RGWObjVersionTracker *objv,
+ RGWAsyncRadosProcessor* async_processor)
: RGWCoroutine(svc.zone->ctx()), dpp(dpp), svc(svc),
- cursor(cursor), objv(objv), next(cursor) {
+ cursor(cursor), objv(objv), next(cursor),
+ async_processor(async_processor) {
next.next(); // advance past cursor
}
int operate(const DoutPrefixProvider *dpp) {
reenter(this) {
// read an existing history, and write the new history if it's newer
- yield call(new ReadHistoryCR(dpp, svc, &existing, objv));
+ yield call(new ReadHistoryCR(dpp, svc, &existing, objv, async_processor));
if (retcode < 0) {
return set_cr_error(retcode);
}
@@ -375,7 +383,7 @@ class TrimHistoryCR : public RGWCoroutine {
return set_cr_error(-ECANCELED);
}
// overwrite with updated history
- yield call(new WriteHistoryCR(dpp, svc, next, objv));
+ yield call(new WriteHistoryCR(dpp, svc, next, objv, async_processor));
if (retcode < 0) {
return set_cr_error(retcode);
}
@@ -512,13 +520,13 @@ Cursor RGWSI_MDLog::read_oldest_log_period(optional_yield y, const DoutPrefixPro
RGWCoroutine* RGWSI_MDLog::read_oldest_log_period_cr(const DoutPrefixProvider *dpp,
Cursor *period, RGWObjVersionTracker *objv) const
{
- return new mdlog::ReadHistoryCR(dpp, svc, period, objv);
+ return new mdlog::ReadHistoryCR(dpp, svc, period, objv, async_processor);
}
RGWCoroutine* RGWSI_MDLog::trim_log_period_cr(const DoutPrefixProvider *dpp,
Cursor period, RGWObjVersionTracker *objv) const
{
- return new mdlog::TrimHistoryCR(dpp, svc, period, objv);
+ return new mdlog::TrimHistoryCR(dpp, svc, period, objv, async_processor);
}
RGWMetadataLog* RGWSI_MDLog::get_log(const std::string& period)
diff --git a/src/rgw/services/svc_mdlog.h b/src/rgw/services/svc_mdlog.h
index 8b37ba11e56..f169ee88db9 100644
--- a/src/rgw/services/svc_mdlog.h
+++ b/src/rgw/services/svc_mdlog.h
@@ -29,7 +29,6 @@ class RGWCoroutine;
class RGWSI_Zone;
class RGWSI_SysObj;
-class RGWSI_RADOS;
namespace mdlog {
class ReadHistoryCR;
@@ -58,18 +57,21 @@ public:
RGWSI_MDLog(CephContext *cct, bool run_sync);
virtual ~RGWSI_MDLog();
+ librados::Rados* rados{nullptr};
+ RGWAsyncRadosProcessor* async_processor{nullptr};
+
struct Svc {
- RGWSI_RADOS *rados{nullptr};
RGWSI_Zone *zone{nullptr};
RGWSI_SysObj *sysobj{nullptr};
RGWSI_MDLog *mdlog{nullptr};
RGWSI_Cls *cls{nullptr};
} svc;
- int init(RGWSI_RADOS *_rados_svc,
+ int init(librados::Rados* rados_,
RGWSI_Zone *_zone_svc,
RGWSI_SysObj *_sysobj_svc,
- RGWSI_Cls *_cls_svc);
+ RGWSI_Cls *_cls_svc,
+ RGWAsyncRadosProcessor* async_processor_);
int do_start(optional_yield y, const DoutPrefixProvider *dpp) override;
diff --git a/src/rgw/services/svc_notify.cc b/src/rgw/services/svc_notify.cc
index 43f84ed0a4f..5593dee9ae2 100644
--- a/src/rgw/services/svc_notify.cc
+++ b/src/rgw/services/svc_notify.cc
@@ -9,7 +9,6 @@
#include "svc_notify.h"
#include "svc_finisher.h"
#include "svc_zone.h"
-#include "svc_rados.h"
#include "rgw_zone.h"
@@ -29,7 +28,7 @@ class RGWWatcher : public DoutPrefixProvider , public librados::WatchCtx2 {
CephContext *cct;
RGWSI_Notify *svc;
int index;
- RGWSI_RADOS::Obj obj;
+ rgw_rados_ref obj;
uint64_t watch_handle;
int register_ret{0};
bool unregister_done{false};
@@ -51,7 +50,8 @@ class RGWWatcher : public DoutPrefixProvider , public librados::WatchCtx2 {
}
public:
- RGWWatcher(CephContext *_cct, RGWSI_Notify *s, int i, RGWSI_RADOS::Obj& o) : cct(_cct), svc(s), index(i), obj(o), watch_handle(0) {}
+ RGWWatcher(CephContext *_cct, RGWSI_Notify *s, int i, rgw_rados_ref& o)
+ : cct(_cct), svc(s), index(i), obj(o), watch_handle(0) {}
void handle_notify(uint64_t notify_id,
uint64_t cookie,
uint64_t notifier_id,
@@ -174,7 +174,7 @@ string RGWSI_Notify::get_control_oid(int i)
}
// do not call pick_obj_control before init_watch
-RGWSI_RADOS::Obj RGWSI_Notify::pick_control_obj(const string& key)
+rgw_rados_ref RGWSI_Notify::pick_control_obj(const string& key)
{
uint32_t r = ceph_str_hash_linux(key.c_str(), key.size());
@@ -206,17 +206,17 @@ int RGWSI_Notify::init_watch(const DoutPrefixProvider *dpp, optional_yield y)
notify_oid = notify_oid_prefix;
}
- notify_objs[i] = rados_svc->handle().obj({control_pool, notify_oid});
- auto& notify_obj = notify_objs[i];
-
- int r = notify_obj.open(dpp);
+ int r = rgw_get_rados_ref(dpp, rados, { control_pool, notify_oid },
+ &notify_objs[i]);
if (r < 0) {
ldpp_dout(dpp, 0) << "ERROR: notify_obj.open() returned r=" << r << dendl;
return r;
}
+ auto& notify_obj = notify_objs[i];
librados::ObjectWriteOperation op;
op.create(false);
+
r = notify_obj.operate(dpp, &op, y);
if (r < 0 && r != -EEXIST) {
ldpp_dout(dpp, 0) << "ERROR: notify_obj.operate() returned r=" << r << dendl;
@@ -270,10 +270,6 @@ int RGWSI_Notify::do_start(optional_yield y, const DoutPrefixProvider *dpp)
assert(zone_svc->is_started()); /* otherwise there's an ordering problem */
- r = rados_svc->start(y, dpp);
- if (r < 0) {
- return r;
- }
r = finisher_svc->start(y, dpp);
if (r < 0) {
return r;
@@ -315,14 +311,14 @@ void RGWSI_Notify::shutdown()
finalized = true;
}
-int RGWSI_Notify::unwatch(RGWSI_RADOS::Obj& obj, uint64_t watch_handle)
+int RGWSI_Notify::unwatch(rgw_rados_ref& obj, uint64_t watch_handle)
{
int r = obj.unwatch(watch_handle);
if (r < 0) {
ldout(cct, 0) << "ERROR: rados->unwatch2() returned r=" << r << dendl;
return r;
}
- r = rados_svc->handle().watch_flush();
+ r = rados->watch_flush();
if (r < 0) {
ldout(cct, 0) << "ERROR: rados->watch_flush() returned r=" << r << dendl;
return r;
@@ -392,9 +388,9 @@ int RGWSI_Notify::distribute(const DoutPrefixProvider *dpp, const string& key,
which will lead to division by 0 in pick_obj_control (num_watchers is 0).
*/
if (num_watchers > 0) {
- RGWSI_RADOS::Obj notify_obj = pick_control_obj(key);
+ auto notify_obj = pick_control_obj(key);
- ldpp_dout(dpp, 10) << "distributing notification oid=" << notify_obj.get_ref().obj
+ ldpp_dout(dpp, 10) << "distributing notification oid=" << notify_obj.obj
<< " cni=" << cni << dendl;
return robust_notify(dpp, notify_obj, cni, y);
}
@@ -443,7 +439,7 @@ static timeout_vector decode_timeouts(const bufferlist& bl)
}
int RGWSI_Notify::robust_notify(const DoutPrefixProvider *dpp,
- RGWSI_RADOS::Obj& notify_obj,
+ rgw_rados_ref& notify_obj,
const RGWCacheNotifyInfo& cni,
optional_yield y)
{
diff --git a/src/rgw/services/svc_notify.h b/src/rgw/services/svc_notify.h
index f7329136ece..4f7e9d17ee3 100644
--- a/src/rgw/services/svc_notify.h
+++ b/src/rgw/services/svc_notify.h
@@ -5,7 +5,7 @@
#include "rgw_service.h"
-#include "svc_rados.h"
+#include "rgw_tools.h"
class Context;
@@ -21,14 +21,14 @@ class RGWSI_Notify : public RGWServiceInstance
{
friend class RGWWatcher;
friend class RGWSI_Notify_ShutdownCB;
- friend class RGWServices_Def;
+ friend struct RGWServices_Def;
public:
class CB;
private:
RGWSI_Zone *zone_svc{nullptr};
- RGWSI_RADOS *rados_svc{nullptr};
+ librados::Rados *rados{nullptr};
RGWSI_Finisher *finisher_svc{nullptr};
ceph::shared_mutex watchers_lock = ceph::make_shared_mutex("watchers_lock");
@@ -37,7 +37,7 @@ private:
int num_watchers{0};
RGWWatcher **watchers{nullptr};
std::set<int> watchers_set;
- std::vector<RGWSI_RADOS::Obj> notify_objs;
+ std::vector<rgw_rados_ref> notify_objs;
bool enabled{false};
@@ -45,7 +45,7 @@ private:
uint64_t max_notify_retries = 10;
std::string get_control_oid(int i);
- RGWSI_RADOS::Obj pick_control_obj(const std::string& key);
+ rgw_rados_ref pick_control_obj(const std::string& key);
CB *cb{nullptr};
@@ -58,16 +58,16 @@ private:
void finalize_watch();
void init(RGWSI_Zone *_zone_svc,
- RGWSI_RADOS *_rados_svc,
+ librados::Rados* rados_,
RGWSI_Finisher *_finisher_svc) {
zone_svc = _zone_svc;
- rados_svc = _rados_svc;
+ rados = rados_;
finisher_svc = _finisher_svc;
}
int do_start(optional_yield, const DoutPrefixProvider *dpp) override;
void shutdown() override;
- int unwatch(RGWSI_RADOS::Obj& obj, uint64_t watch_handle);
+ int unwatch(rgw_rados_ref& obj, uint64_t watch_handle);
void add_watcher(int i);
void remove_watcher(int i);
@@ -79,7 +79,7 @@ private:
void _set_enabled(bool status);
void set_enabled(bool status);
- int robust_notify(const DoutPrefixProvider *dpp, RGWSI_RADOS::Obj& notify_obj,
+ int robust_notify(const DoutPrefixProvider *dpp, rgw_rados_ref& notify_obj,
const RGWCacheNotifyInfo& bl, optional_yield y);
void schedule_context(Context *c);
diff --git a/src/rgw/services/svc_rados.cc b/src/rgw/services/svc_rados.cc
deleted file mode 100644
index d682b87eb2c..00000000000
--- a/src/rgw/services/svc_rados.cc
+++ /dev/null
@@ -1,445 +0,0 @@
-// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
-// vim: ts=8 sw=2 smarttab ft=cpp
-
-#include "svc_rados.h"
-
-#include "include/rados/librados.hpp"
-#include "common/errno.h"
-#include "osd/osd_types.h"
-#include "rgw_tools.h"
-#include "rgw_cr_rados.h"
-
-#include "auth/AuthRegistry.h"
-
-#define dout_subsys ceph_subsys_rgw
-
-using namespace std;
-
-RGWSI_RADOS::RGWSI_RADOS(CephContext *cct) : RGWServiceInstance(cct)
-{
-}
-
-RGWSI_RADOS::~RGWSI_RADOS()
-{
-}
-
-int RGWSI_RADOS::do_start(optional_yield, const DoutPrefixProvider *dpp)
-{
- int ret = rados.init_with_context(cct);
- if (ret < 0) {
- return ret;
- }
- ret = rados.connect();
- if (ret < 0) {
- return ret;
- }
-
- async_processor.reset(new RGWAsyncRadosProcessor(cct, cct->_conf->rgw_num_async_rados_threads));
- async_processor->start();
-
- return 0;
-}
-
-void RGWSI_RADOS::shutdown()
-{
- if (async_processor) {
- async_processor->stop();
- }
- rados.shutdown();
-}
-
-void RGWSI_RADOS::stop_processor()
-{
- if (async_processor) {
- async_processor->stop();
- }
-}
-
-librados::Rados* RGWSI_RADOS::get_rados_handle()
-{
- return &rados;
-}
-
-std::string RGWSI_RADOS::cluster_fsid()
-{
- std::string fsid;
- (void) get_rados_handle()->cluster_fsid(&fsid);
- return fsid;
-}
-
-uint64_t RGWSI_RADOS::instance_id()
-{
- return get_rados_handle()->get_instance_id();
-}
-
-int RGWSI_RADOS::open_pool_ctx(const DoutPrefixProvider *dpp, const rgw_pool& pool, librados::IoCtx& io_ctx,
- const OpenParams& params)
-{
- return rgw_init_ioctx(dpp, get_rados_handle(), pool, io_ctx,
- params.create,
- params.mostly_omap);
-}
-
-int RGWSI_RADOS::pool_iterate(const DoutPrefixProvider *dpp,
- librados::IoCtx& io_ctx,
- librados::NObjectIterator& iter,
- uint32_t num, vector<rgw_bucket_dir_entry>& objs,
- RGWAccessListFilter *filter,
- bool *is_truncated)
-{
- if (iter == io_ctx.nobjects_end())
- return -ENOENT;
-
- uint32_t i;
-
- for (i = 0; i < num && iter != io_ctx.nobjects_end(); ++i, ++iter) {
- rgw_bucket_dir_entry e;
-
- string oid = iter->get_oid();
- ldpp_dout(dpp, 20) << "RGWRados::pool_iterate: got " << oid << dendl;
-
- // fill it in with initial values; we may correct later
- if (filter && !filter->filter(oid, oid))
- continue;
-
- e.key = oid;
- objs.push_back(e);
- }
-
- if (is_truncated)
- *is_truncated = (iter != io_ctx.nobjects_end());
-
- return objs.size();
-}
-
-RGWSI_RADOS::Obj::Obj(Pool& pool, const string& oid) : rados_svc(pool.rados_svc)
-{
- ref.pool = pool;
- ref.obj = rgw_raw_obj(pool.get_pool(), oid);
-}
-
-void RGWSI_RADOS::Obj::init(const rgw_raw_obj& obj)
-{
- ref.pool = RGWSI_RADOS::Pool(rados_svc, obj.pool);
- ref.obj = obj;
-}
-
-int RGWSI_RADOS::Obj::open(const DoutPrefixProvider *dpp)
-{
- int r = ref.pool.open(dpp);
- if (r < 0) {
- return r;
- }
-
- ref.pool.ioctx().locator_set_key(ref.obj.loc);
-
- return 0;
-}
-
-int RGWSI_RADOS::Obj::operate(const DoutPrefixProvider *dpp, librados::ObjectWriteOperation *op,
- optional_yield y, int flags)
-{
- return rgw_rados_operate(dpp, ref.pool.ioctx(), ref.obj.oid, op, y, flags);
-}
-
-int RGWSI_RADOS::Obj::operate(const DoutPrefixProvider *dpp, librados::ObjectReadOperation *op,
- bufferlist *pbl, optional_yield y, int flags)
-{
- return rgw_rados_operate(dpp, ref.pool.ioctx(), ref.obj.oid, op, pbl, y, flags);
-}
-
-int RGWSI_RADOS::Obj::aio_operate(librados::AioCompletion *c, librados::ObjectWriteOperation *op)
-{
- return ref.pool.ioctx().aio_operate(ref.obj.oid, c, op);
-}
-
-int RGWSI_RADOS::Obj::aio_operate(librados::AioCompletion *c, librados::ObjectReadOperation *op,
- bufferlist *pbl)
-{
- return ref.pool.ioctx().aio_operate(ref.obj.oid, c, op, pbl);
-}
-
-int RGWSI_RADOS::Obj::watch(uint64_t *handle, librados::WatchCtx2 *ctx)
-{
- return ref.pool.ioctx().watch2(ref.obj.oid, handle, ctx);
-}
-
-int RGWSI_RADOS::Obj::aio_watch(librados::AioCompletion *c, uint64_t *handle, librados::WatchCtx2 *ctx)
-{
- return ref.pool.ioctx().aio_watch(ref.obj.oid, c, handle, ctx);
-}
-
-int RGWSI_RADOS::Obj::unwatch(uint64_t handle)
-{
- return ref.pool.ioctx().unwatch2(handle);
-}
-
-int RGWSI_RADOS::Obj::notify(const DoutPrefixProvider *dpp, bufferlist& bl, uint64_t timeout_ms,
- bufferlist *pbl, optional_yield y)
-{
- return rgw_rados_notify(dpp, ref.pool.ioctx(), ref.obj.oid, bl, timeout_ms, pbl, y);
-}
-
-void RGWSI_RADOS::Obj::notify_ack(uint64_t notify_id,
- uint64_t cookie,
- bufferlist& bl)
-{
- ref.pool.ioctx().notify_ack(ref.obj.oid, notify_id, cookie, bl);
-}
-
-uint64_t RGWSI_RADOS::Obj::get_last_version()
-{
- return ref.pool.ioctx().get_last_version();
-}
-
-int RGWSI_RADOS::Pool::create(const DoutPrefixProvider *dpp)
-{
- librados::Rados *rad = rados_svc->get_rados_handle();
- int r = rad->pool_create(pool.name.c_str());
- if (r < 0) {
- ldpp_dout(dpp, 0) << "WARNING: pool_create returned " << r << dendl;
- return r;
- }
- librados::IoCtx io_ctx;
- r = rad->ioctx_create(pool.name.c_str(), io_ctx);
- if (r < 0) {
- ldpp_dout(dpp, 0) << "WARNING: ioctx_create returned " << r << dendl;
- return r;
- }
- r = io_ctx.application_enable(pg_pool_t::APPLICATION_NAME_RGW, false);
- if (r < 0) {
- ldpp_dout(dpp, 0) << "WARNING: application_enable returned " << r << dendl;
- return r;
- }
- return 0;
-}
-
-int RGWSI_RADOS::Pool::create(const DoutPrefixProvider *dpp, const vector<rgw_pool>& pools, vector<int> *retcodes)
-{
- vector<librados::PoolAsyncCompletion *> completions;
- vector<int> rets;
-
- librados::Rados *rad = rados_svc->get_rados_handle();
- for (auto iter = pools.begin(); iter != pools.end(); ++iter) {
- librados::PoolAsyncCompletion *c = librados::Rados::pool_async_create_completion();
- completions.push_back(c);
- auto& pool = *iter;
- int ret = rad->pool_create_async(pool.name.c_str(), c);
- rets.push_back(ret);
- }
-
- vector<int>::iterator riter;
- vector<librados::PoolAsyncCompletion *>::iterator citer;
-
- bool error = false;
- ceph_assert(rets.size() == completions.size());
- for (riter = rets.begin(), citer = completions.begin(); riter != rets.end(); ++riter, ++citer) {
- int r = *riter;
- librados::PoolAsyncCompletion *c = *citer;
- if (r == 0) {
- c->wait();
- r = c->get_return_value();
- if (r < 0) {
- ldpp_dout(dpp, 0) << "WARNING: async pool_create returned " << r << dendl;
- error = true;
- }
- }
- c->release();
- retcodes->push_back(r);
- }
- if (error) {
- return 0;
- }
-
- std::vector<librados::IoCtx> io_ctxs;
- retcodes->clear();
- for (auto pool : pools) {
- io_ctxs.emplace_back();
- int ret = rad->ioctx_create(pool.name.c_str(), io_ctxs.back());
- if (ret < 0) {
- ldpp_dout(dpp, 0) << "WARNING: ioctx_create returned " << ret << dendl;
- error = true;
- }
- retcodes->push_back(ret);
- }
- if (error) {
- return 0;
- }
-
- completions.clear();
- for (auto &io_ctx : io_ctxs) {
- librados::PoolAsyncCompletion *c =
- librados::Rados::pool_async_create_completion();
- completions.push_back(c);
- int ret = io_ctx.application_enable_async(pg_pool_t::APPLICATION_NAME_RGW,
- false, c);
- ceph_assert(ret == 0);
- }
-
- retcodes->clear();
- for (auto c : completions) {
- c->wait();
- int ret = c->get_return_value();
- if (ret == -EOPNOTSUPP) {
- ret = 0;
- } else if (ret < 0) {
- ldpp_dout(dpp, 0) << "WARNING: async application_enable returned " << ret
- << dendl;
- error = true;
- }
- c->release();
- retcodes->push_back(ret);
- }
- return 0;
-}
-
-int RGWSI_RADOS::Pool::lookup()
-{
- librados::Rados *rad = rados_svc->get_rados_handle();
- int ret = rad->pool_lookup(pool.name.c_str());
- if (ret < 0) {
- return ret;
- }
-
- return 0;
-}
-
-int RGWSI_RADOS::Pool::open(const DoutPrefixProvider *dpp, const OpenParams& params)
-{
- return rados_svc->open_pool_ctx(dpp, pool, state.ioctx, params);
-}
-
-int RGWSI_RADOS::Pool::List::init(const DoutPrefixProvider *dpp, const string& marker, RGWAccessListFilter *filter)
-{
- if (ctx.initialized) {
- return -EINVAL;
- }
-
- if (!pool) {
- return -EINVAL;
- }
-
- int r = pool->rados_svc->open_pool_ctx(dpp, pool->pool, ctx.ioctx);
- if (r < 0) {
- return r;
- }
-
- librados::ObjectCursor oc;
- if (!oc.from_str(marker)) {
- ldpp_dout(dpp, 10) << "failed to parse cursor: " << marker << dendl;
- return -EINVAL;
- }
-
- try {
- ctx.iter = ctx.ioctx.nobjects_begin(oc);
- ctx.filter = filter;
- ctx.initialized = true;
- return 0;
- } catch (const std::system_error& e) {
- r = -e.code().value();
- ldpp_dout(dpp, 10) << "nobjects_begin threw " << e.what()
- << ", returning " << r << dendl;
- return r;
- } catch (const std::exception& e) {
- ldpp_dout(dpp, 10) << "nobjects_begin threw " << e.what()
- << ", returning -5" << dendl;
- return -EIO;
- }
-}
-
-int RGWSI_RADOS::Pool::List::get_next(const DoutPrefixProvider *dpp,
- int max,
- std::vector<string> *oids,
- bool *is_truncated)
-{
- if (!ctx.initialized) {
- return -EINVAL;
- }
- vector<rgw_bucket_dir_entry> objs;
- int r = pool->rados_svc->pool_iterate(dpp, ctx.ioctx, ctx.iter, max, objs, ctx.filter, is_truncated);
- if (r < 0) {
- if(r != -ENOENT) {
- ldpp_dout(dpp, 10) << "failed to list objects pool_iterate returned r=" << r << dendl;
- }
- return r;
- }
-
- for (auto& o : objs) {
- oids->push_back(o.key.name);
- }
-
- return oids->size();
-}
-
-RGWSI_RADOS::Obj RGWSI_RADOS::Handle::obj(const rgw_raw_obj& o)
-{
- return RGWSI_RADOS::Obj(rados_svc, o);
-}
-int RGWSI_RADOS::Handle::watch_flush()
-{
- librados::Rados *rad = rados_svc->get_rados_handle();
- return rad->watch_flush();
-}
-
-int RGWSI_RADOS::Handle::mon_command(std::string cmd,
- const bufferlist& inbl,
- bufferlist *outbl,
- std::string *outs)
-{
- librados::Rados *rad = rados_svc->get_rados_handle();
- return rad->mon_command(cmd, inbl, outbl, outs);
-}
-
-int RGWSI_RADOS::Pool::List::get_marker(string *marker)
-{
- if (!ctx.initialized) {
- return -EINVAL;
- }
-
- *marker = ctx.iter.get_cursor().to_str();
- return 0;
-}
-
-int RGWSI_RADOS::clog_warn(const string& msg)
-{
- string cmd =
- "{"
- "\"prefix\": \"log\", "
- "\"level\": \"warn\", "
- "\"logtext\": [\"" + msg + "\"]"
- "}";
-
- bufferlist inbl;
- auto h = handle();
- return h.mon_command(cmd, inbl, nullptr, nullptr);
-}
-
-bool RGWSI_RADOS::check_secure_mon_conn(const DoutPrefixProvider *dpp) const
-{
- AuthRegistry reg(cct);
-
- reg.refresh_config();
-
- std::vector<uint32_t> methods;
- std::vector<uint32_t> modes;
-
- reg.get_supported_methods(CEPH_ENTITY_TYPE_MON, &methods, &modes);
- ldpp_dout(dpp, 20) << __func__ << "(): auth registry supported: methods=" << methods << " modes=" << modes << dendl;
-
- for (auto method : methods) {
- if (!reg.is_secure_method(method)) {
- ldpp_dout(dpp, 20) << __func__ << "(): method " << method << " is insecure" << dendl;
- return false;
- }
- }
-
- for (auto mode : modes) {
- if (!reg.is_secure_mode(mode)) {
- ldpp_dout(dpp, 20) << __func__ << "(): mode " << mode << " is insecure" << dendl;
- return false;
- }
- }
-
- return true;
-}
-
diff --git a/src/rgw/services/svc_rados.h b/src/rgw/services/svc_rados.h
deleted file mode 100644
index ede029aa897..00000000000
--- a/src/rgw/services/svc_rados.h
+++ /dev/null
@@ -1,252 +0,0 @@
-// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
-// vim: ts=8 sw=2 smarttab ft=cpp
-
-#pragma once
-
-#include "rgw_service.h"
-
-#include "include/rados/librados.hpp"
-#include "common/async/yield_context.h"
-
-class RGWAsyncRadosProcessor;
-
-class RGWAccessListFilter {
-public:
- virtual ~RGWAccessListFilter() {}
- virtual bool filter(const std::string& name, std::string& key) = 0;
-};
-
-struct RGWAccessListFilterPrefix : public RGWAccessListFilter {
- std::string prefix;
-
- explicit RGWAccessListFilterPrefix(const std::string& _prefix) : prefix(_prefix) {}
- bool filter(const std::string& name, std::string& key) override {
- return (prefix.compare(key.substr(0, prefix.size())) == 0);
- }
-};
-
-class RGWSI_RADOS : public RGWServiceInstance
-{
- librados::Rados rados;
- std::unique_ptr<RGWAsyncRadosProcessor> async_processor;
-
- int do_start(optional_yield, const DoutPrefixProvider *dpp) override;
-
-public:
- struct OpenParams {
- bool create{true};
- bool mostly_omap{false};
-
- OpenParams() {}
-
- OpenParams& set_create(bool _create) {
- create = _create;
- return *this;
- }
- OpenParams& set_mostly_omap(bool _mostly_omap) {
- mostly_omap = _mostly_omap;
- return *this;
- }
- };
-
-private:
- int open_pool_ctx(const DoutPrefixProvider *dpp, const rgw_pool& pool, librados::IoCtx& io_ctx,
- const OpenParams& params = {});
- int pool_iterate(const DoutPrefixProvider *dpp,
- librados::IoCtx& ioctx,
- librados::NObjectIterator& iter,
- uint32_t num, std::vector<rgw_bucket_dir_entry>& objs,
- RGWAccessListFilter *filter,
- bool *is_truncated);
-
-public:
- RGWSI_RADOS(CephContext *cct);
- ~RGWSI_RADOS();
- librados::Rados* get_rados_handle();
-
- void init() {}
- void shutdown() override;
- void stop_processor();
-
- std::string cluster_fsid();
- uint64_t instance_id();
- bool check_secure_mon_conn(const DoutPrefixProvider *dpp) const;
-
- RGWAsyncRadosProcessor *get_async_processor() {
- return async_processor.get();
- }
-
- int clog_warn(const std::string& msg);
-
- class Handle;
-
- class Pool {
- friend class RGWSI_RADOS;
- friend Handle;
- friend class Obj;
-
- RGWSI_RADOS *rados_svc{nullptr};
- rgw_pool pool;
-
- struct State {
- librados::IoCtx ioctx;
- } state;
-
- Pool(RGWSI_RADOS *_rados_svc,
- const rgw_pool& _pool) : rados_svc(_rados_svc),
- pool(_pool) {}
-
- Pool(RGWSI_RADOS *_rados_svc) : rados_svc(_rados_svc) {}
- public:
- Pool() {}
-
- int create(const DoutPrefixProvider *dpp);
- int create(const DoutPrefixProvider *dpp, const std::vector<rgw_pool>& pools, std::vector<int> *retcodes);
- int lookup();
- int open(const DoutPrefixProvider *dpp, const OpenParams& params = {});
-
- const rgw_pool& get_pool() {
- return pool;
- }
-
- librados::IoCtx& ioctx() & {
- return state.ioctx;
- }
-
- librados::IoCtx&& ioctx() && {
- return std::move(state.ioctx);
- }
-
- struct List {
- Pool *pool{nullptr};
-
- struct Ctx {
- bool initialized{false};
- librados::IoCtx ioctx;
- librados::NObjectIterator iter;
- RGWAccessListFilter *filter{nullptr};
- } ctx;
-
- List() {}
- List(Pool *_pool) : pool(_pool) {}
-
- int init(const DoutPrefixProvider *dpp, const std::string& marker, RGWAccessListFilter *filter = nullptr);
- int get_next(const DoutPrefixProvider *dpp, int max,
- std::vector<std::string> *oids,
- bool *is_truncated);
-
- int get_marker(std::string *marker);
- };
-
- List op() {
- return List(this);
- }
-
- friend List;
- };
-
-
- struct rados_ref {
- RGWSI_RADOS::Pool pool;
- rgw_raw_obj obj;
- };
-
- class Obj {
- friend class RGWSI_RADOS;
- friend class Handle;
-
- RGWSI_RADOS *rados_svc{nullptr};
- rados_ref ref;
-
- void init(const rgw_raw_obj& obj);
-
- Obj(RGWSI_RADOS *_rados_svc, const rgw_raw_obj& _obj)
- : rados_svc(_rados_svc) {
- init(_obj);
- }
-
- Obj(Pool& pool, const std::string& oid);
-
- public:
- Obj() {}
-
- int open(const DoutPrefixProvider *dpp);
-
- int operate(const DoutPrefixProvider *dpp, librados::ObjectWriteOperation *op, optional_yield y,
- int flags = 0);
- int operate(const DoutPrefixProvider *dpp, librados::ObjectReadOperation *op, bufferlist *pbl,
- optional_yield y, int flags = 0);
- int aio_operate(librados::AioCompletion *c, librados::ObjectWriteOperation *op);
- int aio_operate(librados::AioCompletion *c, librados::ObjectReadOperation *op,
- bufferlist *pbl);
-
- int watch(uint64_t *handle, librados::WatchCtx2 *ctx);
- int aio_watch(librados::AioCompletion *c, uint64_t *handle, librados::WatchCtx2 *ctx);
- int unwatch(uint64_t handle);
- int notify(const DoutPrefixProvider *dpp, bufferlist& bl, uint64_t timeout_ms,
- bufferlist *pbl, optional_yield y);
- void notify_ack(uint64_t notify_id,
- uint64_t cookie,
- bufferlist& bl);
-
- uint64_t get_last_version();
-
- rados_ref& get_ref() { return ref; }
- const rados_ref& get_ref() const { return ref; }
-
- const rgw_raw_obj& get_raw_obj() const {
- return ref.obj;
- }
- };
-
- class Handle {
- friend class RGWSI_RADOS;
-
- RGWSI_RADOS *rados_svc{nullptr};
-
- Handle(RGWSI_RADOS *_rados_svc) : rados_svc(_rados_svc) {}
- public:
- Obj obj(const rgw_raw_obj& o);
-
- Pool pool(const rgw_pool& p) {
- return Pool(rados_svc, p);
- }
-
- int watch_flush();
-
- int mon_command(std::string cmd,
- const bufferlist& inbl,
- bufferlist *outbl,
- std::string *outs);
- };
-
- Handle handle() {
- return Handle(this);
- }
-
- Obj obj(const rgw_raw_obj& o) {
- return Obj(this, o);
- }
-
- Obj obj(Pool& pool, const std::string& oid) {
- return Obj(pool, oid);
- }
-
- Pool pool() {
- return Pool(this);
- }
-
- Pool pool(const rgw_pool& p) {
- return Pool(this, p);
- }
-
- friend Obj;
- friend Pool;
- friend Pool::List;
-};
-
-using rgw_rados_ref = RGWSI_RADOS::rados_ref;
-
-inline std::ostream& operator<<(std::ostream& out, const RGWSI_RADOS::Obj& obj) {
- return out << obj.get_raw_obj();
-}
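
The RGWSI_RADOS service deleted above wrapped librados behind Pool/Obj/Handle helpers. The remaining hunks in this change swap those helpers for a bare librados::Rados* plus the lightweight rgw_rados_ref resolved by rgw_get_rados_ref(). A minimal sketch of the before/after call pattern, assuming only the names that appear in this diff:

    // Before: obtain and open an Obj through the RGWSI_RADOS service.
    RGWSI_RADOS::Obj rados_obj = rados_svc->obj(obj);
    int r = rados_obj.open(dpp);
    if (r < 0) {
      return r;
    }
    r = rados_obj.operate(dpp, &op, y);

    // After: resolve a plain rgw_rados_ref straight from librados::Rados.
    rgw_rados_ref ref;
    r = rgw_get_rados_ref(dpp, rados, obj, &ref);  // 'rados' is a librados::Rados*
    if (r < 0) {
      return r;
    }
    r = ref.operate(dpp, &op, y);  // ref.ioctx and ref.obj.oid are also available
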
diff --git a/src/rgw/services/svc_sys_obj.cc b/src/rgw/services/svc_sys_obj.cc
index 310e60514d2..38e7acdfe0b 100644
--- a/src/rgw/services/svc_sys_obj.cc
+++ b/src/rgw/services/svc_sys_obj.cc
@@ -3,7 +3,6 @@
#include "svc_sys_obj.h"
#include "svc_sys_obj_core.h"
-#include "svc_rados.h"
#include "svc_zone.h"
#include "rgw_zone.h"
diff --git a/src/rgw/services/svc_sys_obj.h b/src/rgw/services/svc_sys_obj.h
index 292340dcbe3..0bba2fbe01d 100644
--- a/src/rgw/services/svc_sys_obj.h
+++ b/src/rgw/services/svc_sys_obj.h
@@ -7,7 +7,6 @@
#include "rgw_service.h"
-#include "svc_rados.h"
#include "svc_sys_obj_types.h"
#include "svc_sys_obj_core_types.h"
@@ -246,12 +245,12 @@ public:
friend class Pool::Op;
protected:
- RGWSI_RADOS *rados_svc{nullptr};
+ librados::Rados* rados{nullptr};
RGWSI_SysObj_Core *core_svc{nullptr};
- void init(RGWSI_RADOS *_rados_svc,
+ void init(librados::Rados* rados_,
RGWSI_SysObj_Core *_core_svc) {
- rados_svc = _rados_svc;
+ rados = rados_;
core_svc = _core_svc;
}
diff --git a/src/rgw/services/svc_sys_obj_cache.cc b/src/rgw/services/svc_sys_obj_cache.cc
index d1b7a3dbb3e..cd4e2c405ab 100644
--- a/src/rgw/services/svc_sys_obj_cache.cc
+++ b/src/rgw/services/svc_sys_obj_cache.cc
@@ -1,4 +1,3 @@
-
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
// vim: ts=8 sw=2 smarttab ft=cpp
diff --git a/src/rgw/services/svc_sys_obj_cache.h b/src/rgw/services/svc_sys_obj_cache.h
index f7950843fa9..8e2f5845dc1 100644
--- a/src/rgw/services/svc_sys_obj_cache.h
+++ b/src/rgw/services/svc_sys_obj_cache.h
@@ -17,7 +17,7 @@ class RGWSI_SysObj_Cache_ASocketHook;
class RGWSI_SysObj_Cache : public RGWSI_SysObj_Core
{
friend class RGWSI_SysObj_Cache_CB;
- friend class RGWServices_Def;
+ friend RGWServices_Def;
friend class ASocketHandler;
RGWSI_Notify *notify_svc{nullptr};
@@ -27,10 +27,10 @@ class RGWSI_SysObj_Cache : public RGWSI_SysObj_Core
void normalize_pool_and_obj(const rgw_pool& src_pool, const std::string& src_obj, rgw_pool& dst_pool, std::string& dst_obj);
protected:
- void init(RGWSI_RADOS *_rados_svc,
+ void init(librados::Rados* rados_,
RGWSI_Zone *_zone_svc,
RGWSI_Notify *_notify_svc) {
- core_init(_rados_svc, _zone_svc);
+ core_init(rados_, _zone_svc);
notify_svc = _notify_svc;
}
@@ -80,12 +80,12 @@ protected:
real_time set_mtime,
optional_yield y) override;
- int write_data(const DoutPrefixProvider *dpp,
+ int write_data(const DoutPrefixProvider *dpp,
const rgw_raw_obj& obj,
const bufferlist& bl,
bool exclusive,
RGWObjVersionTracker *objv_tracker,
- optional_yield y);
+ optional_yield y) override;
int distribute_cache(const DoutPrefixProvider *dpp, const std::string& normal_name, const rgw_raw_obj& obj,
ObjectCacheInfo& obj_info, int op,
diff --git a/src/rgw/services/svc_sys_obj_core.cc b/src/rgw/services/svc_sys_obj_core.cc
index 30308969131..397709c5d99 100644
--- a/src/rgw/services/svc_sys_obj_core.cc
+++ b/src/rgw/services/svc_sys_obj_core.cc
@@ -2,7 +2,6 @@
// vim: ts=8 sw=2 smarttab ft=cpp
#include "svc_sys_obj_core.h"
-#include "svc_rados.h"
#include "svc_zone.h"
#include "rgw_tools.h"
@@ -12,10 +11,10 @@
using namespace std;
int RGWSI_SysObj_Core_GetObjState::get_rados_obj(const DoutPrefixProvider *dpp,
- RGWSI_RADOS *rados_svc,
+ librados::Rados* rados,
RGWSI_Zone *zone_svc,
const rgw_raw_obj& obj,
- RGWSI_RADOS::Obj **pobj)
+ rgw_rados_ref** pobj)
{
if (!has_rados_obj) {
if (obj.oid.empty()) {
@@ -23,8 +22,7 @@ int RGWSI_SysObj_Core_GetObjState::get_rados_obj(const DoutPrefixProvider *dpp,
return -EINVAL;
}
- rados_obj = rados_svc->obj(obj);
- int r = rados_obj.open(dpp);
+ int r = rgw_get_rados_ref(dpp, rados, obj, &rados_obj);
if (r < 0) {
return r;
}
@@ -37,15 +35,14 @@ int RGWSI_SysObj_Core_GetObjState::get_rados_obj(const DoutPrefixProvider *dpp,
int RGWSI_SysObj_Core::get_rados_obj(const DoutPrefixProvider *dpp,
RGWSI_Zone *zone_svc,
const rgw_raw_obj& obj,
- RGWSI_RADOS::Obj *pobj)
+ rgw_rados_ref* pobj)
{
if (obj.oid.empty()) {
ldpp_dout(dpp, 0) << "ERROR: obj.oid is empty" << dendl;
return -EINVAL;
}
- *pobj = rados_svc->obj(obj);
- int r = pobj->open(dpp);
+ int r = rgw_get_rados_ref(dpp, rados, obj, pobj);
if (r < 0) {
return r;
}
@@ -59,7 +56,7 @@ int RGWSI_SysObj_Core::raw_stat(const DoutPrefixProvider *dpp, const rgw_raw_obj
RGWObjVersionTracker *objv_tracker,
optional_yield y)
{
- RGWSI_RADOS::Obj rados_obj;
+ rgw_rados_ref rados_obj;
int r = get_rados_obj(dpp, zone_svc, obj, &rados_obj);
if (r < 0) {
return r;
@@ -172,7 +169,7 @@ int RGWSI_SysObj_Core::read(const DoutPrefixProvider *dpp,
}
}
- RGWSI_RADOS::Obj rados_obj;
+ rgw_rados_ref rados_obj;
int r = get_rados_obj(dpp, zone_svc, obj, &rados_obj);
if (r < 0) {
ldpp_dout(dpp, 20) << "get_rados_obj() on obj=" << obj << " returned " << r << dendl;
@@ -185,7 +182,7 @@ int RGWSI_SysObj_Core::read(const DoutPrefixProvider *dpp,
}
ldpp_dout(dpp, 20) << "rados_obj.operate() r=" << r << " bl.length=" << bl->length() << dendl;
- uint64_t op_ver = rados_obj.get_last_version();
+ uint64_t op_ver = rados_obj.ioctx.get_last_version();
if (read_state.last_ver > 0 &&
read_state.last_ver != op_ver) {
@@ -218,7 +215,7 @@ int RGWSI_SysObj_Core::get_attr(const DoutPrefixProvider *dpp,
bufferlist *dest,
optional_yield y)
{
- RGWSI_RADOS::Obj rados_obj;
+ rgw_rados_ref rados_obj;
int r = get_rados_obj(dpp, zone_svc, obj, &rados_obj);
if (r < 0) {
ldpp_dout(dpp, 20) << "get_rados_obj() on obj=" << obj << " returned " << r << dendl;
@@ -229,7 +226,7 @@ int RGWSI_SysObj_Core::get_attr(const DoutPrefixProvider *dpp,
int rval;
op.getxattr(name, dest, &rval);
-
+
r = rados_obj.operate(dpp, &op, nullptr, y);
if (r < 0)
return r;
@@ -244,7 +241,7 @@ int RGWSI_SysObj_Core::set_attrs(const DoutPrefixProvider *dpp,
RGWObjVersionTracker *objv_tracker,
bool exclusive, optional_yield y)
{
- RGWSI_RADOS::Obj rados_obj;
+ rgw_rados_ref rados_obj;
int r = get_rados_obj(dpp, zone_svc, obj, &rados_obj);
if (r < 0) {
ldpp_dout(dpp, 20) << "get_rados_obj() on obj=" << obj << " returned " << r << dendl;
@@ -301,7 +298,7 @@ int RGWSI_SysObj_Core::omap_get_vals(const DoutPrefixProvider *dpp,
bool *pmore,
optional_yield y)
{
- RGWSI_RADOS::Obj rados_obj;
+ rgw_rados_ref rados_obj;
int r = get_rados_obj(dpp, zone_svc, obj, &rados_obj);
if (r < 0) {
ldpp_dout(dpp, 20) << "get_rados_obj() on obj=" << obj << " returned " << r << dendl;
@@ -341,7 +338,7 @@ int RGWSI_SysObj_Core::omap_get_all(const DoutPrefixProvider *dpp,
std::map<string, bufferlist> *m,
optional_yield y)
{
- RGWSI_RADOS::Obj rados_obj;
+ rgw_rados_ref rados_obj;
int r = get_rados_obj(dpp, zone_svc, obj, &rados_obj);
if (r < 0) {
ldpp_dout(dpp, 20) << "get_rados_obj() on obj=" << obj << " returned " << r << dendl;
@@ -359,7 +356,7 @@ int RGWSI_SysObj_Core::omap_get_all(const DoutPrefixProvider *dpp,
std::map<string, bufferlist> t;
int rval;
op.omap_get_vals2(start_after, count, &t, &more, &rval);
-
+
r = rados_obj.operate(dpp, &op, nullptr, y);
if (r < 0) {
return r;
@@ -377,7 +374,7 @@ int RGWSI_SysObj_Core::omap_set(const DoutPrefixProvider *dpp, const rgw_raw_obj
bufferlist& bl, bool must_exist,
optional_yield y)
{
- RGWSI_RADOS::Obj rados_obj;
+ rgw_rados_ref rados_obj;
int r = get_rados_obj(dpp, zone_svc, obj, &rados_obj);
if (r < 0) {
ldpp_dout(dpp, 20) << "get_rados_obj() on obj=" << obj << " returned " << r << dendl;
@@ -400,7 +397,7 @@ int RGWSI_SysObj_Core::omap_set(const DoutPrefixProvider *dpp, const rgw_raw_obj
const std::map<std::string, bufferlist>& m,
bool must_exist, optional_yield y)
{
- RGWSI_RADOS::Obj rados_obj;
+ rgw_rados_ref rados_obj;
int r = get_rados_obj(dpp, zone_svc, obj, &rados_obj);
if (r < 0) {
ldpp_dout(dpp, 20) << "get_rados_obj() on obj=" << obj << " returned " << r << dendl;
@@ -418,7 +415,7 @@ int RGWSI_SysObj_Core::omap_set(const DoutPrefixProvider *dpp, const rgw_raw_obj
int RGWSI_SysObj_Core::omap_del(const DoutPrefixProvider *dpp, const rgw_raw_obj& obj, const std::string& key,
optional_yield y)
{
- RGWSI_RADOS::Obj rados_obj;
+ rgw_rados_ref rados_obj;
int r = get_rados_obj(dpp, zone_svc, obj, &rados_obj);
if (r < 0) {
ldpp_dout(dpp, 20) << "get_rados_obj() on obj=" << obj << " returned " << r << dendl;
@@ -440,7 +437,7 @@ int RGWSI_SysObj_Core::notify(const DoutPrefixProvider *dpp, const rgw_raw_obj&
uint64_t timeout_ms, bufferlist *pbl,
optional_yield y)
{
- RGWSI_RADOS::Obj rados_obj;
+ rgw_rados_ref rados_obj;
int r = get_rados_obj(dpp, zone_svc, obj, &rados_obj);
if (r < 0) {
ldpp_dout(dpp, 20) << "get_rados_obj() on obj=" << obj << " returned " << r << dendl;
@@ -456,7 +453,7 @@ int RGWSI_SysObj_Core::remove(const DoutPrefixProvider *dpp,
const rgw_raw_obj& obj,
optional_yield y)
{
- RGWSI_RADOS::Obj rados_obj;
+ rgw_rados_ref rados_obj;
int r = get_rados_obj(dpp, zone_svc, obj, &rados_obj);
if (r < 0) {
ldpp_dout(dpp, 20) << "get_rados_obj() on obj=" << obj << " returned " << r << dendl;
@@ -487,7 +484,7 @@ int RGWSI_SysObj_Core::write(const DoutPrefixProvider *dpp,
real_time set_mtime,
optional_yield y)
{
- RGWSI_RADOS::Obj rados_obj;
+ rgw_rados_ref rados_obj;
int r = get_rados_obj(dpp, zone_svc, obj, &rados_obj);
if (r < 0) {
ldpp_dout(dpp, 20) << "get_rados_obj() on obj=" << obj << " returned " << r << dendl;
@@ -552,7 +549,7 @@ int RGWSI_SysObj_Core::write_data(const DoutPrefixProvider *dpp,
RGWObjVersionTracker *objv_tracker,
optional_yield y)
{
- RGWSI_RADOS::Obj rados_obj;
+ rgw_rados_ref rados_obj;
int r = get_rados_obj(dpp, zone_svc, obj, &rados_obj);
if (r < 0) {
ldpp_dout(dpp, 20) << "get_rados_obj() on obj=" << obj << " returned " << r << dendl;
@@ -585,21 +582,17 @@ int RGWSI_SysObj_Core::pool_list_prefixed_objs(const DoutPrefixProvider *dpp,
{
bool is_truncated;
- auto rados_pool = rados_svc->pool(pool);
-
- auto op = rados_pool.op();
+ librados::IoCtx rados_pool;
+ rgw_init_ioctx(dpp, rados, pool, rados_pool, true, false);
- RGWAccessListFilterPrefix filter(prefix);
-
- int r = op.init(dpp, string(), &filter);
- if (r < 0) {
- return r;
- }
+ auto filter{rgw::AccessListFilterPrefix(prefix)};
+ std::string marker;
do {
vector<string> oids;
-#define MAX_OBJS_DEFAULT 1000
- int r = op.get_next(dpp, MAX_OBJS_DEFAULT, &oids, &is_truncated);
+ static constexpr auto MAX_OBJS_DEFAULT = 1000u;
+ int r = rgw_list_pool(dpp, rados_pool, MAX_OBJS_DEFAULT, filter, marker,
+ &oids, &is_truncated);
if (r < 0) {
return r;
}
@@ -619,16 +612,13 @@ int RGWSI_SysObj_Core::pool_list_objects_init(const DoutPrefixProvider *dpp,
const string& prefix,
RGWSI_SysObj::Pool::ListCtx *_ctx)
{
- _ctx->impl.emplace<PoolListImplInfo>(prefix);
+ _ctx->impl.emplace<PoolListImplInfo>(prefix, marker);
auto& ctx = static_cast<PoolListImplInfo&>(*_ctx->impl);
- ctx.pool = rados_svc->pool(pool);
- ctx.op = ctx.pool.op();
-
- int r = ctx.op.init(dpp, marker, &ctx.filter);
+ int r = rgw_init_ioctx(dpp, rados, pool, ctx.pool, true, false);
if (r < 0) {
- ldpp_dout(dpp, 10) << "failed to list objects pool_iterate_begin() returned r=" << r << dendl;
+ ldpp_dout(dpp, 10) << "failed to create IoCtx returned r=" << r << dendl;
return r;
}
return 0;
@@ -644,7 +634,8 @@ int RGWSI_SysObj_Core::pool_list_objects_next(const DoutPrefixProvider *dpp,
return -EINVAL;
}
auto& ctx = static_cast<PoolListImplInfo&>(*_ctx.impl);
- int r = ctx.op.get_next(dpp, max, oids, is_truncated);
+ int r = rgw_list_pool(dpp, ctx.pool, max, ctx.filter, ctx.marker, oids,
+ is_truncated);
if (r < 0) {
if(r != -ENOENT)
ldpp_dout(dpp, 10) << "failed to list objects pool_iterate returned r=" << r << dendl;
@@ -662,5 +653,6 @@ int RGWSI_SysObj_Core::pool_list_objects_get_marker(RGWSI_SysObj::Pool::ListCtx&
}
auto& ctx = static_cast<PoolListImplInfo&>(*_ctx.impl);
- return ctx.op.get_marker(marker);
+ *marker = ctx.marker;
+ return 0;
}
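
Pool listing in the hunks above moves from the RGWSI_RADOS::Pool::List state machine to a plain IoCtx, an rgw::AccessListFilterPrefix callable, and an externally held marker string. A condensed sketch of the new flow, assuming (as the marker handling above implies) that rgw_list_pool() advances the marker it is given:

    librados::IoCtx ioctx;
    int r = rgw_init_ioctx(dpp, rados, pool, ioctx, true, false);
    if (r < 0) {
      return r;
    }

    auto filter = rgw::AccessListFilterPrefix(prefix);
    std::string marker;
    bool is_truncated = true;
    while (is_truncated) {
      std::vector<std::string> oids;
      r = rgw_list_pool(dpp, ioctx, 1000, filter, marker, &oids, &is_truncated);
      if (r < 0) {
        return r;
      }
      // handle this batch of oids; 'marker' now records the resume position
    }
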
diff --git a/src/rgw/services/svc_sys_obj_core.h b/src/rgw/services/svc_sys_obj_core.h
index d02a37eee8a..d9dfc02f01b 100644
--- a/src/rgw/services/svc_sys_obj_core.h
+++ b/src/rgw/services/svc_sys_obj_core.h
@@ -5,7 +5,6 @@
#include "rgw_service.h"
-#include "svc_rados.h"
#include "svc_sys_obj.h"
#include "svc_sys_obj_core_types.h"
@@ -16,22 +15,22 @@ struct rgw_cache_entry_info;
class RGWSI_SysObj_Core : public RGWServiceInstance
{
- friend class RGWServices_Def;
+ friend struct RGWServices_Def;
friend class RGWSI_SysObj;
protected:
- RGWSI_RADOS *rados_svc{nullptr};
+ librados::Rados* rados{nullptr};
RGWSI_Zone *zone_svc{nullptr};
using GetObjState = RGWSI_SysObj_Core_GetObjState;
using PoolListImplInfo = RGWSI_SysObj_Core_PoolListImplInfo;
- void core_init(RGWSI_RADOS *_rados_svc,
+ void core_init(librados::Rados* rados_,
RGWSI_Zone *_zone_svc) {
- rados_svc = _rados_svc;
+ rados = rados_;
zone_svc = _zone_svc;
}
- int get_rados_obj(const DoutPrefixProvider *dpp, RGWSI_Zone *zone_svc, const rgw_raw_obj& obj, RGWSI_RADOS::Obj *pobj);
+ int get_rados_obj(const DoutPrefixProvider *dpp, RGWSI_Zone *zone_svc, const rgw_raw_obj& obj, rgw_rados_ref* pobj);
virtual int raw_stat(const DoutPrefixProvider *dpp, const rgw_raw_obj& obj,
uint64_t *psize, real_time *pmtime,
diff --git a/src/rgw/services/svc_sys_obj_core_types.h b/src/rgw/services/svc_sys_obj_core_types.h
index 74f489d914e..a7b6aed0842 100644
--- a/src/rgw/services/svc_sys_obj_core_types.h
+++ b/src/rgw/services/svc_sys_obj_core_types.h
@@ -4,31 +4,33 @@
#pragma once
+#include "rgw_tools.h"
#include "rgw_service.h"
-#include "svc_rados.h"
#include "svc_sys_obj_types.h"
struct RGWSI_SysObj_Core_GetObjState : public RGWSI_SysObj_Obj_GetObjState {
- RGWSI_RADOS::Obj rados_obj;
+ rgw_rados_ref rados_obj;
bool has_rados_obj{false};
uint64_t last_ver{0};
RGWSI_SysObj_Core_GetObjState() {}
int get_rados_obj(const DoutPrefixProvider *dpp,
- RGWSI_RADOS *rados_svc,
+ librados::Rados* rados_svc,
RGWSI_Zone *zone_svc,
const rgw_raw_obj& obj,
- RGWSI_RADOS::Obj **pobj);
+ rgw_rados_ref** pobj);
};
struct RGWSI_SysObj_Core_PoolListImplInfo : public RGWSI_SysObj_Pool_ListInfo {
- RGWSI_RADOS::Pool pool;
- RGWSI_RADOS::Pool::List op;
- RGWAccessListFilterPrefix filter;
+ librados::IoCtx pool;
+ rgw::AccessListFilter filter;
+ std::string marker;
- RGWSI_SysObj_Core_PoolListImplInfo(const std::string& prefix) : op(pool.op()), filter(prefix) {}
+ RGWSI_SysObj_Core_PoolListImplInfo(const std::string& prefix,
+ const std::string& marker)
+ : filter(rgw::AccessListFilterPrefix(prefix)), marker(marker) {}
};
diff --git a/src/rgw/services/svc_tier_rados.cc b/src/rgw/services/svc_tier_rados.cc
index ca87e8aceb9..86ccb5eca56 100644
--- a/src/rgw/services/svc_tier_rados.cc
+++ b/src/rgw/services/svc_tier_rados.cc
@@ -7,9 +7,7 @@ using namespace std;
const std::string MP_META_SUFFIX = ".meta";
-MultipartMetaFilter::~MultipartMetaFilter() {}
-
-bool MultipartMetaFilter::filter(const string& name, string& key) {
+bool MultipartMetaFilter(const string& name, string& key) {
// the length of the suffix so we can skip past it
static const size_t MP_META_SUFFIX_LEN = MP_META_SUFFIX.length();
@@ -32,5 +30,3 @@ bool MultipartMetaFilter::filter(const string& name, string& key) {
return true;
}
-
-
diff --git a/src/rgw/services/svc_tier_rados.h b/src/rgw/services/svc_tier_rados.h
index a2036b93347..0327e465881 100644
--- a/src/rgw/services/svc_tier_rados.h
+++ b/src/rgw/services/svc_tier_rados.h
@@ -20,8 +20,6 @@
#include "rgw_service.h"
-#include "svc_rados.h"
-
extern const std::string MP_META_SUFFIX;
class RGWMPObj {
@@ -110,21 +108,14 @@ public:
* the name provided is such. It will also extract the key used for
* bucket index shard calculation from the adorned name.
*/
-class MultipartMetaFilter : public RGWAccessListFilter {
-public:
- MultipartMetaFilter() {}
-
- virtual ~MultipartMetaFilter() override;
-
- /**
- * @param name [in] The object name as it appears in the bucket index.
- * @param key [out] An output parameter that will contain the bucket
- * index key if this entry is in the form of a multipart meta object.
- * @return true if the name provided is in the form of a multipart meta
- * object, false otherwise
- */
- bool filter(const std::string& name, std::string& key) override;
-};
+/**
+ * @param name [in] The object name as it appears in the bucket index.
+ * @param key [out] An output parameter that will contain the bucket
+ * index key if this entry is in the form of a multipart meta object.
+ * @return true if the name provided is in the form of a multipart meta
+ * object, false otherwise
+ */
+bool MultipartMetaFilter(const std::string& name, std::string& key);
class RGWSI_Tier_RADOS : public RGWServiceInstance
{
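
With the filter class removed, MultipartMetaFilter is now an ordinary predicate and can be passed anywhere an rgw::AccessListFilter callable is expected. A small sketch, assuming AccessListFilter is a std::function-style callable with the bool(const std::string&, std::string&) signature used throughout this diff (adorned_name is a placeholder):

    rgw::AccessListFilter filter = MultipartMetaFilter;
    std::string key;
    if (filter(adorned_name, key)) {  // adorned_name: a bucket-index entry ending in ".meta"
      // key now holds the bucket-index shard key extracted from the name
    }
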
diff --git a/src/rgw/services/svc_user.h b/src/rgw/services/svc_user.h
index 1cb459d31cb..f53ec49f259 100644
--- a/src/rgw/services/svc_user.h
+++ b/src/rgw/services/svc_user.h
@@ -20,9 +20,9 @@
#include "svc_meta_be.h"
#include "rgw_service.h"
+#include "rgw_sal_fwd.h"
class RGWUserBuckets;
-class RGWGetUserStats_CB;
class RGWSI_User : public RGWServiceInstance
{
@@ -122,6 +122,7 @@ public:
optional_yield y) = 0; /* last time a stats update was done */
virtual int read_stats_async(const DoutPrefixProvider *dpp,
- const rgw_user& user, RGWGetUserStats_CB *cb) = 0;
+ const rgw_user& user,
+ boost::intrusive_ptr<rgw::sal::ReadStatsCB> cb) = 0;
};
diff --git a/src/rgw/services/svc_user_rados.cc b/src/rgw/services/svc_user_rados.cc
index c99af935415..dd48bc4c01e 100644
--- a/src/rgw/services/svc_user_rados.cc
+++ b/src/rgw/services/svc_user_rados.cc
@@ -67,14 +67,14 @@ RGWSI_User_RADOS::RGWSI_User_RADOS(CephContext *cct): RGWSI_User(cct) {
RGWSI_User_RADOS::~RGWSI_User_RADOS() {
}
-void RGWSI_User_RADOS::init(RGWSI_RADOS *_rados_svc,
+void RGWSI_User_RADOS::init(librados::Rados* rados_,
RGWSI_Zone *_zone_svc, RGWSI_SysObj *_sysobj_svc,
RGWSI_SysObj_Cache *_cache_svc, RGWSI_Meta *_meta_svc,
RGWSI_MetaBackend *_meta_be_svc,
RGWSI_SyncModules *_sync_modules_svc)
{
svc.user = this;
- svc.rados = _rados_svc;
+ rados = rados_;
svc.zone = _zone_svc;
svc.sysobj = _sysobj_svc;
svc.cache = _cache_svc;
@@ -602,8 +602,8 @@ int RGWSI_User_RADOS::get_user_info_by_access_key(RGWSI_MetaBackend::Context *ct
int RGWSI_User_RADOS::cls_user_update_buckets(const DoutPrefixProvider *dpp, rgw_raw_obj& obj, list<cls_user_bucket_entry>& entries, bool add, optional_yield y)
{
- auto rados_obj = svc.rados->obj(obj);
- int r = rados_obj.open(dpp);
+ rgw_rados_ref rados_obj;
+ int r = rgw_get_rados_ref(dpp, rados, obj, &rados_obj);
if (r < 0) {
return r;
}
@@ -628,8 +628,8 @@ int RGWSI_User_RADOS::cls_user_add_bucket(const DoutPrefixProvider *dpp, rgw_raw
int RGWSI_User_RADOS::cls_user_remove_bucket(const DoutPrefixProvider *dpp, rgw_raw_obj& obj, const cls_user_bucket& bucket, optional_yield y)
{
- auto rados_obj = svc.rados->obj(obj);
- int r = rados_obj.open(dpp);
+ rgw_rados_ref rados_obj;
+ int r = rgw_get_rados_ref(dpp, rados, obj, &rados_obj);
if (r < 0) {
return r;
}
@@ -716,8 +716,8 @@ int RGWSI_User_RADOS::cls_user_list_buckets(const DoutPrefixProvider *dpp,
bool * const truncated,
optional_yield y)
{
- auto rados_obj = svc.rados->obj(obj);
- int r = rados_obj.open(dpp);
+ rgw_rados_ref rados_obj;
+ int r = rgw_get_rados_ref(dpp, rados, obj, &rados_obj);
if (r < 0) {
return r;
}
@@ -804,12 +804,14 @@ int RGWSI_User_RADOS::reset_bucket_stats(const DoutPrefixProvider *dpp,
int RGWSI_User_RADOS::cls_user_reset_stats(const DoutPrefixProvider *dpp, const rgw_user& user, optional_yield y)
{
rgw_raw_obj obj = get_buckets_obj(user);
- auto rados_obj = svc.rados->obj(obj);
- int rval, r = rados_obj.open(dpp);
+ rgw_rados_ref rados_obj;
+ int r = rgw_get_rados_ref(dpp, rados, obj, &rados_obj);
if (r < 0) {
return r;
}
+ int rval;
+
cls_user_reset_stats2_op call;
cls_user_reset_stats2_ret ret;
@@ -841,11 +843,12 @@ int RGWSI_User_RADOS::complete_flush_stats(const DoutPrefixProvider *dpp,
const rgw_user& user, optional_yield y)
{
rgw_raw_obj obj = get_buckets_obj(user);
- auto rados_obj = svc.rados->obj(obj);
- int r = rados_obj.open(dpp);
+ rgw_rados_ref rados_obj;
+ int r = rgw_get_rados_ref(dpp, rados, obj, &rados_obj);
if (r < 0) {
return r;
}
+
librados::ObjectWriteOperation op;
::cls_user_complete_stats_sync(op);
return rados_obj.operate(dpp, &op, y);
@@ -856,8 +859,8 @@ int RGWSI_User_RADOS::cls_user_get_header(const DoutPrefixProvider *dpp,
optional_yield y)
{
rgw_raw_obj obj = get_buckets_obj(user);
- auto rados_obj = svc.rados->obj(obj);
- int r = rados_obj.open(dpp);
+ rgw_rados_ref rados_obj;
+ int r = rgw_get_rados_ref(dpp, rados, obj, &rados_obj);
if (r < 0) {
return r;
}
@@ -871,15 +874,13 @@ int RGWSI_User_RADOS::cls_user_get_header(const DoutPrefixProvider *dpp,
int RGWSI_User_RADOS::cls_user_get_header_async(const DoutPrefixProvider *dpp, const string& user_str, RGWGetUserHeader_CB *cb)
{
rgw_raw_obj obj = get_buckets_obj(rgw_user(user_str));
- auto rados_obj = svc.rados->obj(obj);
- int r = rados_obj.open(dpp);
+ rgw_rados_ref ref;
+ int r = rgw_get_rados_ref(dpp, rados, obj, &ref);
if (r < 0) {
return r;
}
- auto& ref = rados_obj.get_ref();
-
- r = ::cls_user_get_header_async(ref.pool.ioctx(), ref.obj.oid, cb);
+ r = ::cls_user_get_header_async(ref.ioctx, ref.obj.oid, cb);
if (r < 0) {
return r;
}
@@ -927,36 +928,32 @@ int RGWSI_User_RADOS::read_stats(const DoutPrefixProvider *dpp,
}
class RGWGetUserStatsContext : public RGWGetUserHeader_CB {
- RGWGetUserStats_CB *cb;
+ boost::intrusive_ptr<rgw::sal::ReadStatsCB> cb;
public:
- explicit RGWGetUserStatsContext(RGWGetUserStats_CB * const cb)
- : cb(cb) {}
+ explicit RGWGetUserStatsContext(boost::intrusive_ptr<rgw::sal::ReadStatsCB> cb)
+ : cb(std::move(cb)) {}
void handle_response(int r, cls_user_header& header) override {
const cls_user_stats& hs = header.stats;
- if (r >= 0) {
- RGWStorageStats stats;
-
- stats.size = hs.total_bytes;
- stats.size_rounded = hs.total_bytes_rounded;
- stats.num_objects = hs.total_entries;
-
- cb->set_response(stats);
- }
+ RGWStorageStats stats;
- cb->handle_response(r);
+ stats.size = hs.total_bytes;
+ stats.size_rounded = hs.total_bytes_rounded;
+ stats.num_objects = hs.total_entries;
- cb->put();
+ cb->handle_response(r, stats);
+ cb.reset();
}
};
int RGWSI_User_RADOS::read_stats_async(const DoutPrefixProvider *dpp,
- const rgw_user& user, RGWGetUserStats_CB *_cb)
+ const rgw_user& user,
+ boost::intrusive_ptr<rgw::sal::ReadStatsCB> _cb)
{
string user_str = user.to_str();
- RGWGetUserStatsContext *cb = new RGWGetUserStatsContext(_cb);
+ RGWGetUserStatsContext *cb = new RGWGetUserStatsContext(std::move(_cb));
int r = cls_user_get_header_async(dpp, user_str, cb);
if (r < 0) {
delete cb;
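
The callback above switches from a raw RGWGetUserStats_CB* with a manual put() to a boost::intrusive_ptr<rgw::sal::ReadStatsCB>, so the reference is dropped by cb.reset() instead of by hand. A self-contained sketch of the intrusive_ptr mechanics (the real ReadStatsCB base class is not shown in this diff):

    #include <atomic>
    #include <boost/intrusive_ptr.hpp>

    struct my_cb {
      std::atomic<int> nref{0};
    };
    inline void intrusive_ptr_add_ref(my_cb* p) { p->nref++; }
    inline void intrusive_ptr_release(my_cb* p) { if (--p->nref == 0) delete p; }

    void demo() {
      boost::intrusive_ptr<my_cb> cb{new my_cb};  // add_ref -> nref == 1
      cb.reset();                                 // release -> object deleted
    }
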
diff --git a/src/rgw/services/svc_user_rados.h b/src/rgw/services/svc_user_rados.h
index 4712d1fdafd..6912327f0b1 100644
--- a/src/rgw/services/svc_user_rados.h
+++ b/src/rgw/services/svc_user_rados.h
@@ -23,7 +23,6 @@
#include "driver/rados/rgw_bucket.h" // FIXME: subclass dependency
-class RGWSI_RADOS;
class RGWSI_Zone;
class RGWSI_SysObj;
class RGWSI_SysObj_Cache;
@@ -34,7 +33,6 @@ class RGWSI_MetaBackend_Handler;
struct rgw_cache_entry_info;
class RGWGetUserHeader_CB;
-class RGWGetUserStats_CB;
template <class T>
class RGWChainedCacheImpl;
@@ -97,9 +95,10 @@ class RGWSI_User_RADOS : public RGWSI_User
int do_start(optional_yield, const DoutPrefixProvider *dpp) override;
public:
+ librados::Rados* rados{nullptr};
+
struct Svc {
RGWSI_User_RADOS *user{nullptr};
- RGWSI_RADOS *rados{nullptr};
RGWSI_Zone *zone{nullptr};
RGWSI_SysObj *sysobj{nullptr};
RGWSI_SysObj_Cache *cache{nullptr};
@@ -111,7 +110,7 @@ public:
RGWSI_User_RADOS(CephContext *cct);
~RGWSI_User_RADOS();
- void init(RGWSI_RADOS *_rados_svc,
+ void init(librados::Rados* rados_,
RGWSI_Zone *_zone_svc, RGWSI_SysObj *_sysobj_svc,
RGWSI_SysObj_Cache *_cache_svc, RGWSI_Meta *_meta_svc,
RGWSI_MetaBackend *_meta_be_svc,
@@ -207,6 +206,6 @@ public:
optional_yield y) override; /* last time a stats update was done */
int read_stats_async(const DoutPrefixProvider *dpp, const rgw_user& user,
- RGWGetUserStats_CB *cb) override;
+ boost::intrusive_ptr<rgw::sal::ReadStatsCB> cb) override;
};
diff --git a/src/rgw/services/svc_zone.cc b/src/rgw/services/svc_zone.cc
index 8bb55e4513d..70cf40eb6cb 100644
--- a/src/rgw/services/svc_zone.cc
+++ b/src/rgw/services/svc_zone.cc
@@ -2,10 +2,10 @@
// vim: ts=8 sw=2 smarttab ft=cpp
#include "svc_zone.h"
-#include "svc_rados.h"
#include "svc_sys_obj.h"
#include "svc_sync_modules.h"
+#include "rgw_tools.h"
#include "rgw_zone.h"
#include "rgw_rest_conn.h"
#include "rgw_bucket_sync.h"
@@ -23,12 +23,12 @@ RGWSI_Zone::RGWSI_Zone(CephContext *cct) : RGWServiceInstance(cct)
}
void RGWSI_Zone::init(RGWSI_SysObj *_sysobj_svc,
- RGWSI_RADOS * _rados_svc,
+ librados::Rados* rados_,
RGWSI_SyncModules * _sync_modules_svc,
RGWSI_Bucket_Sync *_bucket_sync_svc)
{
sysobj_svc = _sysobj_svc;
- rados_svc = _rados_svc;
+ rados = rados_;
sync_modules_svc = _sync_modules_svc;
bucket_sync_svc = _bucket_sync_svc;
@@ -134,11 +134,6 @@ int RGWSI_Zone::do_start(optional_yield y, const DoutPrefixProvider *dpp)
assert(sysobj_svc->is_started()); /* if not then there's ordering issue */
- ret = rados_svc->start(y, dpp);
- if (ret < 0) {
- return ret;
- }
-
ret = realm->init(dpp, cct, sysobj_svc, y);
if (ret < 0 && ret != -ENOENT) {
ldpp_dout(dpp, 0) << "failed reading realm info: ret "<< ret << " " << cpp_strerror(-ret) << dendl;
diff --git a/src/rgw/services/svc_zone.h b/src/rgw/services/svc_zone.h
index 3891b73ddb4..c4a3a28f0d7 100644
--- a/src/rgw/services/svc_zone.h
+++ b/src/rgw/services/svc_zone.h
@@ -6,7 +6,6 @@
#include "rgw_service.h"
-class RGWSI_RADOS;
class RGWSI_SysObj;
class RGWSI_SyncModules;
class RGWSI_Bucket_Sync;
@@ -29,7 +28,7 @@ class RGWSI_Zone : public RGWServiceInstance
friend struct RGWServices_Def;
RGWSI_SysObj *sysobj_svc{nullptr};
- RGWSI_RADOS *rados_svc{nullptr};
+ librados::Rados* rados{nullptr};
RGWSI_SyncModules *sync_modules_svc{nullptr};
RGWSI_Bucket_Sync *bucket_sync_svc{nullptr};
@@ -58,7 +57,7 @@ class RGWSI_Zone : public RGWServiceInstance
std::unique_ptr<rgw_sync_policy_info> sync_policy;
void init(RGWSI_SysObj *_sysobj_svc,
- RGWSI_RADOS *_rados_svc,
+ librados::Rados* rados_,
RGWSI_SyncModules *_sync_modules_svc,
RGWSI_Bucket_Sync *_bucket_sync_svc);
int do_start(optional_yield y, const DoutPrefixProvider *dpp) override;
diff --git a/src/rgw/services/svc_zone_utils.cc b/src/rgw/services/svc_zone_utils.cc
index 712bb97c9ba..c809974e6d5 100644
--- a/src/rgw/services/svc_zone_utils.cc
+++ b/src/rgw/services/svc_zone_utils.cc
@@ -2,9 +2,12 @@
// vim: ts=8 sw=2 smarttab ft=cpp
#include "svc_zone_utils.h"
-#include "svc_rados.h"
#include "svc_zone.h"
+#undef FMT_HEADER_ONLY
+#define FMT_HEADER_ONLY 1
+#include <fmt/format.h>
+
#include "rgw_zone.h"
using namespace std;
@@ -18,26 +21,24 @@ int RGWSI_ZoneUtils::do_start(optional_yield, const DoutPrefixProvider *dpp)
string RGWSI_ZoneUtils::gen_host_id() {
/* uint64_t needs 16, two '-' separators and a trailing null */
- const string& zone_name = zone_svc->get_zone().name;
- const string& zonegroup_name = zone_svc->get_zonegroup().get_name();
- char charbuf[16 + zone_name.size() + zonegroup_name.size() + 2 + 1];
- snprintf(charbuf, sizeof(charbuf), "%llx-%s-%s", (unsigned long long)rados_svc->instance_id(), zone_name.c_str(), zonegroup_name.c_str());
- return string(charbuf);
+ return fmt::format("{}-{}-{}", rados->get_instance_id(),
+ zone_svc->get_zone().name,
+ zone_svc->get_zonegroup().get_name());
}
string RGWSI_ZoneUtils::unique_id(uint64_t unique_num)
{
- char buf[32];
- snprintf(buf, sizeof(buf), ".%llu.%llu", (unsigned long long)rados_svc->instance_id(), (unsigned long long)unique_num);
- string s = zone_svc->get_zone_params().get_id() + buf;
- return s;
+ return fmt::format("{}.{}.{}",
+ zone_svc->get_zone_params().get_id(),
+ rados->get_instance_id(),
+ unique_num);
}
void RGWSI_ZoneUtils::init_unique_trans_id_deps() {
- char buf[16 + 2 + 1]; /* uint64_t needs 16, 2 hyphens add further 2 */
-
- snprintf(buf, sizeof(buf), "-%llx-", (unsigned long long)rados_svc->instance_id());
- url_encode(string(buf) + zone_svc->get_zone().name, trans_id_suffix);
+ url_encode(fmt::format("-{}-{}",
+ rados->get_instance_id(),
+ zone_svc->get_zone().name),
+ trans_id_suffix);
}
/* In order to preserve compatibility with Swift API, transaction ID
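
One behavioral detail of the fmt::format() conversion above: the old snprintf() calls printed the RADOS instance id with %llx (hex) in gen_host_id() and in the transaction-id suffix, whereas "{}" formats it in decimal. fmt can reproduce the old rendering if that ever matters:

    uint64_t id = 0xdeadbeef;
    fmt::format("{}", id);    // "3735928559" -- decimal, the new behavior
    fmt::format("{:x}", id);  // "deadbeef"   -- the old %llx form
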
diff --git a/src/rgw/services/svc_zone_utils.h b/src/rgw/services/svc_zone_utils.h
index 43e3fee8d93..41b9400335b 100644
--- a/src/rgw/services/svc_zone_utils.h
+++ b/src/rgw/services/svc_zone_utils.h
@@ -6,21 +6,20 @@
#include "rgw_service.h"
-class RGWSI_RADOS;
class RGWSI_Zone;
class RGWSI_ZoneUtils : public RGWServiceInstance
{
friend struct RGWServices_Def;
- RGWSI_RADOS *rados_svc{nullptr};
+ librados::Rados* rados{nullptr};
RGWSI_Zone *zone_svc{nullptr};
std::string trans_id_suffix;
- void init(RGWSI_RADOS *_rados_svc,
+ void init(librados::Rados* rados_,
RGWSI_Zone *_zone_svc) {
- rados_svc = _rados_svc;
+ rados = rados_;
zone_svc = _zone_svc;
}
diff --git a/src/test/common/CMakeLists.txt b/src/test/common/CMakeLists.txt
index c044daf662a..b2ed06ee306 100644
--- a/src/test/common/CMakeLists.txt
+++ b/src/test/common/CMakeLists.txt
@@ -390,6 +390,10 @@ target_link_libraries(unittest_blocked_completion Boost::system GTest::GTest)
add_executable(unittest_allocate_unique test_allocate_unique.cc)
add_ceph_unittest(unittest_allocate_unique)
+add_executable(unittest_versioned_variant test_versioned_variant.cc)
+add_ceph_unittest(unittest_versioned_variant)
+target_link_libraries(unittest_versioned_variant common)
+
if(WITH_SYSTEMD)
add_executable(unittest_journald_logger test_journald_logger.cc)
target_link_libraries(unittest_journald_logger ceph-common)
diff --git a/src/test/common/test_versioned_variant.cc b/src/test/common/test_versioned_variant.cc
new file mode 100644
index 00000000000..81f12c23c2b
--- /dev/null
+++ b/src/test/common/test_versioned_variant.cc
@@ -0,0 +1,341 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright contributors to the Ceph project
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#include "common/versioned_variant.h"
+#include <bitset>
+#include <string>
+#include <gtest/gtest.h>
+
+namespace {
+
+// type with custom encoding
+struct custom_type {
+ void encode(bufferlist& bl) const {
+ ENCODE_START(0, 0, bl);
+ ENCODE_FINISH(bl);
+ }
+ void decode(bufferlist::const_iterator& bl) {
+ DECODE_START(0, bl);
+ DECODE_FINISH(bl);
+ }
+};
+WRITE_CLASS_ENCODER(custom_type);
+
+} // anonymous namespace
+
+namespace ceph {
+
+TEST(VersionedVariant, Monostate)
+{
+ using Variant = std::variant<std::monostate>;
+ bufferlist bl;
+ {
+ Variant in;
+ versioned_variant::encode(in, bl);
+ }
+ {
+ Variant out;
+ auto p = bl.cbegin();
+ ASSERT_NO_THROW(versioned_variant::decode(out, p));
+ EXPECT_TRUE(std::holds_alternative<std::monostate>(out));
+ }
+}
+
+TEST(VersionedVariant, Custom)
+{
+ using Variant = std::variant<std::monostate, custom_type>;
+ bufferlist bl;
+ {
+ Variant in = custom_type{};
+ versioned_variant::encode(in, bl);
+ }
+ {
+ Variant out;
+ auto p = bl.cbegin();
+ ASSERT_NO_THROW(versioned_variant::decode(out, p));
+ EXPECT_TRUE(std::holds_alternative<custom_type>(out));
+ }
+}
+
+TEST(VersionedVariant, DuplicateFirst)
+{
+ using Variant = std::variant<int, int>;
+ bufferlist bl;
+ {
+ Variant in;
+ in.emplace<0>(42);
+ versioned_variant::encode(in, bl);
+ }
+ {
+ Variant out;
+ auto p = bl.cbegin();
+ ASSERT_NO_THROW(versioned_variant::decode(out, p));
+ ASSERT_EQ(0, out.index());
+ EXPECT_EQ(42, std::get<0>(out));
+ }
+}
+
+TEST(VersionedVariant, DuplicateSecond)
+{
+ using Variant = std::variant<int, int>;
+ bufferlist bl;
+ {
+ Variant in;
+ in.emplace<1>(42);
+ versioned_variant::encode(in, bl);
+ }
+ {
+ Variant out;
+ auto p = bl.cbegin();
+ ASSERT_NO_THROW(versioned_variant::decode(out, p));
+ ASSERT_EQ(1, out.index());
+ EXPECT_EQ(42, std::get<1>(out));
+ }
+}
+
+TEST(VersionedVariant, EncodeOld)
+{
+ using V1 = std::variant<int>;
+ using V2 = std::variant<int, std::string>;
+
+ bufferlist bl;
+ {
+ // use V1 to encode the initial type
+ V1 in = 42;
+ versioned_variant::encode(in, bl);
+ }
+ {
+ // can decode as V1
+ V1 out;
+ auto p = bl.cbegin();
+ ASSERT_NO_THROW(versioned_variant::decode(out, p));
+ ASSERT_TRUE(std::holds_alternative<int>(out));
+ EXPECT_EQ(42, std::get<int>(out));
+ }
+ {
+ // can also decode as V2
+ V2 out;
+ auto p = bl.cbegin();
+ ASSERT_NO_THROW(versioned_variant::decode(out, p));
+ ASSERT_TRUE(std::holds_alternative<int>(out));
+ EXPECT_EQ(42, std::get<int>(out));
+ }
+}
+
+TEST(VersionedVariant, EncodeExisting)
+{
+ using V1 = std::variant<int>;
+ using V2 = std::variant<int, std::string>;
+
+ bufferlist bl;
+ {
+ // use V2 to encode the type shared with V1
+ V2 in = 42;
+ versioned_variant::encode(in, bl);
+ }
+ {
+ // can decode as V2
+ V2 out;
+ auto p = bl.cbegin();
+ ASSERT_NO_THROW(versioned_variant::decode(out, p));
+ ASSERT_TRUE(std::holds_alternative<int>(out));
+ EXPECT_EQ(42, std::get<int>(out));
+ }
+ {
+ // can also decode as V1
+ V1 out;
+ auto p = bl.cbegin();
+ ASSERT_NO_THROW(versioned_variant::decode(out, p));
+ ASSERT_TRUE(std::holds_alternative<int>(out));
+ EXPECT_EQ(42, std::get<int>(out));
+ }
+}
+
+TEST(VersionedVariant, EncodeNew)
+{
+ using V1 = std::variant<int>;
+ using V2 = std::variant<int, std::string>;
+
+ bufferlist bl;
+ {
+ // use V2 to encode the new string type
+ V2 in = "42";
+ versioned_variant::encode(in, bl);
+ }
+ {
+ // can decode as V2
+ V2 out;
+ auto p = bl.cbegin();
+ ASSERT_NO_THROW(versioned_variant::decode(out, p));
+ ASSERT_TRUE(std::holds_alternative<std::string>(out));
+ EXPECT_EQ("42", std::get<std::string>(out));
+ }
+ {
+ // can't decode as V1
+ V1 out;
+ auto p = bl.cbegin();
+ EXPECT_THROW(versioned_variant::decode(out, p), buffer::malformed_input);
+ }
+}
+
+
+TEST(ConvertedVariant, Custom)
+{
+ using Variant = std::variant<custom_type>;
+ bufferlist bl;
+ {
+ Variant in = custom_type{};
+ converted_variant::encode(in, bl);
+ }
+ {
+ Variant out;
+ auto p = bl.cbegin();
+ ASSERT_NO_THROW(converted_variant::decode(out, p));
+ EXPECT_TRUE(std::holds_alternative<custom_type>(out));
+ }
+}
+
+TEST(ConvertedVariant, DuplicateFirst)
+{
+ using Variant = std::variant<custom_type, int, int>;
+ bufferlist bl;
+ {
+ Variant in;
+ in.emplace<1>(42);
+ converted_variant::encode(in, bl);
+ }
+ {
+ Variant out;
+ auto p = bl.cbegin();
+ ASSERT_NO_THROW(converted_variant::decode(out, p));
+ ASSERT_EQ(1, out.index());
+ EXPECT_EQ(42, std::get<1>(out));
+ }
+}
+
+TEST(ConvertedVariant, DuplicateSecond)
+{
+ using Variant = std::variant<custom_type, int, int>;
+ bufferlist bl;
+ {
+ Variant in;
+ in.emplace<2>(42);
+ converted_variant::encode(in, bl);
+ }
+ {
+ Variant out;
+ auto p = bl.cbegin();
+ ASSERT_NO_THROW(converted_variant::decode(out, p));
+ ASSERT_EQ(2, out.index());
+ EXPECT_EQ(42, std::get<2>(out));
+ }
+}
+
+TEST(ConvertedVariant, EncodeOld)
+{
+ using V1 = custom_type;
+ using V2 = std::variant<custom_type, int>;
+
+ bufferlist bl;
+ {
+ // use V1 to encode the initial type
+ V1 in;
+ encode(in, bl);
+ }
+ {
+ // can decode as V1
+ V1 out;
+ auto p = bl.cbegin();
+ EXPECT_NO_THROW(decode(out, p));
+ }
+ {
+ // can also decode as V2
+ V2 out;
+ auto p = bl.cbegin();
+ ASSERT_NO_THROW(converted_variant::decode(out, p));
+ EXPECT_TRUE(std::holds_alternative<custom_type>(out));
+ }
+}
+
+TEST(ConvertedVariant, EncodeExisting)
+{
+ using V1 = custom_type;
+ using V2 = std::variant<custom_type, int>;
+
+ bufferlist bl;
+ {
+ // use V2 to encode the type shared with V1
+ V2 in;
+ converted_variant::encode(in, bl);
+ }
+ {
+ // can decode as V2
+ V2 out;
+ auto p = bl.cbegin();
+ ASSERT_NO_THROW(converted_variant::decode(out, p));
+ EXPECT_TRUE(std::holds_alternative<custom_type>(out));
+ }
+ {
+ // can also decode as V1
+ V1 out;
+ auto p = bl.cbegin();
+ EXPECT_NO_THROW(decode(out, p));
+ }
+}
+
+TEST(ConvertedVariant, EncodeNew)
+{
+ using V1 = custom_type;
+ using V2 = std::variant<custom_type, int>;
+
+ bufferlist bl;
+ {
+ // use V2 to encode the new type
+ V2 in = 42;
+ converted_variant::encode(in, bl);
+ }
+ {
+ // can decode as V2
+ V2 out;
+ auto p = bl.cbegin();
+ ASSERT_NO_THROW(converted_variant::decode(out, p));
+ ASSERT_TRUE(std::holds_alternative<int>(out));
+ EXPECT_EQ(42, std::get<int>(out));
+ }
+ {
+ // can't decode as V1
+ V1 out;
+ auto p = bl.cbegin();
+ EXPECT_THROW(decode(out, p), buffer::malformed_input);
+ }
+}
+
+TEST(Variant, GenerateTestInstances)
+{
+ using Variant = std::variant<int, bool, double>;
+
+ std::bitset<std::variant_size_v<Variant>> bits;
+ ASSERT_TRUE(bits.none());
+
+ std::list<Variant> instances;
+ generate_test_instances(instances);
+
+ for (const auto& v : instances) {
+ bits.set(v.index());
+ }
+
+ EXPECT_TRUE(bits.all());
+}
+
+} // namespace ceph
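
The tests above cover round-trip and compatibility behavior of the new versioned_variant/converted_variant helpers. A minimal usage sketch, assuming only the encode/decode free functions these tests call; as the EncodeNew cases show, new alternatives must be appended to the end of the variant so older readers can still decode the existing ones:

    using Value = std::variant<int, std::string>;  // std::string appended after int shipped

    bufferlist bl;
    Value in = 42;
    ceph::versioned_variant::encode(in, bl);

    Value out;
    auto p = bl.cbegin();
    ceph::versioned_variant::decode(out, p);  // throws buffer::malformed_input if the
                                              // encoded alternative is unknown to 'Value'
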
diff --git a/src/test/librbd/CMakeLists.txt b/src/test/librbd/CMakeLists.txt
index 0ae29b8bf18..c3f0edbea5d 100644
--- a/src/test/librbd/CMakeLists.txt
+++ b/src/test/librbd/CMakeLists.txt
@@ -19,7 +19,8 @@ set(librbd_test
test_Operations.cc
test_Trash.cc
journal/test_Entries.cc
- journal/test_Replay.cc)
+ journal/test_Replay.cc
+ journal/test_Stress.cc)
add_library(rbd_test STATIC ${librbd_test})
target_link_libraries(rbd_test PRIVATE
rbd_test_support
diff --git a/src/test/librbd/io/test_mock_ImageRequest.cc b/src/test/librbd/io/test_mock_ImageRequest.cc
index 9d6423d66c4..6ee67fe5f1c 100644
--- a/src/test/librbd/io/test_mock_ImageRequest.cc
+++ b/src/test/librbd/io/test_mock_ImageRequest.cc
@@ -16,12 +16,15 @@ namespace {
struct MockTestImageCtx;
struct MockTestJournal : public MockJournal {
- MOCK_METHOD4(append_write_event, uint64_t(uint64_t, size_t,
+ MOCK_METHOD3(append_write_event, uint64_t(const io::Extents&,
const bufferlist &, bool));
+ MOCK_METHOD3(append_write_same_event, uint64_t(const io::Extents&,
+ const bufferlist &, bool));
MOCK_METHOD5(append_compare_and_write_event, uint64_t(uint64_t, size_t,
const bufferlist &,
const bufferlist &,
bool));
+ MOCK_METHOD3(append_discard_event, uint64_t(const io::Extents&, uint32_t, bool));
MOCK_METHOD5(append_io_event_mock, uint64_t(const journal::EventEntry&,
uint64_t, size_t, bool, int));
uint64_t append_io_event(journal::EventEntry &&event_entry,
@@ -119,9 +122,10 @@ struct TestMockIoImageRequest : public TestMockFixture {
}
}
- void expect_journal_append_io_event(MockTestJournal &mock_journal, uint64_t journal_tid,
- uint64_t offset, size_t length) {
- EXPECT_CALL(mock_journal, append_io_event_mock(_, offset, length, _, _))
+ void expect_journal_append_discard_event(MockTestJournal &mock_journal,
+ uint64_t journal_tid,
+ const io::Extents& extents) {
+ EXPECT_CALL(mock_journal, append_discard_event(extents, _, _))
.WillOnce(Return(journal_tid));
}
@@ -386,8 +390,8 @@ TEST_F(TestMockIoImageRequest, PartialDiscardJournalAppendEnabled) {
InSequence seq;
expect_get_modify_timestamp(mock_image_ctx, false);
expect_is_journal_appending(mock_journal, true);
- expect_journal_append_io_event(mock_journal, 0, 16, 63);
- expect_journal_append_io_event(mock_journal, 1, 84, 100);
+ expect_journal_append_discard_event(mock_journal, 0,
+ {{16, 63}, {84, 100}});
expect_object_discard_request(mock_image_ctx, 0, 16, 63, 0);
expect_object_discard_request(mock_image_ctx, 0, 84, 100, 0);
@@ -419,8 +423,8 @@ TEST_F(TestMockIoImageRequest, TailDiscardJournalAppendEnabled) {
InSequence seq;
expect_get_modify_timestamp(mock_image_ctx, false);
expect_is_journal_appending(mock_journal, true);
- expect_journal_append_io_event(
- mock_journal, 0, ictx->layout.object_size - 1024, 1024);
+ expect_journal_append_discard_event(
+ mock_journal, 0, {{ictx->layout.object_size - 1024, 1024}});
expect_object_discard_request(
mock_image_ctx, 0, ictx->layout.object_size - 1024, 1024, 0);
@@ -452,7 +456,7 @@ TEST_F(TestMockIoImageRequest, PruneRequiredDiscardJournalAppendEnabled) {
InSequence seq;
expect_get_modify_timestamp(mock_image_ctx, false);
expect_is_journal_appending(mock_journal, true);
- EXPECT_CALL(mock_journal, append_io_event_mock(_, _, _, _, _)).Times(0);
+ EXPECT_CALL(mock_journal, append_discard_event(_, _, _)).Times(0);
EXPECT_CALL(*mock_image_ctx.io_object_dispatcher, send(_)).Times(0);
C_SaferCond aio_comp_ctx;
@@ -482,7 +486,7 @@ TEST_F(TestMockIoImageRequest, LengthModifiedDiscardJournalAppendEnabled) {
InSequence seq;
expect_get_modify_timestamp(mock_image_ctx, false);
expect_is_journal_appending(mock_journal, true);
- expect_journal_append_io_event(mock_journal, 0, 32, 32);
+ expect_journal_append_discard_event(mock_journal, 0, {{32, 32}});
expect_object_discard_request(mock_image_ctx, 0, 32, 32, 0);
C_SaferCond aio_comp_ctx;
@@ -513,10 +517,9 @@ TEST_F(TestMockIoImageRequest, DiscardGranularityJournalAppendEnabled) {
InSequence seq;
expect_get_modify_timestamp(mock_image_ctx, false);
expect_is_journal_appending(mock_journal, true);
- expect_journal_append_io_event(mock_journal, 0, 32, 32);
- expect_journal_append_io_event(mock_journal, 1, 96, 64);
- expect_journal_append_io_event(
- mock_journal, 2, ictx->layout.object_size - 32, 32);
+ expect_journal_append_discard_event(
+ mock_journal, 0,
+ {{32, 32}, {96, 64}, {ictx->layout.object_size - 32, 32}});
expect_object_discard_request(mock_image_ctx, 0, 32, 32, 0);
expect_object_discard_request(mock_image_ctx, 0, 96, 64, 0);
expect_object_discard_request(
diff --git a/src/test/librbd/io/test_mock_ObjectRequest.cc b/src/test/librbd/io/test_mock_ObjectRequest.cc
index 0690b7722a0..c20c825018b 100644
--- a/src/test/librbd/io/test_mock_ObjectRequest.cc
+++ b/src/test/librbd/io/test_mock_ObjectRequest.cc
@@ -1926,7 +1926,7 @@ TEST_F(TestMockIoObjectRequest, ListSnapsWholeObject) {
ASSERT_EQ(0, open_image(m_image_name, &ictx));
MockTestImageCtx mock_image_ctx(*ictx);
- mock_image_ctx.parent = &mock_image_ctx;
+ mock_image_ctx.snaps = {3};
InSequence seq;
@@ -1937,13 +1937,120 @@ TEST_F(TestMockIoObjectRequest, ListSnapsWholeObject) {
clone_info.cloneid = 3;
clone_info.snaps = {3};
clone_info.overlap = std::vector<std::pair<uint64_t,uint64_t>>{{0, 1}};
- clone_info.size = 4194304;
+ clone_info.size = mock_image_ctx.layout.object_size;
snap_set.clones.push_back(clone_info);
clone_info.cloneid = CEPH_NOSNAP;
clone_info.snaps = {};
clone_info.overlap = {};
- clone_info.size = 4194304;
+ clone_info.size = mock_image_ctx.layout.object_size;
+ snap_set.clones.push_back(clone_info);
+
+ expect_list_snaps(mock_image_ctx, snap_set, 0);
+
+ {
+ SnapshotDelta snapshot_delta;
+ C_SaferCond ctx;
+ auto req = MockObjectListSnapsRequest::create(
+ &mock_image_ctx, 0, {{0, mock_image_ctx.layout.object_size - 1}},
+ {3, CEPH_NOSNAP}, 0, {}, &snapshot_delta, &ctx);
+ req->send();
+ ASSERT_EQ(0, ctx.wait());
+
+ SnapshotDelta expected_snapshot_delta;
+ expected_snapshot_delta[{CEPH_NOSNAP,CEPH_NOSNAP}].insert(
+ 1, mock_image_ctx.layout.object_size - 2,
+ {SPARSE_EXTENT_STATE_DATA, mock_image_ctx.layout.object_size - 2});
+ EXPECT_EQ(expected_snapshot_delta, snapshot_delta);
+ }
+
+ expect_list_snaps(mock_image_ctx, snap_set, 0);
+
+ {
+ SnapshotDelta snapshot_delta;
+ C_SaferCond ctx;
+ auto req = MockObjectListSnapsRequest::create(
+ &mock_image_ctx, 0, {{0, mock_image_ctx.layout.object_size - 1}},
+ {3, CEPH_NOSNAP}, LIST_SNAPS_FLAG_WHOLE_OBJECT, {}, &snapshot_delta,
+ &ctx);
+ req->send();
+ ASSERT_EQ(0, ctx.wait());
+
+ SnapshotDelta expected_snapshot_delta;
+ expected_snapshot_delta[{CEPH_NOSNAP,CEPH_NOSNAP}].insert(
+ 0, mock_image_ctx.layout.object_size - 1,
+ {SPARSE_EXTENT_STATE_DATA, mock_image_ctx.layout.object_size - 1});
+ EXPECT_EQ(expected_snapshot_delta, snapshot_delta);
+ }
+}
+
+TEST_F(TestMockIoObjectRequest, ListSnapsWholeObjectEndSize) {
+ librbd::ImageCtx *ictx;
+ ASSERT_EQ(0, open_image(m_image_name, &ictx));
+
+ MockTestImageCtx mock_image_ctx(*ictx);
+ mock_image_ctx.snaps = {3};
+
+ InSequence seq;
+
+ librados::snap_set_t snap_set;
+ snap_set.seq = 3;
+ librados::clone_info_t clone_info;
+
+ clone_info.cloneid = CEPH_NOSNAP;
+ clone_info.snaps = {};
+ clone_info.overlap = {};
+ // smaller than object extent (i.e. the op) to test end_size handling
+ clone_info.size = mock_image_ctx.layout.object_size - 2;
+ snap_set.clones.push_back(clone_info);
+
+ expect_list_snaps(mock_image_ctx, snap_set, 0);
+
+ {
+ SnapshotDelta snapshot_delta;
+ C_SaferCond ctx;
+ auto req = MockObjectListSnapsRequest::create(
+ &mock_image_ctx, 0, {{0, mock_image_ctx.layout.object_size - 1}},
+ {4, CEPH_NOSNAP}, 0, {}, &snapshot_delta, &ctx);
+ req->send();
+ ASSERT_EQ(0, ctx.wait());
+
+ EXPECT_TRUE(snapshot_delta.empty());
+ }
+
+ expect_list_snaps(mock_image_ctx, snap_set, 0);
+
+ {
+ SnapshotDelta snapshot_delta;
+ C_SaferCond ctx;
+ auto req = MockObjectListSnapsRequest::create(
+ &mock_image_ctx, 0, {{0, mock_image_ctx.layout.object_size - 1}},
+ {4, CEPH_NOSNAP}, LIST_SNAPS_FLAG_WHOLE_OBJECT, {}, &snapshot_delta,
+ &ctx);
+ req->send();
+ ASSERT_EQ(0, ctx.wait());
+
+ EXPECT_TRUE(snapshot_delta.empty());
+ }
+}
+
+TEST_F(TestMockIoObjectRequest, ListSnapsNoSnapsInSnapSet) {
+ librbd::ImageCtx *ictx;
+ ASSERT_EQ(0, open_image(m_image_name, &ictx));
+
+ MockTestImageCtx mock_image_ctx(*ictx);
+ mock_image_ctx.snaps = {3};
+
+ InSequence seq;
+
+ librados::snap_set_t snap_set;
+ snap_set.seq = 3;
+ librados::clone_info_t clone_info;
+
+ clone_info.cloneid = 3;
+ clone_info.snaps = {};
+ clone_info.overlap = {};
+ clone_info.size = 0;
snap_set.clones.push_back(clone_info);
expect_list_snaps(mock_image_ctx, snap_set, 0);
@@ -1960,7 +2067,7 @@ TEST_F(TestMockIoObjectRequest, ListSnapsWholeObject) {
expected_snapshot_delta[{CEPH_NOSNAP,CEPH_NOSNAP}].insert(
0, mock_image_ctx.layout.object_size - 1,
{SPARSE_EXTENT_STATE_DATA, mock_image_ctx.layout.object_size - 1});
- ASSERT_EQ(expected_snapshot_delta, snapshot_delta);
+ EXPECT_EQ(expected_snapshot_delta, snapshot_delta);
}
} // namespace io
diff --git a/src/test/librbd/journal/test_Entries.cc b/src/test/librbd/journal/test_Entries.cc
index c392fb9f88a..bb4b06c0368 100644
--- a/src/test/librbd/journal/test_Entries.cc
+++ b/src/test/librbd/journal/test_Entries.cc
@@ -196,6 +196,69 @@ TEST_F(TestJournalEntries, AioDiscard) {
ASSERT_EQ(234U, aio_discard_event.length);
}
+TEST_F(TestJournalEntries, AioDiscardWithPrune) {
+ REQUIRE_FEATURE(RBD_FEATURE_JOURNALING);
+
+ // The discard path can create multiple image extents (ImageRequest.cc) in the
+ // case where the discard request needs to be pruned and multiple objects are
+ // involved in the request. This test ensures that journal event entries are
+ // queued up for each image extent.
+
+ // Create an image that is multiple objects so that we can force multiple
+ // image extents on the discard path.
+ CephContext* cct = reinterpret_cast<CephContext*>(_rados.cct());
+ auto object_size = 1ull << cct->_conf.get_val<uint64_t>("rbd_default_order");
+ auto image_size = 4 * object_size;
+
+ auto image_name = get_temp_image_name();
+ ASSERT_EQ(0, create_image_pp(m_rbd, m_ioctx, image_name, image_size));
+
+ librbd::ImageCtx *ictx;
+ ASSERT_EQ(0, open_image(image_name, &ictx));
+
+ ::journal::Journaler *journaler = create_journaler(ictx);
+ ASSERT_TRUE(journaler != NULL);
+
+ C_SaferCond cond_ctx;
+ auto c = librbd::io::AioCompletion::create(&cond_ctx);
+ c->get();
+ // We offset the discard by -4096 bytes and set discard granularity to 8192;
+ // this should cause two image extents to be formed in
+ // AbstractImageWriteRequest<I>::send_request().
+ api::Io<>::aio_discard(*ictx, c, object_size - 4096, 2 * object_size, 8192,
+ true);
+ ASSERT_EQ(0, c->wait_for_complete());
+ c->put();
+
+ for (uint64_t chunk = 0; chunk < 2; chunk++) {
+ auto offset = object_size;
+ auto size = object_size;
+ if (chunk == 1) {
+ offset = object_size * 2;
+ size = object_size - 8192;
+ }
+
+ ::journal::ReplayEntry replay_entry;
+ if (!journaler->try_pop_front(&replay_entry)) {
+ ASSERT_TRUE(wait_for_entries_available(ictx));
+ ASSERT_TRUE(journaler->try_pop_front(&replay_entry));
+ }
+
+ librbd::journal::EventEntry event_entry;
+ ASSERT_TRUE(get_event_entry(replay_entry, &event_entry));
+
+ ASSERT_EQ(librbd::journal::EVENT_TYPE_AIO_DISCARD,
+ event_entry.get_event_type());
+
+ librbd::journal::AioDiscardEvent aio_discard_event =
+ boost::get<librbd::journal::AioDiscardEvent>(event_entry.event);
+ ASSERT_EQ(offset, aio_discard_event.offset);
+ ASSERT_EQ(size, aio_discard_event.length);
+
+ journaler->committed(replay_entry);
+ }
+}
+
TEST_F(TestJournalEntries, AioFlush) {
REQUIRE_FEATURE(RBD_FEATURE_JOURNALING);
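
The offsets and lengths expected by the loop in AioDiscardWithPrune above follow from granularity pruning of the discard range, worked through here for clarity:

    request  = [object_size - 4096, 3 * object_size - 4096)
    pruned   = [object_size,        3 * object_size - 8192)   // start rounded up, end rounded down to 8192
    extent 1 = {offset = object_size,     length = object_size}
    extent 2 = {offset = 2 * object_size, length = object_size - 8192}

which matches the two journaled extents the test checks for chunk 0 and chunk 1.
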
diff --git a/src/test/librbd/journal/test_Stress.cc b/src/test/librbd/journal/test_Stress.cc
new file mode 100644
index 00000000000..d3df9147ae6
--- /dev/null
+++ b/src/test/librbd/journal/test_Stress.cc
@@ -0,0 +1,121 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "test/librados/test_cxx.h"
+#include "test/librbd/test_fixture.h"
+#include "test/librbd/test_support.h"
+#include "cls/rbd/cls_rbd_types.h"
+#include "cls/journal/cls_journal_types.h"
+#include "cls/journal/cls_journal_client.h"
+#include "journal/Journaler.h"
+#include "librbd/ExclusiveLock.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/ImageState.h"
+#include "librbd/ImageWatcher.h"
+#include "librbd/internal.h"
+#include "librbd/Journal.h"
+#include "librbd/Operations.h"
+#include "librbd/api/Io.h"
+#include "librbd/api/Snapshot.h"
+#include "librbd/io/AioCompletion.h"
+#include "librbd/io/ImageDispatchSpec.h"
+#include "librbd/io/ImageRequest.h"
+#include "librbd/io/ReadResult.h"
+#include "librbd/journal/Types.h"
+#include <boost/scope_exit.hpp>
+
+void register_test_journal_stress() {
+}
+
+namespace librbd {
+namespace journal {
+
+class TestJournalStress : public TestFixture {
+};
+
+TEST_F(TestJournalStress, DiscardWithPruneWriteOverlap) {
+ REQUIRE_FEATURE(RBD_FEATURE_JOURNALING);
+
+ // Overlap discards and writes while discard pruning is occurring. This tests
+ // the conditions under which https://tracker.ceph.com/issues/63422 occurred.
+
+ // Create an image that is multiple objects so that we can force multiple
+ // image extents on the discard path.
+ int order = 22;
+ auto object_size = uint64_t{1} << order;
+ auto image_size = 4 * object_size;
+
+ // Write-around cache required for overlapping I/O delays.
+ std::map<std::string, std::string> config;
+ config["rbd_cache"] = "true";
+ config["rbd_cache_policy"] = "writearound";
+ config["rbd_cache_max_dirty"] = std::to_string(image_size);
+ config["rbd_cache_writethrough_until_flush"] = "false";
+ // XXX: Work around https://tracker.ceph.com/issues/63681, which this test
+ // exposes when run under Valgrind.
+ config["librados_thread_count"] = "15";
+
+ librados::Rados rados;
+ ASSERT_EQ("", connect_cluster_pp(rados, config));
+
+ librados::IoCtx ioctx;
+ ASSERT_EQ(0, rados.ioctx_create(_pool_name.c_str(), ioctx));
+
+ uint64_t features;
+ ASSERT_TRUE(::get_features(&features));
+ auto image_name = get_temp_image_name();
+ ASSERT_EQ(0, create_image_full_pp(m_rbd, ioctx, image_name, image_size,
+ features, false, &order));
+
+ auto ictx = new librbd::ImageCtx(image_name, "", nullptr, ioctx, false);
+ ASSERT_EQ(0, ictx->state->open(0));
+ BOOST_SCOPE_EXIT(ictx) {
+ ictx->state->close();
+ } BOOST_SCOPE_EXIT_END;
+
+ std::thread write_thread(
+ [ictx, object_size]() {
+ std::string payload(object_size, '1');
+
+ for (auto i = 0; i < 200; i++) {
+ // Alternate overlaps with the two objects that the discard below
+ // touches.
+ for (auto offset = object_size;
+ offset < object_size * 3;
+ offset += object_size) {
+ bufferlist payload_bl;
+ payload_bl.append(payload);
+ auto aio_comp = new librbd::io::AioCompletion();
+ api::Io<>::aio_write(*ictx, aio_comp, offset, payload.size(),
+ std::move(payload_bl), 0, true);
+ ASSERT_EQ(0, aio_comp->wait_for_complete());
+ aio_comp->release();
+ }
+ }
+ }
+ );
+
+ auto discard_exit = false;
+ std::thread discard_thread(
+ [ictx, object_size, &discard_exit]() {
+ while (!discard_exit) {
+ // We offset the discard by -4096 bytes and set discard granularity to
+ // 8192; this should cause two image extents to be formed in
+ // AbstractImageWriteRequest<I>::send_request() on objects 1 and 2,
+ // overlapping with the writes above.
+ auto aio_comp = new librbd::io::AioCompletion();
+ api::Io<>::aio_discard(*ictx, aio_comp, object_size - 4096,
+ 2 * object_size, 8192, true);
+ ASSERT_EQ(0, aio_comp->wait_for_complete());
+ aio_comp->release();
+ }
+ }
+ );
+
+ write_thread.join();
+ discard_exit = true;
+ discard_thread.join();
+}
+
+} // namespace journal
+} // namespace librbd
diff --git a/src/test/librbd/test_librbd.cc b/src/test/librbd/test_librbd.cc
index f4bb74fe397..9d17c4d85cf 100644
--- a/src/test/librbd/test_librbd.cc
+++ b/src/test/librbd/test_librbd.cc
@@ -7358,61 +7358,6 @@ interval_set<uint64_t> round_diff_interval(const interval_set<uint64_t>& diff,
return rounded_diff;
}
-TEST_F(TestLibRBD, SnapDiff)
-{
- REQUIRE_FEATURE(RBD_FEATURE_FAST_DIFF);
-
- rados_ioctx_t ioctx;
- rados_ioctx_create(_cluster, m_pool_name.c_str(), &ioctx);
-
- rbd_image_t image;
- int order = 0;
- std::string image_name = get_temp_image_name();
- uint64_t size = 100 << 20;
- ASSERT_EQ(0, create_image(ioctx, image_name.c_str(), size, &order));
- ASSERT_EQ(0, rbd_open(ioctx, image_name.c_str(), &image, nullptr));
-
- char test_data[TEST_IO_SIZE + 1];
- for (size_t i = 0; i < TEST_IO_SIZE; ++i) {
- test_data[i] = (char) (rand() % (126 - 33) + 33);
- }
- test_data[TEST_IO_SIZE] = '\0';
-
- ASSERT_PASSED(write_test_data, image, test_data, 0,
- TEST_IO_SIZE, LIBRADOS_OP_FLAG_FADVISE_NOCACHE);
-
- interval_set<uint64_t> diff;
- ASSERT_EQ(0, rbd_diff_iterate2(image, nullptr, 0, size, true, true,
- iterate_cb, &diff));
- EXPECT_EQ(1 << order, diff.size());
-
- ASSERT_EQ(0, rbd_snap_create(image, "snap1"));
- ASSERT_EQ(0, rbd_snap_create(image, "snap2"));
-
- diff.clear();
- ASSERT_EQ(0, rbd_diff_iterate2(image, nullptr, 0, size, true, true,
- iterate_cb, &diff));
- EXPECT_EQ(1 << order, diff.size());
-
- diff.clear();
- ASSERT_EQ(0, rbd_diff_iterate2(image, "snap1", 0, size, true, true,
- iterate_cb, &diff));
- EXPECT_EQ(0, diff.size());
-
- diff.clear();
- ASSERT_EQ(0, rbd_diff_iterate2(image, "snap2", 0, size, true, true,
- iterate_cb, &diff));
- EXPECT_EQ(0, diff.size());
-
- ASSERT_EQ(0, rbd_snap_remove(image, "snap1"));
- ASSERT_EQ(0, rbd_snap_remove(image, "snap2"));
-
- ASSERT_EQ(0, rbd_close(image));
- ASSERT_EQ(0, rbd_remove(ioctx, image_name.c_str()));
-
- rados_ioctx_destroy(ioctx);
-}
-
template <typename T>
class DiffIterateTest : public TestLibRBD {
public:
@@ -7501,12 +7446,276 @@ ostream& operator<<(ostream & o, const diff_extent& e) {
int vector_iterate_cb(uint64_t off, size_t len, int exists, void *arg)
{
- cout << "iterate_cb " << off << "~" << len << std::endl;
+ //cout << "iterate_cb " << off << "~" << len << std::endl;
vector<diff_extent> *diff = static_cast<vector<diff_extent> *>(arg);
diff->push_back(diff_extent(off, len, exists, 0));
return 0;
}
+TYPED_TEST(DiffIterateTest, DiffIterateDeterministic)
+{
+ REQUIRE(!is_feature_enabled(RBD_FEATURE_STRIPINGV2));
+
+ rados_ioctx_t ioctx;
+ ASSERT_EQ(0, rados_ioctx_create(this->_cluster, this->m_pool_name.c_str(),
+ &ioctx));
+
+ rbd_image_t image;
+ int order = 22;
+ std::string name = this->get_temp_image_name();
+ uint64_t size = 20 << 20;
+
+ ASSERT_EQ(0, create_image(ioctx, name.c_str(), size, &order));
+ ASSERT_EQ(0, rbd_open(ioctx, name.c_str(), &image, NULL));
+
+ uint64_t object_size = 0;
+ if (this->whole_object) {
+ object_size = 1 << order;
+ }
+
+ std::vector<diff_extent> extents;
+ ASSERT_EQ(0, rbd_diff_iterate2(image, NULL, 0, size, true, this->whole_object,
+ vector_iterate_cb, &extents));
+ ASSERT_EQ(0u, extents.size());
+
+ ASSERT_EQ(0, rbd_snap_create(image, "snap1"));
+
+ std::string buf(256, '1');
+ ASSERT_EQ(256, rbd_write(image, 0, 256, buf.data()));
+ ASSERT_EQ(0, rbd_diff_iterate2(image, NULL, 0, size, true, this->whole_object,
+ vector_iterate_cb, &extents));
+ ASSERT_EQ(1u, extents.size());
+ ASSERT_EQ(diff_extent(0, 256, true, object_size), extents[0]);
+ extents.clear();
+
+ ASSERT_EQ(0, rbd_snap_create(image, "snap2"));
+
+ ASSERT_EQ(256, rbd_write(image, 1 << order, 256, buf.data()));
+ ASSERT_EQ(0, rbd_diff_iterate2(image, NULL, 0, size, true, this->whole_object,
+ vector_iterate_cb, &extents));
+ ASSERT_EQ(2u, extents.size());
+ ASSERT_EQ(diff_extent(0, 256, true, object_size), extents[0]);
+ ASSERT_EQ(diff_extent(1 << order, 256, true, object_size), extents[1]);
+ extents.clear();
+
+ ASSERT_EQ(0, rbd_snap_create(image, "snap3"));
+
+ // 1. beginning of time -> HEAD
+ ASSERT_EQ(0, rbd_diff_iterate2(image, NULL, 0, size, true, this->whole_object,
+ vector_iterate_cb, &extents));
+ ASSERT_EQ(2u, extents.size());
+ ASSERT_EQ(diff_extent(0, 256, true, object_size), extents[0]);
+ ASSERT_EQ(diff_extent(1 << order, 256, true, object_size), extents[1]);
+ extents.clear();
+
+ // 2. snap1 -> HEAD
+ ASSERT_EQ(0, rbd_diff_iterate2(image, "snap1", 0, size, true, this->whole_object,
+ vector_iterate_cb, &extents));
+ ASSERT_EQ(2u, extents.size());
+ ASSERT_EQ(diff_extent(0, 256, true, object_size), extents[0]);
+ ASSERT_EQ(diff_extent(1 << order, 256, true, object_size), extents[1]);
+ extents.clear();
+
+ // 3. snap2 -> HEAD
+ ASSERT_EQ(0, rbd_diff_iterate2(image, "snap2", 0, size, true, this->whole_object,
+ vector_iterate_cb, &extents));
+ ASSERT_EQ(1u, extents.size());
+ ASSERT_EQ(diff_extent(1 << order, 256, true, object_size), extents[0]);
+ extents.clear();
+
+ // 4. snap3 -> HEAD
+ ASSERT_EQ(0, rbd_diff_iterate2(image, "snap3", 0, size, true, this->whole_object,
+ vector_iterate_cb, &extents));
+ ASSERT_EQ(0u, extents.size());
+
+ ASSERT_PASSED(this->validate_object_map, image);
+ ASSERT_EQ(0, rbd_snap_set(image, "snap3"));
+
+ // 5. beginning of time -> snap3
+ ASSERT_EQ(0, rbd_diff_iterate2(image, NULL, 0, size, true, this->whole_object,
+ vector_iterate_cb, &extents));
+ ASSERT_EQ(2u, extents.size());
+ ASSERT_EQ(diff_extent(0, 256, true, object_size), extents[0]);
+ ASSERT_EQ(diff_extent(1 << order, 256, true, object_size), extents[1]);
+ extents.clear();
+
+ // 6. snap1 -> snap3
+ ASSERT_EQ(0, rbd_diff_iterate2(image, "snap1", 0, size, true, this->whole_object,
+ vector_iterate_cb, &extents));
+ ASSERT_EQ(2u, extents.size());
+ ASSERT_EQ(diff_extent(0, 256, true, object_size), extents[0]);
+ ASSERT_EQ(diff_extent(1 << order, 256, true, object_size), extents[1]);
+ extents.clear();
+
+ // 7. snap2 -> snap3
+ ASSERT_EQ(0, rbd_diff_iterate2(image, "snap2", 0, size, true, this->whole_object,
+ vector_iterate_cb, &extents));
+ ASSERT_EQ(1u, extents.size());
+ ASSERT_EQ(diff_extent(1 << order, 256, true, object_size), extents[0]);
+ extents.clear();
+
+ ASSERT_PASSED(this->validate_object_map, image);
+ ASSERT_EQ(0, rbd_snap_set(image, "snap2"));
+
+ // 8. beginning of time -> snap2
+ ASSERT_EQ(0, rbd_diff_iterate2(image, NULL, 0, size, true, this->whole_object,
+ vector_iterate_cb, &extents));
+ ASSERT_EQ(1u, extents.size());
+ ASSERT_EQ(diff_extent(0, 256, true, object_size), extents[0]);
+ extents.clear();
+
+ // 9. snap1 -> snap2
+ ASSERT_EQ(0, rbd_diff_iterate2(image, "snap1", 0, size, true, this->whole_object,
+ vector_iterate_cb, &extents));
+ ASSERT_EQ(1u, extents.size());
+ ASSERT_EQ(diff_extent(0, 256, true, object_size), extents[0]);
+ extents.clear();
+
+ ASSERT_PASSED(this->validate_object_map, image);
+ ASSERT_EQ(0, rbd_snap_set(image, "snap1"));
+
+ // 10. beginning of time -> snap1
+ ASSERT_EQ(0, rbd_diff_iterate2(image, NULL, 0, size, true, this->whole_object,
+ vector_iterate_cb, &extents));
+ ASSERT_EQ(0u, extents.size());
+
+ ASSERT_PASSED(this->validate_object_map, image);
+
+ ASSERT_EQ(0, rbd_close(image));
+ rados_ioctx_destroy(ioctx);
+}
+
+TYPED_TEST(DiffIterateTest, DiffIterateDeterministicPP)
+{
+ REQUIRE(!is_feature_enabled(RBD_FEATURE_STRIPINGV2));
+
+ librados::IoCtx ioctx;
+ ASSERT_EQ(0, this->_rados.ioctx_create(this->m_pool_name.c_str(), ioctx));
+
+ librbd::RBD rbd;
+ librbd::Image image;
+ int order = 22;
+ std::string name = this->get_temp_image_name();
+ uint64_t size = 20 << 20;
+
+ ASSERT_EQ(0, create_image_pp(rbd, ioctx, name.c_str(), size, &order));
+ ASSERT_EQ(0, rbd.open(ioctx, image, name.c_str(), NULL));
+
+ uint64_t object_size = 0;
+ if (this->whole_object) {
+ object_size = 1 << order;
+ }
+
+ std::vector<diff_extent> extents;
+ ASSERT_EQ(0, image.diff_iterate2(NULL, 0, size, true, this->whole_object,
+ vector_iterate_cb, &extents));
+ ASSERT_EQ(0u, extents.size());
+
+ ASSERT_EQ(0, image.snap_create("snap1"));
+
+ ceph::bufferlist bl;
+ bl.append(std::string(256, '1'));
+ ASSERT_EQ(256, image.write(0, 256, bl));
+ ASSERT_EQ(0, image.diff_iterate2(NULL, 0, size, true, this->whole_object,
+ vector_iterate_cb, &extents));
+ ASSERT_EQ(1u, extents.size());
+ ASSERT_EQ(diff_extent(0, 256, true, object_size), extents[0]);
+ extents.clear();
+
+ ASSERT_EQ(0, image.snap_create("snap2"));
+
+ ASSERT_EQ(256, image.write(1 << order, 256, bl));
+ ASSERT_EQ(0, image.diff_iterate2(NULL, 0, size, true, this->whole_object,
+ vector_iterate_cb, &extents));
+ ASSERT_EQ(2u, extents.size());
+ ASSERT_EQ(diff_extent(0, 256, true, object_size), extents[0]);
+ ASSERT_EQ(diff_extent(1 << order, 256, true, object_size), extents[1]);
+ extents.clear();
+
+ ASSERT_EQ(0, image.snap_create("snap3"));
+
+ // 1. beginning of time -> HEAD
+ ASSERT_EQ(0, image.diff_iterate2(NULL, 0, size, true, this->whole_object,
+ vector_iterate_cb, &extents));
+ ASSERT_EQ(2u, extents.size());
+ ASSERT_EQ(diff_extent(0, 256, true, object_size), extents[0]);
+ ASSERT_EQ(diff_extent(1 << order, 256, true, object_size), extents[1]);
+ extents.clear();
+
+ // 2. snap1 -> HEAD
+ ASSERT_EQ(0, image.diff_iterate2("snap1", 0, size, true, this->whole_object,
+ vector_iterate_cb, &extents));
+ ASSERT_EQ(2u, extents.size());
+ ASSERT_EQ(diff_extent(0, 256, true, object_size), extents[0]);
+ ASSERT_EQ(diff_extent(1 << order, 256, true, object_size), extents[1]);
+ extents.clear();
+
+ // 3. snap2 -> HEAD
+ ASSERT_EQ(0, image.diff_iterate2("snap2", 0, size, true, this->whole_object,
+ vector_iterate_cb, &extents));
+ ASSERT_EQ(1u, extents.size());
+ ASSERT_EQ(diff_extent(1 << order, 256, true, object_size), extents[0]);
+ extents.clear();
+
+ // 4. snap3 -> HEAD
+ ASSERT_EQ(0, image.diff_iterate2("snap3", 0, size, true, this->whole_object,
+ vector_iterate_cb, &extents));
+ ASSERT_EQ(0u, extents.size());
+
+ ASSERT_PASSED(this->validate_object_map, image);
+ ASSERT_EQ(0, image.snap_set("snap3"));
+
+ // 5. beginning of time -> snap3
+ ASSERT_EQ(0, image.diff_iterate2(NULL, 0, size, true, this->whole_object,
+ vector_iterate_cb, &extents));
+ ASSERT_EQ(2u, extents.size());
+ ASSERT_EQ(diff_extent(0, 256, true, object_size), extents[0]);
+ ASSERT_EQ(diff_extent(1 << order, 256, true, object_size), extents[1]);
+ extents.clear();
+
+ // 6. snap1 -> snap3
+ ASSERT_EQ(0, image.diff_iterate2("snap1", 0, size, true, this->whole_object,
+ vector_iterate_cb, &extents));
+ ASSERT_EQ(2u, extents.size());
+ ASSERT_EQ(diff_extent(0, 256, true, object_size), extents[0]);
+ ASSERT_EQ(diff_extent(1 << order, 256, true, object_size), extents[1]);
+ extents.clear();
+
+ // 7. snap2 -> snap3
+ ASSERT_EQ(0, image.diff_iterate2("snap2", 0, size, true, this->whole_object,
+ vector_iterate_cb, &extents));
+ ASSERT_EQ(1u, extents.size());
+ ASSERT_EQ(diff_extent(1 << order, 256, true, object_size), extents[0]);
+ extents.clear();
+
+ ASSERT_PASSED(this->validate_object_map, image);
+ ASSERT_EQ(0, image.snap_set("snap2"));
+
+ // 8. beginning of time -> snap2
+ ASSERT_EQ(0, image.diff_iterate2(NULL, 0, size, true, this->whole_object,
+ vector_iterate_cb, &extents));
+ ASSERT_EQ(1u, extents.size());
+ ASSERT_EQ(diff_extent(0, 256, true, object_size), extents[0]);
+ extents.clear();
+
+ // 9. snap1 -> snap2
+ ASSERT_EQ(0, image.diff_iterate2("snap1", 0, size, true, this->whole_object,
+ vector_iterate_cb, &extents));
+ ASSERT_EQ(1u, extents.size());
+ ASSERT_EQ(diff_extent(0, 256, true, object_size), extents[0]);
+ extents.clear();
+
+ ASSERT_PASSED(this->validate_object_map, image);
+ ASSERT_EQ(0, image.snap_set("snap1"));
+
+ // 10. beginning of time -> snap1
+ ASSERT_EQ(0, image.diff_iterate2(NULL, 0, size, true, this->whole_object,
+ vector_iterate_cb, &extents));
+ ASSERT_EQ(0u, extents.size());
+
+ ASSERT_PASSED(this->validate_object_map, image);
+}
+
TYPED_TEST(DiffIterateTest, DiffIterateDiscard)
{
librados::IoCtx ioctx;
@@ -7655,50 +7864,6 @@ TYPED_TEST(DiffIterateTest, DiffIterateStress)
ASSERT_PASSED(this->validate_object_map, image);
}
-TYPED_TEST(DiffIterateTest, DiffIterateRegression6926)
-{
- librados::IoCtx ioctx;
- ASSERT_EQ(0, this->_rados.ioctx_create(this->m_pool_name.c_str(), ioctx));
-
- librbd::RBD rbd;
- librbd::Image image;
- int order = 0;
- std::string name = this->get_temp_image_name();
- uint64_t size = 20 << 20;
-
- ASSERT_EQ(0, create_image_pp(rbd, ioctx, name.c_str(), size, &order));
- ASSERT_EQ(0, rbd.open(ioctx, image, name.c_str(), NULL));
-
- uint64_t object_size = 0;
- if (this->whole_object) {
- object_size = 1 << order;
- }
- vector<diff_extent> extents;
- ceph::bufferlist bl;
-
- ASSERT_EQ(0, image.diff_iterate2(NULL, 0, size, true, this->whole_object,
- vector_iterate_cb, (void *) &extents));
- ASSERT_EQ(0u, extents.size());
-
- ASSERT_EQ(0, image.snap_create("snap1"));
- char data[256];
- memset(data, 1, sizeof(data));
- bl.append(data, 256);
- ASSERT_EQ(256, image.write(0, 256, bl));
-
- extents.clear();
- ASSERT_EQ(0, image.diff_iterate2(NULL, 0, size, true, this->whole_object,
- vector_iterate_cb, (void *) &extents));
- ASSERT_EQ(1u, extents.size());
- ASSERT_EQ(diff_extent(0, 256, true, object_size), extents[0]);
-
- ASSERT_EQ(0, image.snap_set("snap1"));
- extents.clear();
- ASSERT_EQ(0, image.diff_iterate2(NULL, 0, size, true, this->whole_object,
- vector_iterate_cb, (void *) &extents));
- ASSERT_EQ(static_cast<size_t>(0), extents.size());
-}
-
TYPED_TEST(DiffIterateTest, DiffIterateParent)
{
REQUIRE_FEATURE(RBD_FEATURE_LAYERING);
diff --git a/src/test/librbd/test_main.cc b/src/test/librbd/test_main.cc
index 2ff9f69dea9..82b72b1ef7e 100644
--- a/src/test/librbd/test_main.cc
+++ b/src/test/librbd/test_main.cc
@@ -17,6 +17,7 @@ extern void register_test_image_watcher();
extern void register_test_internal();
extern void register_test_journal_entries();
extern void register_test_journal_replay();
+extern void register_test_journal_stress();
extern void register_test_migration();
extern void register_test_mirroring();
extern void register_test_mirroring_watcher();
@@ -37,6 +38,7 @@ int main(int argc, char **argv)
register_test_internal();
register_test_journal_entries();
register_test_journal_replay();
+ register_test_journal_stress();
register_test_migration();
register_test_mirroring();
register_test_mirroring_watcher();
diff --git a/src/test/librbd/test_mock_Journal.cc b/src/test/librbd/test_mock_Journal.cc
index 2fe74d2fe46..589695c50b3 100644
--- a/src/test/librbd/test_mock_Journal.cc
+++ b/src/test/librbd/test_mock_Journal.cc
@@ -460,7 +460,7 @@ public:
bl.append_zero(length);
std::shared_lock owner_locker{mock_image_ctx.owner_lock};
- return mock_journal->append_write_event(0, length, bl, false);
+ return mock_journal->append_write_event({{0, length}}, bl, false);
}
uint64_t when_append_compare_and_write_event(
diff --git a/src/test/objectstore/Allocator_test.cc b/src/test/objectstore/Allocator_test.cc
index 8204179b531..0e76c479002 100644
--- a/src/test/objectstore/Allocator_test.cc
+++ b/src/test/objectstore/Allocator_test.cc
@@ -587,8 +587,7 @@ TEST_P(AllocTest, test_alloc_47883)
PExtentVector extents;
auto need = 0x3f980000;
auto got = alloc->allocate(need, 0x10000, 0, (int64_t)0, &extents);
- EXPECT_GT(got, 0);
- EXPECT_EQ(got, 0x630000);
+ EXPECT_GE(got, 0x630000);
}
TEST_P(AllocTest, test_alloc_50656_best_fit)
diff --git a/src/test/objectstore/fastbmap_allocator_test.cc b/src/test/objectstore/fastbmap_allocator_test.cc
index c5953198505..710b3798f7a 100644
--- a/src/test/objectstore/fastbmap_allocator_test.cc
+++ b/src/test/objectstore/fastbmap_allocator_test.cc
@@ -625,6 +625,8 @@ TEST(TestAllocatorLevel01, test_l2_contiguous_alignment)
ASSERT_EQ(bins_overall[cbits(num_chunks / 2) - 1], 1u);
{
+ // Original free space disposition (start chunk, count):
+ // <NC/2, NC/2>
size_t to_release = 2 * _1m + 0x1000;
// release 2M + 4K at the beginning
interval_vector_t r;
@@ -637,6 +639,8 @@ TEST(TestAllocatorLevel01, test_l2_contiguous_alignment)
ASSERT_EQ(bins_overall[cbits(num_chunks / 2) - 1], 1u);
}
{
+ // Original free space disposition (start chunk, count):
+ // <0, 513>, <NC / 2, NC / 2>
// allocate 4K within the deallocated range
uint64_t allocated4 = 0;
interval_vector_t a4;
@@ -652,79 +656,91 @@ TEST(TestAllocatorLevel01, test_l2_contiguous_alignment)
ASSERT_EQ(bins_overall[cbits(num_chunks / 2) - 1], 1u);
}
{
- // allocate 1M - should go to the second 1M chunk
+ // Original free space disposition (start chunk, count):
+ // <1, 512>, <NC / 2, NC / 2>
+ // allocate 1M - should go to offset 4096
uint64_t allocated4 = 0;
interval_vector_t a4;
al2.allocate_l2(_1m, _1m, &allocated4, &a4);
ASSERT_EQ(a4.size(), 1u);
ASSERT_EQ(allocated4, _1m);
- ASSERT_EQ(a4[0].offset, _1m);
+ ASSERT_EQ(a4[0].offset, 4096);
ASSERT_EQ(a4[0].length, _1m);
bins_overall.clear();
al2.collect_stats(bins_overall);
- ASSERT_EQ(bins_overall.size(), 3u);
- ASSERT_EQ(bins_overall[0], 1u);
- ASSERT_EQ(bins_overall[cbits((_1m - 0x1000) / 0x1000) - 1], 1u);
+ ASSERT_EQ(bins_overall.size(), 2u);
+ ASSERT_EQ(bins_overall[cbits(_1m / 0x1000) - 1], 1u);
ASSERT_EQ(bins_overall[cbits(num_chunks / 2) - 1], 1u);
}
{
+ // Original free space disposition (start chunk, count):
+ // <257, 256>, <NC / 2, NC / 2>
// and allocate yet another 8K within the deallocated range
uint64_t allocated4 = 0;
interval_vector_t a4;
al2.allocate_l2(0x2000, 0x1000, &allocated4, &a4);
ASSERT_EQ(a4.size(), 1u);
ASSERT_EQ(allocated4, 0x2000u);
- ASSERT_EQ(a4[0].offset, 0x1000u);
+ ASSERT_EQ(a4[0].offset, _1m + 0x1000u);
ASSERT_EQ(a4[0].length, 0x2000u);
bins_overall.clear();
al2.collect_stats(bins_overall);
- ASSERT_EQ(bins_overall[0], 1u);
- ASSERT_EQ(bins_overall[cbits((_1m - 0x3000) / 0x1000) - 1], 1u);
+ ASSERT_EQ(bins_overall.size(), 2u);
+ ASSERT_EQ(bins_overall[cbits((_1m - 0x2000) / 0x1000) - 1], 1u);
ASSERT_EQ(bins_overall[cbits(num_chunks / 2) - 1], 1u);
}
{
- // release just allocated 1M
+ // Original free space disposition (start chunk, count):
+ // <259, 254>, <NC / 2, NC / 2>
+ // release 4K~1M
interval_vector_t r;
- r.emplace_back(_1m, _1m);
+ r.emplace_back(0x1000, _1m);
al2.free_l2(r);
bins_overall.clear();
al2.collect_stats(bins_overall);
- ASSERT_EQ(bins_overall.size(), 2u);
- ASSERT_EQ(bins_overall[cbits((2 * _1m - 0x3000) / 0x1000) - 1], 1u);
+ ASSERT_EQ(bins_overall.size(), 3u);
+ //ASSERT_EQ(bins_overall[cbits((2 * _1m - 0x3000) / 0x1000) - 1], 1u);
+ ASSERT_EQ(bins_overall[cbits(_1m / 0x1000) - 1], 1u);
+ ASSERT_EQ(bins_overall[cbits((_1m - 0x2000) / 0x1000) - 1], 1u);
ASSERT_EQ(bins_overall[cbits(num_chunks / 2) - 1], 1u);
}
{
- // allocate 3M - should go to the second 1M chunk and @capacity/2
+ // Original free space disposition (start chunk, count):
+ // <1, 257>, <259, 254>, <NC / 2, NC / 2>
+ // allocate 3M - should go to the first 1M chunk and @capacity/2
uint64_t allocated4 = 0;
interval_vector_t a4;
al2.allocate_l2(3 * _1m, _1m, &allocated4, &a4);
ASSERT_EQ(a4.size(), 2u);
ASSERT_EQ(allocated4, 3 * _1m);
- ASSERT_EQ(a4[0].offset, _1m);
+ ASSERT_EQ(a4[0].offset, 0x1000);
ASSERT_EQ(a4[0].length, _1m);
ASSERT_EQ(a4[1].offset, capacity / 2);
ASSERT_EQ(a4[1].length, 2 * _1m);
bins_overall.clear();
al2.collect_stats(bins_overall);
- ASSERT_EQ(bins_overall.size(), 3u);
- ASSERT_EQ(bins_overall[0], 1u);
- ASSERT_EQ(bins_overall[cbits((_1m - 0x3000) / 0x1000) - 1], 1u);
+ ASSERT_EQ(bins_overall.size(), 2u);
+ ASSERT_EQ(bins_overall[cbits((_1m - 0x2000) / 0x1000) - 1], 1u);
ASSERT_EQ(bins_overall[cbits((num_chunks - 512) / 2) - 1], 1u);
}
{
- // release allocated 1M in the second meg chunk except
+ // Original free space disposition (start chunk, count):
+ // <259, 254>, <NC / 2 - 512, NC / 2 - 512>
+ // release allocated 1M in the first meg chunk except
// the first 4K chunk
interval_vector_t r;
- r.emplace_back(_1m + 0x1000, _1m);
+ r.emplace_back(0x1000, _1m);
al2.free_l2(r);
bins_overall.clear();
al2.collect_stats(bins_overall);
ASSERT_EQ(bins_overall.size(), 3u);
ASSERT_EQ(bins_overall[cbits(_1m / 0x1000) - 1], 1u);
- ASSERT_EQ(bins_overall[cbits((_1m - 0x3000) / 0x1000) - 1], 1u);
+ ASSERT_EQ(bins_overall[cbits((_1m - 0x2000) / 0x1000) - 1], 1u);
ASSERT_EQ(bins_overall[cbits((num_chunks - 512) / 2) - 1], 1u);
}
{
+ // Original free space disposition (start chunk, count):
+ // <1, 256>, <259, 254>, <NC / 2 - 512, NC / 2 - 512>
// release 2M @(capacity / 2)
interval_vector_t r;
r.emplace_back(capacity / 2, 2 * _1m);
@@ -733,10 +749,12 @@ TEST(TestAllocatorLevel01, test_l2_contiguous_alignment)
al2.collect_stats(bins_overall);
ASSERT_EQ(bins_overall.size(), 3u);
ASSERT_EQ(bins_overall[cbits(_1m / 0x1000) - 1], 1u);
- ASSERT_EQ(bins_overall[cbits((_1m - 0x3000) / 0x1000) - 1], 1u);
+ ASSERT_EQ(bins_overall[cbits((_1m - 0x2000) / 0x1000) - 1], 1u);
ASSERT_EQ(bins_overall[cbits((num_chunks) / 2) - 1], 1u);
}
{
+ // Original free space disposition (start chunk, count):
+ // <1, 256>, <259, 254>, <NC / 2, NC / 2>
// allocate 4x512K - should go to the second halves of
// the first and second 1M chunks and @(capacity / 2)
uint64_t allocated4 = 0;
@@ -744,51 +762,54 @@ TEST(TestAllocatorLevel01, test_l2_contiguous_alignment)
al2.allocate_l2(2 * _1m, _1m / 2, &allocated4, &a4);
ASSERT_EQ(a4.size(), 3u);
ASSERT_EQ(allocated4, 2 * _1m);
- ASSERT_EQ(a4[0].offset, _1m / 2);
+ ASSERT_EQ(a4[1].offset, 0x1000);
+ ASSERT_EQ(a4[1].length, _1m);
+ ASSERT_EQ(a4[0].offset, _1m + 0x3000);
ASSERT_EQ(a4[0].length, _1m / 2);
- ASSERT_EQ(a4[1].offset, _1m + _1m / 2);
- ASSERT_EQ(a4[1].length, _1m / 2);
ASSERT_EQ(a4[2].offset, capacity / 2);
- ASSERT_EQ(a4[2].length, _1m);
+ ASSERT_EQ(a4[2].length, _1m / 2);
bins_overall.clear();
al2.collect_stats(bins_overall);
- ASSERT_EQ(bins_overall.size(), 3u);
- ASSERT_EQ(bins_overall[0], 1u);
- // below we have 512K - 4K & 512K - 12K chunks which both fit into
- // the same bin = 6
- ASSERT_EQ(bins_overall[6], 2u);
+ ASSERT_EQ(bins_overall.size(), 2u);
+ ASSERT_EQ(bins_overall[cbits((_1m - 0x2000 - 0x80000) / 0x1000) - 1], 1u);
ASSERT_EQ(bins_overall[cbits((num_chunks - 256) / 2) - 1], 1u);
}
{
- // cleanup first 2M except except the last 4K chunk
+ // Original free space disposition (start chunk, count):
+ // <387, 126>, <NC / 2 + 128, NC / 2 - 128>
+ // cleanup first 1536K except the last 4K chunk
interval_vector_t r;
- r.emplace_back(0, 2 * _1m - 0x1000);
+ r.emplace_back(0, _1m + _1m / 2 - 0x1000);
al2.free_l2(r);
bins_overall.clear();
al2.collect_stats(bins_overall);
ASSERT_EQ(bins_overall.size(), 3u);
- ASSERT_EQ(bins_overall[0], 1u);
- ASSERT_EQ(bins_overall[cbits((_2m - 0x1000) / 0x1000) - 1], 1u);
+ ASSERT_EQ(bins_overall[cbits((_1m + _1m / 2 - 0x1000) / 0x1000) - 1], 1u);
+ ASSERT_EQ(bins_overall[cbits((_1m - 0x2000 - 0x80000) / 0x1000) - 1], 1u);
ASSERT_EQ(bins_overall[cbits((num_chunks - 256) / 2) - 1], 1u);
}
{
- // release 2M @(capacity / 2)
+ // Original free space disposition (start chunk, count):
+ // <0, 383> <387, 126>, <NC / 2 + 128, NC / 2 - 128>
+ // release 512K @(capacity / 2)
interval_vector_t r;
- r.emplace_back(capacity / 2, 2 * _1m);
+ r.emplace_back(capacity / 2, _1m / 2);
al2.free_l2(r);
bins_overall.clear();
al2.collect_stats(bins_overall);
ASSERT_EQ(bins_overall.size(), 3u);
- ASSERT_EQ(bins_overall[0], 1u);
- ASSERT_EQ(bins_overall[cbits((_2m - 0x1000) / 0x1000) - 1], 1u);
+ ASSERT_EQ(bins_overall[cbits((_1m + _1m / 2 - 0x1000) / 0x1000) - 1], 1u);
+ ASSERT_EQ(bins_overall[cbits((_1m - 0x2000 - 0x80000) / 0x1000) - 1], 1u);
ASSERT_EQ(bins_overall[cbits(num_chunks / 2) - 1], 1u);
}
{
- // allocate 132M using 4M granularity should go to (capacity / 2)
+ // Original free space disposition (start chunk, count):
+ // <0, 383> <387, 126>, <NC / 2, NC / 2>
+    // allocate 132M (= 33792 * 4096) using 4M granularity - should go to (capacity / 2)
uint64_t allocated4 = 0;
interval_vector_t a4;
al2.allocate_l2(132 * _1m, 4 * _1m , &allocated4, &a4);
@@ -799,24 +820,40 @@ TEST(TestAllocatorLevel01, test_l2_contiguous_alignment)
bins_overall.clear();
al2.collect_stats(bins_overall);
ASSERT_EQ(bins_overall.size(), 3u);
+ ASSERT_EQ(bins_overall[cbits((_1m + _1m / 2 - 0x1000) / 0x1000) - 1], 1u);
+ ASSERT_EQ(bins_overall[cbits((_1m - 0x2000 - 0x80000) / 0x1000) - 1], 1u);
+ ASSERT_EQ(bins_overall[cbits(num_chunks / 2 - 33792) - 1], 1u);
}
{
- // cleanup left 4K chunk in the first 2M
+ // Original free space disposition (start chunk, count):
+ // <0, 383> <387, 126>, <NC / 2 + 33792, NC / 2 - 33792>
+ // cleanup remaining 4*4K chunks in the first 2M
interval_vector_t r;
- r.emplace_back(2 * _1m - 0x1000, 0x1000);
+ r.emplace_back(383 * 4096, 4 * 0x1000);
al2.free_l2(r);
bins_overall.clear();
al2.collect_stats(bins_overall);
ASSERT_EQ(bins_overall.size(), 2u);
+ ASSERT_EQ(bins_overall[cbits((2 * _1m + 0x1000) / 0x1000) - 1], 1u);
+ ASSERT_EQ(bins_overall[cbits(num_chunks / 2 - 33792) - 1], 1u);
}
{
+ // Original free space disposition (start chunk, count):
+ // <0, 513>, <NC / 2 + 33792, NC / 2 - 33792>
// release 132M @(capacity / 2)
interval_vector_t r;
r.emplace_back(capacity / 2, 132 * _1m);
al2.free_l2(r);
+ bins_overall.clear();
+ al2.collect_stats(bins_overall);
+ ASSERT_EQ(bins_overall.size(), 2u);
+ ASSERT_EQ(bins_overall[cbits((2 * _1m + 0x1000) / 0x1000) - 1], 1u);
+ ASSERT_EQ(bins_overall[cbits(num_chunks / 2) - 1], 1u);
}
{
+ // Original free space disposition (start chunk, count):
+ // <0, 513>, <NC / 2, NC / 2>
// allocate 132M using 2M granularity should go to the first chunk and to
// (capacity / 2)
uint64_t allocated4 = 0;
@@ -827,14 +864,31 @@ TEST(TestAllocatorLevel01, test_l2_contiguous_alignment)
ASSERT_EQ(a4[0].length, 2 * _1m);
ASSERT_EQ(a4[1].offset, capacity / 2);
ASSERT_EQ(a4[1].length, 130 * _1m);
+
+ bins_overall.clear();
+ al2.collect_stats(bins_overall);
+
+ ASSERT_EQ(bins_overall.size(), 2u);
+ ASSERT_EQ(bins_overall[cbits(0)], 1u);
+ ASSERT_EQ(bins_overall[cbits(num_chunks / 2 - 33792) - 1], 1u);
}
{
+ // Original free space disposition (start chunk, count):
+ // <512, 1>, <NC / 2 + 33792, NC / 2 - 33792>
// release 130M @(capacity / 2)
interval_vector_t r;
r.emplace_back(capacity / 2, 132 * _1m);
al2.free_l2(r);
+ bins_overall.clear();
+ al2.collect_stats(bins_overall);
+
+ ASSERT_EQ(bins_overall.size(), 2u);
+ ASSERT_EQ(bins_overall[cbits(0)], 1u);
+ ASSERT_EQ(bins_overall[cbits(num_chunks / 2) - 1], 1u);
}
{
+ // Original free space disposition (start chunk, count):
+ // <512,1>, <NC / 2, NC / 2>
// release 4K~16K
// release 28K~32K
// release 68K~24K
@@ -843,21 +897,46 @@ TEST(TestAllocatorLevel01, test_l2_contiguous_alignment)
r.emplace_back(0x7000, 0x8000);
r.emplace_back(0x11000, 0x6000);
al2.free_l2(r);
+
+ bins_overall.clear();
+ al2.collect_stats(bins_overall);
+
+ ASSERT_EQ(bins_overall.size(), 4u);
+ ASSERT_EQ(bins_overall[cbits(0)], 1u);
+    ASSERT_EQ(bins_overall[cbits(0x4000 / 0x1000) - 1], 2u); // accounts for both 0x4000 & 0x6000
+ ASSERT_EQ(bins_overall[cbits(0x8000 / 0x1000) - 1], 1u);
+ ASSERT_EQ(bins_overall[cbits(num_chunks / 2) - 1], 1u);
}
{
- // allocate 32K using 16K granularity - should bypass the first
- // unaligned extent, use the second free extent partially given
- // the 16K alignment and then fallback to capacity / 2
+ // Original free space disposition (start chunk, count):
+    // <1, 4>, <7, 8>, <17, 6>, <512, 1>, <NC / 2, NC / 2>
+ // allocate 80K using 16K granularity
uint64_t allocated4 = 0;
interval_vector_t a4;
- al2.allocate_l2(0x8000, 0x4000, &allocated4, &a4);
- ASSERT_EQ(a4.size(), 2u);
- ASSERT_EQ(a4[0].offset, 0x8000u);
- ASSERT_EQ(a4[0].length, 0x4000u);
- ASSERT_EQ(a4[1].offset, capacity / 2);
+ al2.allocate_l2(0x14000, 0x4000, &allocated4, &a4);
+
+    ASSERT_EQ(a4.size(), 4u);
+ ASSERT_EQ(a4[1].offset, 0x1000u);
ASSERT_EQ(a4[1].length, 0x4000u);
- }
+ ASSERT_EQ(a4[0].offset, 0x7000u);
+ ASSERT_EQ(a4[0].length, 0x8000u);
+ ASSERT_EQ(a4[2].offset, 0x11000u);
+ ASSERT_EQ(a4[2].length, 0x4000u);
+ ASSERT_EQ(a4[3].offset, capacity / 2);
+ ASSERT_EQ(a4[3].length, 0x4000u);
+
+ bins_overall.clear();
+ al2.collect_stats(bins_overall);
+ ASSERT_EQ(bins_overall.size(), 3u);
+ ASSERT_EQ(bins_overall[cbits(0)], 1u);
+ ASSERT_EQ(bins_overall[cbits(0x2000 / 0x1000) - 1], 1u);
+ ASSERT_EQ(bins_overall[cbits(num_chunks / 2 - 1) - 1], 1u);
+ }
+ {
+ // Original free space disposition (start chunk, count):
+    // <21, 2>, <512, 1>, <NC / 2 + 1, NC / 2 - 1>
+ }
}
std::cout << "Done L2 cont aligned" << std::endl;
}
@@ -913,7 +992,7 @@ TEST(TestAllocatorLevel01, test_4G_alloc_bug2)
al2.allocate_l2(0x3e000000, _1m, &allocated4, &a4);
ASSERT_EQ(a4.size(), 2u);
ASSERT_EQ(allocated4, 0x3e000000u);
- ASSERT_EQ(a4[0].offset, 0x5fed00000u);
+ ASSERT_EQ(a4[0].offset, 0x5fec30000u);
ASSERT_EQ(a4[0].length, 0x1300000u);
ASSERT_EQ(a4[1].offset, 0x628000000u);
ASSERT_EQ(a4[1].length, 0x3cd00000u);
diff --git a/src/test/objectstore/store_test.cc b/src/test/objectstore/store_test.cc
index 03dc1a87e1f..25e1721c403 100644
--- a/src/test/objectstore/store_test.cc
+++ b/src/test/objectstore/store_test.cc
@@ -92,7 +92,23 @@ static bool bl_eq(bufferlist& expected, bufferlist& actual)
return false;
}
+void dump_bluefs_stats()
+{
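+  // run the "bluefs stats" admin socket command and print its output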
+ AdminSocket* admin_socket = g_ceph_context->get_admin_socket();
+ ceph_assert(admin_socket);
+
+ ceph::bufferlist in, out;
+ ostringstream err;
+ auto r = admin_socket->execute_command(
+ { "{\"prefix\": \"bluefs stats\"}" },
+ in, err, &out);
+ if (r != 0) {
+ cerr << "failure querying: " << cpp_strerror(r) << std::endl;
+ } else {
+ std::cout << std::string(out.c_str(), out.length()) << std::endl;
+ }
+}
template <typename T>
int queue_transaction(
@@ -9607,9 +9623,9 @@ TEST_P(StoreTestSpecificAUSize, BluestoreRepairSharedBlobTest) {
string key;
_key_encode_u64(1, &key);
bluestore_shared_blob_t sb(1);
- sb.ref_map.get(0x2000, block_size);
- sb.ref_map.get(0x4000, block_size);
- sb.ref_map.get(0x4000, block_size);
+ sb.ref_map.get(0x822000, block_size);
+ sb.ref_map.get(0x824000, block_size);
+ sb.ref_map.get(0x824000, block_size);
bufferlist bl;
encode(sb, bl);
bstore->inject_broken_shared_blob_key(key, bl);
@@ -10462,24 +10478,11 @@ void doManySetAttr(ObjectStore* store,
std::cout << "done" << std::endl;
do_check_fn(store);
- AdminSocket* admin_socket = g_ceph_context->get_admin_socket();
- ceph_assert(admin_socket);
-
- ceph::bufferlist in, out;
- ostringstream err;
-
- auto r = admin_socket->execute_command(
- { "{\"prefix\": \"bluefs stats\"}" },
- in, err, &out);
- if (r != 0) {
- cerr << "failure querying: " << cpp_strerror(r) << std::endl;
- } else {
- std::cout << std::string(out.c_str(), out.length()) << std::endl;
- }
+ dump_bluefs_stats();
test_obj.shutdown();
}
-TEST_P(StoreTestSpecificAUSize, SpilloverTest) {
+TEST_P(StoreTestSpecificAUSize, SpilloverLegacyTest) {
if (string(GetParam()) != "bluestore")
return;
if (smr) {
@@ -10509,24 +10512,12 @@ TEST_P(StoreTestSpecificAUSize, SpilloverTest) {
ceph_assert(bstore);
bstore->compact();
const PerfCounters* logger = bstore->get_bluefs_perf_counters();
- //experimentally it was discovered that this case results in 400+MB spillover
- //using lower 300MB threshold just to be safe enough
- std::cout << "DB used:" << logger->get(l_bluefs_db_used_bytes) << std::endl;
- std::cout << "SLOW used:" << logger->get(l_bluefs_slow_used_bytes) << std::endl;
- ASSERT_GE(logger->get(l_bluefs_slow_used_bytes), 16 * 1024 * 1024);
-
- struct store_statfs_t statfs;
- osd_alert_list_t alerts;
- int r = store->statfs(&statfs, &alerts);
- ASSERT_EQ(r, 0);
- ASSERT_EQ(alerts.count("BLUEFS_SPILLOVER"), 1);
- std::cout << "spillover_alert:" << alerts.find("BLUEFS_SPILLOVER")->second
- << std::endl;
+ ASSERT_GT(logger->get(l_bluefs_slow_used_bytes), 0);
}
);
}
-TEST_P(StoreTestSpecificAUSize, SpilloverFixedTest) {
+TEST_P(StoreTestSpecificAUSize, SpilloverLegacyFixedByFitToFastTest) {
if (string(GetParam()) != "bluestore")
return;
if (smr) {
@@ -10536,8 +10527,15 @@ TEST_P(StoreTestSpecificAUSize, SpilloverFixedTest) {
SetVal(g_conf(), "bluestore_block_db_create", "true");
SetVal(g_conf(), "bluestore_block_db_size", "3221225472");
- SetVal(g_conf(), "bluestore_volume_selection_policy", "use_some_extra");
- SetVal(g_conf(), "bluestore_volume_selection_reserved", "1"); // just use non-zero to enable
+ SetVal(g_conf(), "bluestore_volume_selection_policy", "fit_to_fast");
+ // original RocksDB settings used before https://github.com/ceph/ceph/pull/47221/
+ // which enable BlueFS spillover.
+ SetVal(g_conf(), "bluestore_rocksdb_options",
+ "compression=kNoCompression,max_write_buffer_number=4,"
+ "min_write_buffer_number_to_merge=1,recycle_log_file_num=4,"
+ "write_buffer_size=268435456,writable_file_max_buffer_size=0,"
+ "compaction_readahead_size=2097152,max_background_compactions=2,"
+ "max_total_wal_size=1073741824");
g_conf().apply_changes(nullptr);
@@ -10549,12 +10547,28 @@ TEST_P(StoreTestSpecificAUSize, SpilloverFixedTest) {
ceph_assert(bstore);
bstore->compact();
const PerfCounters* logger = bstore->get_bluefs_perf_counters();
- ASSERT_EQ(0, logger->get(l_bluefs_slow_used_bytes));
+ ASSERT_EQ(logger->get(l_bluefs_slow_used_bytes), 0);
}
);
}
-TEST_P(StoreTestSpecificAUSize, SpilloverFixed2Test) {
+void do_bluefs_write(BlueFS* _fs,
+ const char* dirname,
+ const char* filename,
+ uint64_t to_write)
+{
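+  // append 1 MiB buffers of 'a' until at least to_write bytes are written,
+  // fsync'ing after every append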
+ BlueFS::FileWriter* h;
+ ASSERT_EQ(0, _fs->open_for_write(dirname, filename, &h, false));
+ uint64_t buf_size = 1ull << 20;
+ string buf(buf_size, 'a');
+ for (uint64_t w = 0; w < to_write; w += buf_size) {
+ h->append(buf.c_str(), buf_size);
+ _fs->fsync(h);
+ }
+ _fs->close_writer(h);
+}
+
+TEST_P(StoreTestSpecificAUSize, SpilloverTest) {
if (string(GetParam()) != "bluestore")
return;
if (smr) {
@@ -10564,27 +10578,31 @@ TEST_P(StoreTestSpecificAUSize, SpilloverFixed2Test) {
SetVal(g_conf(), "bluestore_block_db_create", "true");
SetVal(g_conf(), "bluestore_block_db_size", "3221225472");
- SetVal(g_conf(), "bluestore_volume_selection_policy", "use_some_extra");
- //default 2.0 factor results in too high threshold, using less value
- // that results in less but still present spillover.
- SetVal(g_conf(), "bluestore_volume_selection_reserved_factor", "0.5");
+ SetVal(g_conf(), "bluestore_volume_selection_policy", "rocksdb_original");
g_conf().apply_changes(nullptr);
StartDeferred(65536);
- doManySetAttr(store.get(),
- [&](ObjectStore* _store) {
+ BlueStore* bstore = dynamic_cast<BlueStore*> (store.get());
+ ceph_assert(bstore);
+ BlueFS* fs = bstore->get_bluefs();
+ do_bluefs_write(fs, "db", "file1", 1ull << 30); // 1GB
+ do_bluefs_write(fs, "db.slow", "file2", 1ull << 30); // 1 GB
- BlueStore* bstore = dynamic_cast<BlueStore*> (_store);
- ceph_assert(bstore);
- bstore->compact();
- const PerfCounters* logger = bstore->get_bluefs_perf_counters();
- ASSERT_LE(logger->get(l_bluefs_slow_used_bytes), 300 * 1024 * 1024); // see SpilloverTest for 300MB choice rationale
- }
- );
+ dump_bluefs_stats();
+ const PerfCounters* logger = bstore->get_bluefs_perf_counters();
+ ASSERT_EQ(1ull << 30, logger->get(l_bluefs_slow_used_bytes));
+
+ struct store_statfs_t statfs;
+ osd_alert_list_t alerts;
+ int r = store->statfs(&statfs, &alerts);
+ ASSERT_EQ(r, 0);
+ ASSERT_EQ(alerts.count("BLUEFS_SPILLOVER"), 1);
+ std::cout << "spillover_alert:" << alerts.find("BLUEFS_SPILLOVER")->second
+ << std::endl;
}
-TEST_P(StoreTestSpecificAUSize, SpilloverFixed3Test) {
+TEST_P(StoreTestSpecificAUSize, SpilloverFixedCompletelyTest) {
if (string(GetParam()) != "bluestore")
return;
if (smr) {
@@ -10594,21 +10612,60 @@ TEST_P(StoreTestSpecificAUSize, SpilloverFixed3Test) {
SetVal(g_conf(), "bluestore_block_db_create", "true");
SetVal(g_conf(), "bluestore_block_db_size", "3221225472");
- SetVal(g_conf(), "bluestore_volume_selection_policy", "fit_to_fast");
+ SetVal(g_conf(), "bluestore_volume_selection_policy", "use_some_extra");
+ SetVal(g_conf(), "bluestore_volume_selection_reserved", "1"); // just use non-zero to enable
g_conf().apply_changes(nullptr);
StartDeferred(65536);
- doManySetAttr(store.get(),
- [&](ObjectStore* _store) {
+ BlueStore* bstore = dynamic_cast<BlueStore*> (store.get());
+ ceph_assert(bstore);
+ BlueFS* fs = bstore->get_bluefs();
+ do_bluefs_write(fs, "db", "file1", 1ull << 30); // 1GB
+ do_bluefs_write(fs, "db.slow", "file2", 1ull << 30); // 1 GB
- BlueStore* bstore = dynamic_cast<BlueStore*> (_store);
- ceph_assert(bstore);
- bstore->compact();
- const PerfCounters* logger = bstore->get_bluefs_perf_counters();
- ASSERT_EQ(logger->get(l_bluefs_slow_used_bytes), 0); // reffering to SpilloverFixedTest
- }
- );
+ dump_bluefs_stats();
+ const PerfCounters* logger = bstore->get_bluefs_perf_counters();
+ ASSERT_EQ(0, logger->get(l_bluefs_slow_used_bytes));
+}
+
+TEST_P(StoreTestSpecificAUSize, SpilloverFixedPartialTest) {
+ if (string(GetParam()) != "bluestore")
+ return;
+ if (smr) {
+ cout << "SKIP: (FIXME?) adjust me for smr at some point?" << std::endl;
+ return;
+ }
+
+ SetVal(g_conf(), "bluestore_block_db_create", "true");
+ SetVal(g_conf(), "bluestore_block_db_size", stringify(3ull << 30).c_str());
+ SetVal(g_conf(), "bluestore_volume_selection_policy", "use_some_extra");
+  // the default 2.0 factor results in too high a threshold; use a lower
+  // value that still produces some, but reduced, spillover.
+ SetVal(g_conf(), "bluestore_volume_selection_reserved_factor", "1");
+
+ g_conf().apply_changes(nullptr);
+
+ StartDeferred(65536);
+ BlueStore* bstore = dynamic_cast<BlueStore*> (store.get());
+ ceph_assert(bstore);
+ BlueFS* fs = bstore->get_bluefs();
+ do_bluefs_write(fs, "db", "file1", 1ull << 30); // 1 GB
+ do_bluefs_write(fs, "db.slow", "file2", 1ull << 30); // 1 GB
+
+ dump_bluefs_stats();
+ const PerfCounters* logger = bstore->get_bluefs_perf_counters();
+ ASSERT_LT(100ull << 20, logger->get(l_bluefs_slow_used_bytes));
+ ASSERT_GT(1ull << 30, logger->get(l_bluefs_slow_used_bytes));
+
+ struct store_statfs_t statfs;
+ osd_alert_list_t alerts;
+ int r = store->statfs(&statfs, &alerts);
+ ASSERT_EQ(r, 0);
+ ASSERT_EQ(alerts.count("BLUEFS_SPILLOVER"), 1);
+ std::cout << "spillover_alert:" << alerts.find("BLUEFS_SPILLOVER")->second
+ << std::endl;
}
TEST_P(StoreTestSpecificAUSize, Ticket45195Repro) {
@@ -10783,19 +10840,7 @@ TEST_P(StoreTestSpecificAUSize, BluefsWriteInSingleDiskEnvTest) {
bstore->inject_bluefs_file("db.wal", "store_test_injection_wal", 1 << 20ul);
bstore->inject_bluefs_file("db", "store_test_injection_wal", 1 << 20ul);
- AdminSocket* admin_socket = g_ceph_context->get_admin_socket();
- ceph_assert(admin_socket);
-
- ceph::bufferlist in, out;
- ostringstream err;
- auto r = admin_socket->execute_command(
- { "{\"prefix\": \"bluefs stats\"}" },
- in, err, &out);
- if (r != 0) {
- cerr << "failure querying: " << cpp_strerror(r) << std::endl;
- } else {
- std::cout << std::string(out.c_str(), out.length()) << std::endl;
- }
+ dump_bluefs_stats();
}
TEST_P(StoreTestSpecificAUSize, BluefsWriteInNoWalDiskEnvTest) {
@@ -10816,20 +10861,7 @@ TEST_P(StoreTestSpecificAUSize, BluefsWriteInNoWalDiskEnvTest) {
bstore->inject_bluefs_file("db.wal", "store_test_injection_wal", 1 << 20ul);
bstore->inject_bluefs_file("db", "store_test_injection_wal", 1 << 20ul);
- AdminSocket* admin_socket = g_ceph_context->get_admin_socket();
- ceph_assert(admin_socket);
-
- ceph::bufferlist in, out;
- ostringstream err;
- auto r = admin_socket->execute_command(
- { "{\"prefix\": \"bluefs stats\"}" },
- in, err, &out);
- if (r != 0) {
- cerr << "failure querying: " << cpp_strerror(r) << std::endl;
- }
- else {
- std::cout << std::string(out.c_str(), out.length()) << std::endl;
- }
+ dump_bluefs_stats();
}
TEST_P(StoreTestOmapUpgrade, NoOmapHeader) {
@@ -11005,6 +11037,8 @@ int main(int argc, char **argv) {
g_ceph_context->_conf.set_val_or_die("bluestore_debug_randomize_serial_transaction",
"10");
+ g_ceph_context->_conf.set_val_or_die("bluefs_check_volume_selector_on_umount", "true");
+
g_ceph_context->_conf.set_val_or_die("bdev_debug_aio", "true");
// specify device size
diff --git a/src/test/pybind/test_rbd.py b/src/test/pybind/test_rbd.py
index 7b5f31b577a..0ce3c0dd90c 100644
--- a/src/test/pybind/test_rbd.py
+++ b/src/test/pybind/test_rbd.py
@@ -415,6 +415,18 @@ def test_remove_canceled(tmp_image):
assert_raises(OperationCanceled, RBD().remove, ioctx, image_name,
on_progress=progress_cb)
+def test_remove_with_progress_except():
+ create_image()
+ d = {'received_callback': False}
+ def progress_cb(current, total):
+ d['received_callback'] = True
+ raise Exception()
+
+ # exception is logged and ignored with a Cython warning:
+ # Exception ignored in: 'rbd.progress_callback'
+ RBD().remove(ioctx, image_name, on_progress=progress_cb)
+ eq(True, d['received_callback'])
+
def test_rename(tmp_image):
rbd = RBD()
image_name2 = get_temp_image_name()
@@ -1251,6 +1263,16 @@ class TestImage(object):
assert(comp.get_return_value() < 0)
eq(sys.getrefcount(comp), 2)
+ # test3: except case
+ def cbex(_, buf):
+ raise KeyError()
+
+ def test3():
+ comp = self.image.aio_read(IMG_SIZE, 20, cbex)
+ comp.wait_for_complete_and_cb()
+
+ assert_raises(KeyError, test3)
+
def test_aio_write(self):
retval = [None]
def cb(comp):
diff --git a/src/test/rgw/test_rgw_lc.cc b/src/test/rgw/test_rgw_lc.cc
index 83a4cac676d..d10b482cbfc 100644
--- a/src/test/rgw/test_rgw_lc.cc
+++ b/src/test/rgw/test_rgw_lc.cc
@@ -5,7 +5,6 @@
#include "rgw_lc.h"
#include "rgw_lc_s3.h"
#include <gtest/gtest.h>
-//#include <spawn/spawn.hpp>
#include <string>
#include <vector>
#include <stdexcept>
@@ -107,3 +106,239 @@ TEST(TestLCFilterInvalidAnd, XMLDoc3)
/* check our flags */
ASSERT_EQ(filter.get_flags(), uint32_t(LCFlagType::none));
}
+
+struct LCWorkTimeTests : ::testing::Test
+{
+ CephContext* cct;
+ std::unique_ptr<RGWLC::LCWorker> worker;
+
+ // expects input in the form of "%m/%d/%y %H:%M:%S"; e.g., "01/15/23 23:59:01"
+ utime_t get_utime_by_date_time_string(const std::string& date_time_str)
+ {
+ struct tm tm{};
+ struct timespec ts = {0};
+
+ strptime(date_time_str.c_str(), "%m/%d/%y %H:%M:%S", &tm);
+ ts.tv_sec = mktime(&tm);
+
+ return utime_t(ts);
+ }
+
+ // expects a map from input value (date & time string) to expected result (boolean)
+ void run_should_work_test(const auto& test_values_to_expectations_map) {
+ for (const auto& [date_time_str, expected_value] : test_values_to_expectations_map) {
+ auto ut = get_utime_by_date_time_string(date_time_str);
+ auto should_work = worker->should_work(ut);
+
+ ASSERT_EQ(should_work, expected_value)
+ << "input time: " << ut
+ << " expected: " << expected_value
+ << " should_work: " << should_work
+ << " work-time-window: " << cct->_conf->rgw_lifecycle_work_time << std::endl;
+ }
+ }
+
+ // expects a map from input value (a tuple of date & time strings) to expected result (seconds)
+ void run_schedule_next_start_time_test(const auto& test_values_to_expectations_map) {
+ for (const auto& [date_time_str_tuple, expected_value] : test_values_to_expectations_map) {
+ auto work_started_at = get_utime_by_date_time_string(std::get<0>(date_time_str_tuple));
+ auto work_completed_at = get_utime_by_date_time_string(std::get<1>(date_time_str_tuple));
+ auto wait_secs_till_next_start = worker->schedule_next_start_time(work_started_at, work_completed_at);
+
+ ASSERT_EQ(wait_secs_till_next_start, expected_value)
+ << "work_started_at: " << work_started_at
+ << " work_completed_at: " << work_completed_at
+ << " expected: " << expected_value
+ << " wait_secs_till_next_start: " << wait_secs_till_next_start
+ << " work-time-window: " << cct->_conf->rgw_lifecycle_work_time << std::endl;
+ }
+ }
+
+protected:
+
+ void SetUp() override {
+ cct = (new CephContext(CEPH_ENTITY_TYPE_ANY))->get();
+
+ cct->_conf->set_value("rgw_lc_max_wp_worker", 0, 0); // no need to create a real workpool
+ worker = std::make_unique<RGWLC::LCWorker>(nullptr, cct, nullptr, 0);
+ }
+
+ void TearDown() override {
+ worker.reset();
+ cct->put();
+ }
+};
+
+TEST_F(LCWorkTimeTests, ShouldWorkDefaultWorkTime)
+{
+ std::unordered_map<std::string, bool> test_values_to_expectations = {
+ {"01/01/23 00:00:00", true},
+    {"01/01/24 00:00:00", true}, // the date is not relevant, only the time-of-day window matters
+ {"01/01/23 00:00:01", true},
+ {"01/01/23 03:00:00", true},
+ {"01/01/23 05:59:59", true},
+ {"01/01/23 06:00:00", true},
+    {"01/01/23 06:00:59", true}, // seconds don't matter, only hours and minutes do
+ {"01/01/23 06:01:00", false},
+ {"01/01/23 23:59:59", false},
+ {"01/02/23 23:59:59", false},
+ {"01/01/23 12:00:00", false},
+ {"01/01/23 14:00:00", false}
+ };
+
+ run_should_work_test(test_values_to_expectations);
+}
+
+TEST_F(LCWorkTimeTests, ShouldWorkCustomWorkTimeEndTimeInTheSameDay)
+{
+ cct->_conf->rgw_lifecycle_work_time = "14:00-16:00";
+
+ std::unordered_map<std::string, bool> test_values_to_expectations = {
+ {"01/01/23 00:00:00", false},
+ {"01/01/23 12:00:00", false},
+ {"01/01/24 13:59:59", false},
+ {"01/01/23 14:00:00", true},
+ {"01/01/23 16:00:00", true},
+ {"01/01/23 16:00:59", true},
+ {"01/01/23 16:01:00", false},
+ {"01/01/23 17:00:00", false},
+ {"01/01/23 23:59:59", false},
+ };
+
+ run_should_work_test(test_values_to_expectations);
+}
+
+TEST_F(LCWorkTimeTests, ShouldWorkCustomWorkTimeEndTimeInTheSameDay24Hours)
+{
+ cct->_conf->rgw_lifecycle_work_time = "00:00-23:59";
+
+ std::unordered_map<std::string, bool> test_values_to_expectations = {
+ {"01/01/23 23:59:00", true},
+ {"01/01/23 23:59:59", true},
+ {"01/01/23 00:00:00", true},
+ {"01/01/23 00:00:01", true},
+ {"01/01/23 00:01:00", true},
+ {"01/01/23 01:00:00", true},
+ {"01/01/23 12:00:00", true},
+ {"01/01/23 17:00:00", true},
+ {"01/01/23 23:00:00", true}
+ };
+
+ run_should_work_test(test_values_to_expectations);
+}
+
+
+TEST_F(LCWorkTimeTests, ShouldWorkCustomWorkTimeEndTimeInTheNextDay)
+{
+ cct->_conf->rgw_lifecycle_work_time = "14:00-01:00";
+
+ std::unordered_map<std::string, bool> test_values_to_expectations = {
+ {"01/01/23 13:59:00", false},
+ {"01/01/23 13:59:59", false},
+ {"01/01/24 14:00:00", true}, // used-to-fail
+ {"01/01/24 17:00:00", true}, // used-to-fail
+ {"01/01/24 23:59:59", true}, // used-to-fail
+ {"01/01/23 00:00:00", true}, // used-to-fail
+ {"01/01/23 00:59:59", true}, // used-to-fail
+ {"01/01/23 01:00:00", true}, // used-to-fail
+ {"01/01/23 01:00:59", true}, // used-to-fail
+ {"01/01/23 01:01:00", false},
+ {"01/01/23 05:00:00", false},
+ {"01/01/23 12:00:00", false},
+ {"01/01/23 13:00:00", false}
+ };
+
+ run_should_work_test(test_values_to_expectations);
+}
+
+TEST_F(LCWorkTimeTests, ShouldWorkCustomWorkTimeEndTimeInTheNextDay24Hours)
+{
+ cct->_conf->rgw_lifecycle_work_time = "14:00-13:59";
+
+ // all of the below cases used-to-fail
+ std::unordered_map<std::string, bool> test_values_to_expectations = {
+ {"01/01/23 00:00:00", true},
+ {"01/01/23 00:00:01", true},
+ {"01/01/23 00:01:00", true},
+ {"01/01/24 01:00:00", true},
+ {"01/01/24 12:00:00", true},
+ {"01/01/24 13:00:00", true},
+ {"01/01/24 13:59:00", true},
+ {"01/01/24 13:59:59", true},
+ {"01/01/23 14:00:00", true},
+ {"01/01/23 14:00:01", true},
+ {"01/01/23 14:01:00", true},
+ {"01/01/23 16:00:00", true},
+ {"01/01/23 23:59:00", true},
+ {"01/01/23 23:59:59", true},
+ };
+
+ run_should_work_test(test_values_to_expectations);
+}
+
+TEST_F(LCWorkTimeTests, ShouldWorkCustomWorkTimeEndTimeInTheNextDayIrregularMins)
+{
+ cct->_conf->rgw_lifecycle_work_time = "22:15-03:33";
+
+ std::unordered_map<std::string, bool> test_values_to_expectations = {
+ {"01/01/23 22:14:59", false},
+ {"01/01/23 22:15:00", true}, // used-to-fail
+ {"01/01/24 00:00:00", true}, // used-to-fail
+ {"01/01/24 01:00:00", true}, // used-to-fail
+ {"01/01/24 02:00:00", true}, // used-to-fail
+ {"01/01/23 03:33:00", true}, // used-to-fail
+ {"01/01/23 03:33:59", true}, // used-to-fail
+ {"01/01/23 03:34:00", false},
+ {"01/01/23 04:00:00", false},
+ {"01/01/23 12:00:00", false},
+ {"01/01/23 22:00:00", false},
+ };
+
+ run_should_work_test(test_values_to_expectations);
+}
+
+TEST_F(LCWorkTimeTests, ShouldWorkCustomWorkTimeStartEndSameHour)
+{
+ cct->_conf->rgw_lifecycle_work_time = "22:15-22:45";
+
+ std::unordered_map<std::string, bool> test_values_to_expectations = {
+ {"01/01/23 22:14:59", false},
+ {"01/01/23 22:15:00", true},
+ {"01/01/24 22:44:59", true},
+ {"01/01/24 22:45:59", true},
+ {"01/01/24 22:46:00", false},
+ {"01/01/23 23:00:00", false},
+ {"01/01/23 00:00:00", false},
+ {"01/01/23 12:00:00", false},
+ {"01/01/23 21:00:00", false},
+ };
+
+ run_should_work_test(test_values_to_expectations);
+}
+
+TEST_F(LCWorkTimeTests, ScheduleNextStartTime)
+{
+ cct->_conf->rgw_lifecycle_work_time = "22:15-03:33";
+
+ // items of the map: [ (work_started_time, work_completed_time), expected_value (seconds) ]
+ //
+  // expected_value is the difference between the configured start time (i.e., 22:15:00) and
+  // the second item of the tuple (i.e., work_completed_time).
+  //
+  // Note that the seconds of the work completion time are taken into account, but the date is not relevant.
+  // e.g., the first testcase: 75713 == 22:15:00 - 01:13:07 (https://tinyurl.com/ydm86752)
+ std::map<std::tuple<std::string, std::string>, int> test_values_to_expectations = {
+ {{"01/01/23 22:15:05", "01/01/23 01:13:07"}, 75713},
+ {{"01/01/23 22:15:05", "01/02/23 01:13:07"}, 75713},
+ {{"01/01/23 22:15:05", "01/01/23 22:17:07"}, 86273},
+ {{"01/01/23 22:15:05", "01/02/23 22:17:07"}, 86273},
+ {{"01/01/23 22:15:05", "01/01/23 22:14:00"}, 60},
+ {{"01/01/23 22:15:05", "01/02/23 22:14:00"}, 60},
+ {{"01/01/23 22:15:05", "01/01/23 22:15:00"}, 24 * 60 * 60},
+ {{"01/01/23 22:15:05", "01/02/23 22:15:00"}, 24 * 60 * 60},
+ {{"01/01/23 22:15:05", "01/01/23 22:15:01"}, 24 * 60 * 60 - 1},
+ {{"01/01/23 22:15:05", "01/02/23 22:15:01"}, 24 * 60 * 60 - 1},
+ };
+
+ run_schedule_next_start_time_test(test_values_to_expectations);
+}
diff --git a/src/test/rgw/test_rgw_lua.cc b/src/test/rgw/test_rgw_lua.cc
index 07f8521c231..0485e71ede3 100644
--- a/src/test/rgw/test_rgw_lua.cc
+++ b/src/test/rgw/test_rgw_lua.cc
@@ -86,7 +86,7 @@ public:
return 0;
}
- virtual int read_stats_async(const DoutPrefixProvider *dpp, RGWGetUserStats_CB *cb) override {
+ virtual int read_stats_async(const DoutPrefixProvider *dpp, boost::intrusive_ptr<sal::ReadStatsCB> cb) override {
return 0;
}
@@ -635,8 +635,12 @@ TEST(TestRGWLua, Acl)
function print_grant(k, g)
print("Grant Key: " .. tostring(k))
print("Grant Type: " .. g.Type)
- print("Grant Group Type: " .. g.GroupType)
- print("Grant Referer: " .. g.Referer)
+ if (g.GroupType) then
+ print("Grant Group Type: " .. g.GroupType)
+ end
+ if (g.Referer) then
+ print("Grant Referer: " .. g.Referer)
+ end
if (g.User) then
print("Grant User.Tenant: " .. g.User.Tenant)
print("Grant User.Id: " .. g.User.Id)
@@ -662,11 +666,11 @@ TEST(TestRGWLua, Acl)
)";
DEFINE_REQ_STATE;
- ACLOwner owner;
- owner.set_id(rgw_user("jack", "black"));
- owner.set_name("jack black");
- s.user_acl.reset(new RGWAccessControlPolicy(g_cct));
- s.user_acl->set_owner(owner);
+ const ACLOwner owner{
+ .id = rgw_user("jack", "black"),
+ .display_name = "jack black"
+ };
+ s.user_acl.set_owner(owner);
ACLGrant grant1, grant2, grant3, grant4, grant5, grant6_1, grant6_2;
grant1.set_canon(rgw_user("jane", "doe"), "her grant", 1);
grant2.set_group(ACL_GROUP_ALL_USERS ,2);
@@ -675,13 +679,13 @@ TEST(TestRGWLua, Acl)
grant5.set_group(ACL_GROUP_AUTHENTICATED_USERS, 5);
grant6_1.set_canon(rgw_user("kill", "bill"), "his grant", 6);
grant6_2.set_canon(rgw_user("kill", "bill"), "her grant", 7);
- s.user_acl->get_acl().add_grant(&grant1);
- s.user_acl->get_acl().add_grant(&grant2);
- s.user_acl->get_acl().add_grant(&grant3);
- s.user_acl->get_acl().add_grant(&grant4);
- s.user_acl->get_acl().add_grant(&grant5);
- s.user_acl->get_acl().add_grant(&grant6_1);
- s.user_acl->get_acl().add_grant(&grant6_2);
+ s.user_acl.get_acl().add_grant(grant1);
+ s.user_acl.get_acl().add_grant(grant2);
+ s.user_acl.get_acl().add_grant(grant3);
+ s.user_acl.get_acl().add_grant(grant4);
+ s.user_acl.get_acl().add_grant(grant5);
+ s.user_acl.get_acl().add_grant(grant6_1);
+ s.user_acl.get_acl().add_grant(grant6_2);
const auto rc = lua::request::execute(nullptr, nullptr, nullptr, &s, nullptr, script);
ASSERT_EQ(rc, 0);
}
@@ -730,17 +734,14 @@ TEST(TestRGWLua, UseFunction)
)";
DEFINE_REQ_STATE;
- s.owner.set_name("user two");
- s.owner.set_id(rgw_user("tenant2", "user2"));
- s.user_acl.reset(new RGWAccessControlPolicy());
- s.user_acl->get_owner().set_name("user three");
- s.user_acl->get_owner().set_id(rgw_user("tenant3", "user3"));
- s.bucket_acl.reset(new RGWAccessControlPolicy());
- s.bucket_acl->get_owner().set_name("user four");
- s.bucket_acl->get_owner().set_id(rgw_user("tenant4", "user4"));
- s.object_acl.reset(new RGWAccessControlPolicy());
- s.object_acl->get_owner().set_name("user five");
- s.object_acl->get_owner().set_id(rgw_user("tenant5", "user5"));
+ s.owner.display_name = "user two";
+ s.owner.id = rgw_user("tenant2", "user2");
+ s.user_acl.get_owner().display_name = "user three";
+ s.user_acl.get_owner().id = rgw_user("tenant3", "user3");
+ s.bucket_acl.get_owner().display_name = "user four";
+ s.bucket_acl.get_owner().id = rgw_user("tenant4", "user4");
+ s.object_acl.get_owner().display_name = "user five";
+ s.object_acl.get_owner().id = rgw_user("tenant5", "user5");
const auto rc = lua::request::execute(nullptr, nullptr, nullptr, &s, nullptr, script);
ASSERT_EQ(rc, 0);
diff --git a/src/test/test_c2c.cc b/src/test/test_c2c.cc
index 5fe1ac2f1fc..1569be305e5 100644
--- a/src/test/test_c2c.cc
+++ b/src/test/test_c2c.cc
@@ -70,7 +70,7 @@ int main(int argc, const char **argv)
while(1) {
size_t i;
if (sharding) {
- i = mempool::pool_t::pick_a_shard_int();
+ i = mempool::pick_a_shard_int();
} else {
i = 0;
}
diff --git a/src/test/test_mempool.cc b/src/test/test_mempool.cc
index 9dd96682f36..a8e20a41747 100644
--- a/src/test/test_mempool.cc
+++ b/src/test/test_mempool.cc
@@ -412,7 +412,7 @@ TEST(mempool, check_shard_select)
for (size_t i = 0; i < samples; i++) {
workers.push_back(
std::thread([&](){
- size_t i = mempool::pool_t::pick_a_shard_int();
+ size_t i = mempool::pick_a_shard_int();
shards[i]++;
}));
}
diff --git a/src/test/test_rgw_admin_meta.cc b/src/test/test_rgw_admin_meta.cc
index b1d5fad0600..00c43d10b54 100644
--- a/src/test/test_rgw_admin_meta.cc
+++ b/src/test/test_rgw_admin_meta.cc
@@ -460,7 +460,7 @@ int compare_access_keys(RGWAccessKey& k1, RGWAccessKey& k2) {
int compare_user_info(RGWUserInfo& i1, RGWUserInfo& i2) {
int rv;
- if ((rv = i1.user_id.compare(i2.user_id)) != 0)
+ if ((rv = i1.user_id.id.compare(i2.user_id.id)) != 0)
return rv;
if ((rv = i1.display_name.compare(i2.display_name)) != 0)
return rv;
diff --git a/src/tools/ceph-dencoder/common_types.h b/src/tools/ceph-dencoder/common_types.h
index 3180e3476be..e853321645b 100644
--- a/src/tools/ceph-dencoder/common_types.h
+++ b/src/tools/ceph-dencoder/common_types.h
@@ -23,12 +23,29 @@ TYPE(filepath)
#include "include/fs_types.h"
TYPE_FEATUREFUL(file_layout_t)
+TYPE(inodeno_t)
#include "include/util.h"
TYPE(ceph_data_stats)
+#include "include/object.h"
+TYPE(object_t)
+TYPE(sobject_t)
+
+#include "include/frag.h"
+TYPE(fragtree_t)
+TYPE(frag_t)
+
+#include "include/types.h"
+TYPE(shard_id_t)
+TYPE(inline_data_t)
+TYPE(sha256_digest_t)
+TYPE(errorcode32_t)
+TYPE(client_t)
+
#include "common/bit_vector.hpp"
TYPE(BitVector<2>)
+TYPE(ceph::BitVector<2>)
#include "common/bloom_filter.hpp"
TYPE(bloom_filter)
@@ -37,6 +54,9 @@ TYPE(compressible_bloom_filter)
#include "common/DecayCounter.h"
TYPE(DecayCounter)
+#include "common/entity_name.h"
+TYPE(EntityName)
+
#include "common/histogram.h"
TYPE(pow2_hist_t)
@@ -71,11 +91,17 @@ TYPE(cls_cas_chunk_put_ref_op)
#include "cls/cas/cls_cas_internal.h"
TYPE(chunk_refs_t)
+TYPE(chunk_refs_count_t)
+TYPE(chunk_refs_by_object_t)
#include "cls/lock/cls_lock_types.h"
TYPE(rados::cls::lock::locker_id_t)
TYPE_FEATUREFUL(rados::cls::lock::locker_info_t)
TYPE_FEATUREFUL(rados::cls::lock::lock_info_t)
+using namespace rados::cls::lock;
+TYPE(locker_id_t)
+TYPE_FEATUREFUL(locker_info_t)
+TYPE_FEATUREFUL(lock_info_t)
#include "cls/lock/cls_lock_ops.h"
TYPE(cls_lock_lock_op)
@@ -98,6 +124,64 @@ TYPE(obj_refcount)
#include "cls/timeindex/cls_timeindex_types.h"
TYPE(cls_timeindex_entry)
+#include "cls/timeindex/cls_timeindex_ops.h"
+TYPE(cls_timeindex_list_op)
+TYPE(cls_timeindex_list_ret)
+
+#include "cls/queue/cls_queue_types.h"
+TYPE(cls_queue_entry)
+TYPE(cls_queue_marker)
+TYPE(cls_queue_head)
+
+#include "cls/queue/cls_queue_ops.h"
+TYPE(cls_queue_get_capacity_ret)
+TYPE(cls_queue_remove_op)
+TYPE(cls_queue_enqueue_op)
+TYPE(cls_queue_list_op)
+TYPE(cls_queue_list_ret)
+TYPE(cls_queue_init_op)
+
+#include "cls/2pc_queue/cls_2pc_queue_ops.h"
+TYPE(cls_2pc_queue_abort_op)
+TYPE(cls_2pc_queue_commit_op)
+TYPE(cls_2pc_queue_expire_op)
+TYPE_NONDETERMINISTIC(cls_2pc_queue_reservations_ret)
+TYPE(cls_2pc_queue_reserve_op)
+TYPE(cls_2pc_queue_reserve_ret)
+TYPE(cls_queue_init_op)
+
+#include "cls/2pc_queue/cls_2pc_queue_types.h"
+TYPE(cls_2pc_reservation)
+TYPE_NONDETERMINISTIC(cls_2pc_urgent_data)
+
+#include "cls/log/cls_log_types.h"
+TYPE(cls_log_header)
+
+#include "cls/log/cls_log_ops.h"
+TYPE(cls_log_info_op)
+TYPE(cls_log_list_op)
+TYPE(cls_log_list_ret)
+TYPE(cls_log_trim_op)
+
+#include "cls/version/cls_version_ops.h"
+TYPE(cls_version_check_op)
+TYPE(cls_version_read_ret)
+TYPE(cls_version_inc_op)
+TYPE(cls_version_set_op)
+
+
+#include "cls/fifo/cls_fifo_ops.h"
+using namespace rados::cls::fifo::op;
+TYPE(create_meta)
+TYPE(get_meta)
+TYPE(get_meta_reply)
+
+#include "cls/fifo/cls_fifo_types.h"
+using namespace rados::cls::fifo;
+TYPE(data_params)
+TYPE(objv)
+TYPE(info)
+
#include "journal/Entry.h"
TYPE(journal::Entry)
@@ -125,9 +209,12 @@ MESSAGE(MClientReconnect)
#include "messages/MClientReply.h"
MESSAGE(MClientReply)
+TYPE(openc_response_t)
#include "messages/MClientRequest.h"
MESSAGE(MClientRequest)
+TYPE(SnapPayload)
+TYPE(MClientRequest::Release)
#include "messages/MClientRequestForward.h"
MESSAGE(MClientRequestForward)
@@ -251,6 +338,9 @@ MESSAGE(MMDSBeacon)
#include "messages/MMDSCacheRejoin.h"
MESSAGE(MMDSCacheRejoin)
+TYPE(MMDSCacheRejoin::dirfrag_strong)
+TYPE(MMDSCacheRejoin::dn_strong)
+TYPE(MMDSCacheRejoin::inode_strong)
#include "messages/MMDSFindIno.h"
MESSAGE(MMDSFindIno)
@@ -265,10 +355,14 @@ MESSAGE(MMDSFragmentNotify)
MESSAGE(MMDSLoadTargets)
#include "messages/MMDSMap.h"
-MESSAGE(MMDSMap)
+MESSAGE(MMDSMap)
+
+#include "messages/MMgrBeacon.h"
+MESSAGE(MMgrBeacon)
#include "messages/MMgrReport.h"
MESSAGE(MMgrReport)
+TYPE(PerfCounterType)
#include "messages/MMDSResolve.h"
MESSAGE(MMDSResolve)
@@ -321,6 +415,9 @@ MESSAGE(MMonGetVersionReply)
#include "messages/MMonGlobalID.h"
MESSAGE(MMonGlobalID)
+#include "messages/MMonHealth.h"
+MESSAGE(MMonHealth)
+
#include "messages/MMonJoin.h"
MESSAGE(MMonJoin)
@@ -452,3 +549,74 @@ MESSAGE(MWatchNotify)
#include "messages/MMgrUpdate.h"
MESSAGE(MMgrUpdate)
+
+#include "messages/MOSDECSubOpRead.h"
+MESSAGE(MOSDECSubOpRead)
+
+#include "messages/MOSDECSubOpReadReply.h"
+MESSAGE(MOSDECSubOpReadReply)
+
+#include "messages/MOSDECSubOpWrite.h"
+MESSAGE(MOSDECSubOpWrite)
+
+#include "messages/MOSDECSubOpWriteReply.h"
+MESSAGE(MOSDECSubOpWriteReply)
+
+#include "messages/MOSDMarkMeDown.h"
+MESSAGE(MOSDMarkMeDown)
+
+#include "messages/MOSDPGCreated.h"
+MESSAGE(MOSDPGCreated)
+
+#include "messages/MOSDPGPush.h"
+MESSAGE(MOSDPGPush)
+
+#include "messages/MOSDPGPushReply.h"
+MESSAGE(MOSDPGPushReply)
+
+#include "messages/MOSDPGUpdateLogMissing.h"
+MESSAGE(MOSDPGUpdateLogMissing)
+
+#include "messages/MOSDPGUpdateLogMissingReply.h"
+MESSAGE(MOSDPGUpdateLogMissingReply)
+
+#include "messages/MOSDRepOp.h"
+MESSAGE(MOSDRepOp)
+
+#include "messages/MOSDRepOpReply.h"
+MESSAGE(MOSDRepOpReply)
+
+#include "messages/MRecoveryReserve.h"
+MESSAGE(MRecoveryReserve)
+
+
+#include "auth/cephx/CephxProtocol.h"
+TYPE(CephXAuthenticate)
+TYPE(CephXAuthorize)
+TYPE(CephXAuthorizeChallenge)
+TYPE(CephXAuthorizeReply)
+TYPE(CephXChallengeBlob)
+TYPE(CephXRequestHeader)
+TYPE(CephXResponseHeader)
+TYPE(CephXServerChallenge)
+TYPE(CephXServiceTicket)
+TYPE(CephXServiceTicketInfo)
+TYPE(CephXServiceTicketRequest)
+TYPE(CephXTicketBlob)
+
+#include "auth/cephx/CephxKeyServer.h"
+TYPE(KeyServerData)
+TYPE(KeyServerData::Incremental)
+
+#include "auth/Auth.h"
+TYPE(RotatingSecrets)
+TYPE(ExpiringCryptoKey)
+TYPE(AuthCapsInfo)
+TYPE(AuthTicket)
+TYPE(EntityAuth)
+
+#include "auth/Crypto.h"
+TYPE(CryptoKey)
+
+#include "common/ceph_json.h"
+TYPE(JSONFormattable)
diff --git a/src/vstart.sh b/src/vstart.sh
index 634fb74ac87..1187e86b9ed 100755
--- a/src/vstart.sh
+++ b/src/vstart.sh
@@ -168,6 +168,7 @@ fi
ceph_osd=ceph-osd
rgw_frontend="beast"
rgw_compression=""
+rgw_store="rados"
lockdep=${LOCKDEP:-1}
spdk_enabled=0 # disable SPDK by default
pmem_enabled=0
@@ -229,6 +230,7 @@ options:
--rgw_frontend specify the rgw frontend configuration
--rgw_arrow_flight start arrow flight frontend
--rgw_compression specify the rgw compression plugin
+ --rgw_store storage backend: rados|dbstore|posix
--seastore use seastore as crimson osd backend
-b, --bluestore use bluestore as the osd objectstore backend (default)
-K, --kstore use kstore as the osd objectstore backend
@@ -461,6 +463,10 @@ case $1 in
rgw_compression=$2
shift
;;
+ --rgw_store)
+ rgw_store=$2
+ shift
+ ;;
--kstore_path)
kstore_path=$2
shift
@@ -692,6 +698,22 @@ done
}
+do_rgw_dbstore_conf() {
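+    # write dbstore backend and config-store settings into the generated ceph.conf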
+ if [ $CEPH_NUM_RGW -gt 1 ]; then
+    if [ "$CEPH_NUM_RGW" -gt 1 ]; then
+ exit 1
+ fi
+
+ prun mkdir -p "$CEPH_DEV_DIR/rgw/dbstore"
+ wconf <<EOF
+ rgw backend store = dbstore
+ rgw config store = dbstore
+ dbstore db dir = $CEPH_DEV_DIR/rgw/dbstore
+ dbstore_config_uri = file://$CEPH_DEV_DIR/rgw/dbstore/config.db
+
+EOF
+}
+
format_conf() {
local opts=$1
local indent=" "
@@ -861,6 +883,20 @@ $CCLIENTDEBUG
; rgw lc debug interval = 10
$(format_conf "${extra_conf}")
EOF
+ if [ "$rgw_store" == "dbstore" ] ; then
+ do_rgw_dbstore_conf
+ elif [ "$rgw_store" == "posix" ] ; then
+ # use dbstore as the backend and posix as the filter
+ do_rgw_dbstore_conf
+ posix_dir="$CEPH_DEV_DIR/rgw/posix"
+ prun mkdir -p $posix_dir/root $posix_dir/lmdb
+ wconf <<EOF
+ rgw filter = posix
+ rgw posix base path = $posix_dir/root
+ rgw posix database root = $posix_dir/lmdb
+
+EOF
+ fi
do_rgw_conf
wconf << EOF
[mds]
@@ -1785,11 +1821,13 @@ do_rgw()
for n in $(seq 1 $CEPH_NUM_RGW); do
rgw_name="client.rgw.${current_port}"
- ceph_adm auth get-or-create $rgw_name \
- mon 'allow rw' \
- osd 'allow rwx' \
- mgr 'allow rw' \
- >> "$keyring_fn"
+ if [ "$CEPH_NUM_MON" -gt 0 ]; then
+ ceph_adm auth get-or-create $rgw_name \
+ mon 'allow rw' \
+ osd 'allow rwx' \
+ mgr 'allow rw' \
+ >> "$keyring_fn"
+ fi
debug echo start rgw on http${CEPH_RGW_HTTPS}://localhost:${current_port}
run 'rgw' $current_port $RGWSUDO $CEPH_BIN/radosgw -c $conf_fn \