diff options
118 files changed, 2288 insertions, 761 deletions
diff --git a/.githubmap b/.githubmap index 5265fa59bed..c8ae6e284a2 100644 --- a/.githubmap +++ b/.githubmap @@ -9,6 +9,7 @@ # a2batic Kanika Murarka <kmurarka@redhat.com> aaSharma14 Aashish Sharma <aasharma@redhat.com> +abhishek-kane Abhishek Kane <abhishek.kane@ibm.com> <abhishek.kane@gmail.com> aclamk Adam Kupczyk <akupczyk@redhat.com> adamemerson Adam C. Emerson <aemerson@redhat.com> adk3798 Adam King <adking@redhat.com> @@ -13,6 +13,7 @@ Aashish Sharma <aasharma@redhat.com> <66050535+aaSharma14@users.noreply.github.c Aashish Sharma <aasharma@redhat.com> <aasharma@li-e74156cc-2f67-11b2-a85c-e98659a63c5c.ibm.com> Aashish Sharma <aasharma@redhat.com> <aashishsharma@fedora.redhat.com> Aashish Sharma <aasharma@redhat.com> <aashishsharma@localhost.localdomain> +Abhishek Kane <abhishek.kane@ibm.com> <abhishek.kane@gmail.com> Abhishek Lekshmanan <abhishek.lekshmanan@cern.ch> <abhishek.l@cern.ch> Abhishek Lekshmanan <abhishek@suse.com> <abhishek.lekshmanan@gmail.com> Abhishek Lekshmanan <abhishek@suse.com> <alekshmanan@suse.com> diff --git a/.organizationmap b/.organizationmap index e59e6ae24e1..ac9b0ea70fe 100644 --- a/.organizationmap +++ b/.organizationmap @@ -345,6 +345,7 @@ Huawei <contact@huawei.com> Yehu <yehu5@huawei.com> Huayun <contact@huayun.com> Zheng Yin <zhengyin@huayun.com> Huazhong University of Science and Technology <contact@hust.edu.cn> Luo Runbing <runsisi@hust.edu.cn> HXT Semiconductor <contact@hxt-semitech.org> Jiang Yutang <yutang2.jiang@hxt-semitech.com> +IBM <contact@IBM.com> Abhishek Kane <abhishek.kane@ibm.com> IBM <contact@IBM.com> Adam Kupczyk <akupczyk@ibm.com> IBM <contact@IBM.com> Afreen Misbah <afreen@ibm.com> IBM <contact@IBM.com> Aliaksei Makarau <aliaksei.makarau@ibm.com> diff --git a/.peoplemap b/.peoplemap index 418e8505fb4..ed70830c092 100644 --- a/.peoplemap +++ b/.peoplemap @@ -16,6 +16,7 @@ # # git log --pretty='%aN <%aE>' $range | git -c mailmap.file=.peoplemap check-mailmap --stdin | sort | uniq | sed -e 's/\(.*\) \(<.*\)/\2 \1/' | uniq --skip-field=1 --all-repeated | sed -e 's/\(.*>\) \(.*\)/\2 \1/' # +Abhishek Kane <abhishek.kane@ibm.com> <abhishek.kane@gmail.com> Abhishek Lekshmanan <abhishek.lekshmanan@cern.ch> <abhishek@suse.com> Adam Kupczyk <akupczyk@ibm.com> <akupczyk@redhat.com> <akupczyk@mirantis.com> Alexandre Marangone <amarango@redhat.com> Alexandre Marangone <alexandre.marangone@inktank.com> diff --git a/container/Containerfile b/container/Containerfile index c954ebed1be..9a5a88e76a1 100644 --- a/container/Containerfile +++ b/container/Containerfile @@ -212,6 +212,7 @@ RUN rpm -q $(cat packages.txt) && rm -f /var/lib/rpm/__db* && rm -f *packages.tx # Set some envs in the container for quickly inspecting details about the build at runtime ENV CEPH_IS_DEVEL="${CI_CONTAINER}" \ CEPH_REF="${CEPH_REF}" \ + CEPH_VERSION="${CEPH_REF}" \ CEPH_OSD_FLAVOR="${OSD_FLAVOR}" \ FROM_IMAGE="${FROM_IMAGE}" diff --git a/doc/cephadm/services/osd.rst b/doc/cephadm/services/osd.rst index 831bd238c79..90ebd86f897 100644 --- a/doc/cephadm/services/osd.rst +++ b/doc/cephadm/services/osd.rst @@ -198,6 +198,18 @@ There are a few ways to create new OSDs: .. warning:: When deploying new OSDs with ``cephadm``, ensure that the ``ceph-osd`` package is not already installed on the target host. If it is installed, conflicts may arise in the management and control of the OSD that may lead to errors or unexpected behavior. +* OSDs created via ``ceph orch daemon add`` are by default not added to the orchestrator's OSD service, they get added to 'osd' service. To attach an OSD to a different, existing OSD service, issue a command of the following form: + + .. prompt:: bash * + + ceph orch osd set-spec-affinity <service_name> <osd_id(s)> + + For example: + + .. prompt:: bash # + + ceph orch osd set-spec-affinity osd.default_drive_group 0 1 + Dry Run ------- diff --git a/doc/cephadm/services/rgw.rst b/doc/cephadm/services/rgw.rst index ed0b149365a..3df8ed2fc56 100644 --- a/doc/cephadm/services/rgw.rst +++ b/doc/cephadm/services/rgw.rst @@ -173,6 +173,32 @@ Then apply this yaml document: Note the value of ``rgw_frontend_ssl_certificate`` is a literal string as indicated by a ``|`` character preserving newline characters. +Disabling multisite sync traffic +-------------------------------- + +There is an RGW config option called ``rgw_run_sync_thread`` that tells the +RGW daemon to not transmit multisite replication data. This is useful if you want +that RGW daemon to be dedicated to I/O rather than multisite sync operations. +The RGW spec file includes a setting ``disable_multisite_sync_traffic`` that when +set to "True" will tell cephadm to set ``rgw_run_sync_thread`` to false for all +RGW daemons deployed for that RGW service. For example + +.. code-block:: yaml + + service_type: rgw + service_id: foo + placement: + label: rgw + spec: + rgw_realm: myrealm + rgw_zone: myzone + rgw_zonegroup: myzg + disable_multisite_sync_traffic: True + +.. note:: This will only stop the RGW daemon(s) from sending replication data. + The daemon can still receive replication data unless it has been removed + from the zonegroup and zone replication endpoints. + Service specification --------------------- diff --git a/doc/man/8/cephadm.rst b/doc/man/8/cephadm.rst index b2cad6cb505..3c23a9867f7 100644 --- a/doc/man/8/cephadm.rst +++ b/doc/man/8/cephadm.rst @@ -13,7 +13,7 @@ Synopsis | [--log-dir LOG_DIR] [--logrotate-dir LOGROTATE_DIR] | [--unit-dir UNIT_DIR] [--verbose] [--timeout TIMEOUT] | [--retry RETRY] [--no-container-init] -| {version,pull,inspect-image,ls,list-networks,adopt,rm-daemon,rm-cluster,run,shell,enter,ceph-volume,unit,logs,bootstrap,deploy,check-host,prepare-host,add-repo,rm-repo,install,list-images} +| {version,pull,inspect-image,ls,list-networks,adopt,rm-daemon,rm-cluster,run,shell,enter,ceph-volume,unit,logs,bootstrap,deploy,check-host,prepare-host,add-repo,rm-repo,install,list-images,update-osd-service} | ... @@ -106,6 +106,7 @@ Synopsis | **cephadm** **list-images** +| **cephadm** **update-osd-service** [-h] [--fsid FSID] --osd-ids OSD_IDS --service-name SERVICE_NAME Description @@ -535,6 +536,18 @@ list-images List the default container images for all services in ini format. The output can be modified with custom images and passed to --config flag during bootstrap. +update-osd-service +------------------ + +Update the OSD service for specific OSDs + +Arguments: + +* [--fsid FSID] cluster FSID +* --osd-ids OSD_IDS Comma-separated OSD IDs +* --service-name SERVICE_NAME OSD service name + + Availability ============ diff --git a/doc/radosgw/bucket_logging.rst b/doc/radosgw/bucket_logging.rst index cb9f8465d20..f3e790f5705 100644 --- a/doc/radosgw/bucket_logging.rst +++ b/doc/radosgw/bucket_logging.rst @@ -15,6 +15,12 @@ The log bucket can accumulate logs from multiple buckets. It is recommended to c a different "prefix" for each bucket, so that the logs of different buckets will be stored in different objects in the log bucket. +.. note:: + + - The log bucket must be created before enabling logging on a bucket + - The log bucket cannot be the same as the bucket being logged + - The log bucket cannot have logging enabled on it + .. toctree:: :maxdepth: 1 @@ -29,6 +35,7 @@ Adding a log object to the log bucket is done "lazily", meaning, that if no more remain outside of the log bucket even after the configured time has passed. To counter that, you can flush all logging objects on a given source bucket to log them, regardless if enough time passed or if no more records are written to the object. +Flushing will happen automatically when logging is disabled on a bucket, its logging configuration is changed, or the bucket is deleted. Standard ```````` @@ -72,7 +79,7 @@ has the following format: :: - <prefix><bucket owner>/<source region>/<bucket name>/<year>/<month>/<day>/<year-month-day-hour-minute-second>-<16 bytes unique-id> + <prefix><bucket owner>/<source region>/[tenant:]<bucket name>/<year>/<month>/<day>/<year-month-day-hour-minute-second>-<16 bytes unique-id> For example: @@ -90,7 +97,7 @@ Journal minimum amount of data used for journaling bucket changes (this is a Ceph extension). - bucket owner (or dash if empty) - - bucket name (or dash if empty) + - bucket name (or dash if empty). in the format: ``[tenant:]<bucket name>`` - time in the following format: ``[day/month/year:hour:minute:second timezone]`` - object key (or dash if empty) - operation in the following format: ``WEBSITE/REST.<HTTP method>.<resource>`` @@ -111,7 +118,7 @@ Standard based on `AWS Logging Record Format`_. - bucket owner (or dash if empty) - - bucket name (or dash if empty) + - bucket name (or dash if empty). in the format: ``[tenant:]<bucket name>`` - time - remote IP (not supported, always a dash) - user or account (or dash if empty) diff --git a/doc/radosgw/config-ref.rst b/doc/radosgw/config-ref.rst index edc6a90b0f9..b4aa56fff54 100644 --- a/doc/radosgw/config-ref.rst +++ b/doc/radosgw/config-ref.rst @@ -78,7 +78,7 @@ These values can be tuned based upon your specific workload to further increase aggressiveness of lifecycle processing. For a workload with a larger number of buckets (thousands) you would look at increasing the :confval:`rgw_lc_max_worker` value from the default value of 3 whereas for a workload with a smaller number of buckets but higher number of objects (hundreds of thousands) -per bucket you would consider decreasing :confval:`rgw_lc_max_wp_worker` from the default value of 3. +per bucket you would consider increasing :confval:`rgw_lc_max_wp_worker` from the default value of 3. .. note:: When looking to tune either of these specific values please validate the current Cluster performance and Ceph Object Gateway utilization before increasing. diff --git a/doc/releases/index.rst b/doc/releases/index.rst index a8015c65465..1393770878f 100644 --- a/doc/releases/index.rst +++ b/doc/releases/index.rst @@ -23,7 +23,6 @@ security fixes. Squid (v19.2.*) <squid> Reef (v18.2.*) <reef> - Quincy (v17.2.*) <quincy> .. ceph_releases:: releases.yml current @@ -40,6 +39,7 @@ receive bug fixes or backports). :maxdepth: 1 :hidden: + Quincy (v17.2.*) <quincy> Pacific (v16.2.*) <pacific> Octopus (v15.2.*) <octopus> Nautilus (v14.2.*) <nautilus> diff --git a/doc/releases/releases.yml b/doc/releases/releases.yml index 948f9eab278..6a76cc7c92c 100644 --- a/doc/releases/releases.yml +++ b/doc/releases/releases.yml @@ -32,6 +32,7 @@ releases: quincy: target_eol: 2024-06-01 + actual_eol: 2025-01-13 releases: - version: 17.2.8 released: 2024-11-25 diff --git a/examples/rgw/boto3/head_bucket_stats.py b/examples/rgw/boto3/head_bucket_stats.py new file mode 100755 index 00000000000..1de40d63f4a --- /dev/null +++ b/examples/rgw/boto3/head_bucket_stats.py @@ -0,0 +1,27 @@ +#!/usr/bin/python + +import boto3 +import sys + +if len(sys.argv) != 2: + print('Usage: ' + sys.argv[0] + ' <bucket>') + sys.exit(1) + +# bucket name as first argument +bucketname = sys.argv[1] + +# endpoint and keys from vstart +endpoint = 'http://127.0.0.1:8000' +access_key='0555b35654ad1656d804' +secret_key='h7GhxuBLTrlhVUyxSPUKUV8r/2EI4ngqJxD7iBdBYLhwluN30JaT3Q==' + +client = boto3.client('s3', + endpoint_url=endpoint, + aws_access_key_id=access_key, + aws_secret_access_key=secret_key) + +# reading bucket stats via HeadBucket + +response = client.head_bucket(Bucket=bucketname, ReadStats=True) + +print('Objects:', response['ObjectCount'], 'Bytes:', response['BytesUsed']) diff --git a/examples/rgw/boto3/service-2.sdk-extras.json b/examples/rgw/boto3/service-2.sdk-extras.json index b81667ecd09..4618543d61b 100644 --- a/examples/rgw/boto3/service-2.sdk-extras.json +++ b/examples/rgw/boto3/service-2.sdk-extras.json @@ -379,7 +379,36 @@ }, "documentation":"<p>A filter for all log object. Filter for the object by its key (prefix, suffix and regex).</p>", "locationName":"Filter" - } + }, + "HeadBucketRequest": { + "members": { + "ReadStats":{ + "shape":"ReadStats", + "documentation":"<p>Read additional usage statistics for <code>ObjectCount</code> and <code>BytesUsed</code> in the response.</p> <note> <p>This request parameter is a Ceph RGW extension.</p> </note>", + "location":"querystring", + "locationName":"read-stats" + } + } + }, + "HeadBucketOutput":{ + "members":{ + "ObjectCount":{ + "shape":"ObjectCount", + "documentation": "<p>Total number of objects/versions in the bucket.</p>", + "location": "header", + "locationName": "x-rgw-object-count" + }, + "BytesUsed":{ + "shape":"BytesUsed", + "documentation": "<p>Total size in bytes of all objects/versions in the bucket.</p>", + "location": "header", + "locationName": "x-rgw-bytes-used" + } + } + }, + "ReadStats":{"type":"boolean"}, + "ObjectCount":{"type":"integer"}, + "BytesUsed":{"type":"integer"} }, "documentation":"<p/>" } diff --git a/monitoring/ceph-mixin/config.libsonnet b/monitoring/ceph-mixin/config.libsonnet index a15b88422fc..e917b4c2dac 100644 --- a/monitoring/ceph-mixin/config.libsonnet +++ b/monitoring/ceph-mixin/config.libsonnet @@ -9,12 +9,12 @@ CephNodeNetworkPacketDropsPerSec: 10, CephRBDMirrorImageTransferBandwidthThreshold: 0.8, CephRBDMirrorImagesPerDaemonThreshold: 100, - NVMeoFMaxGatewaysPerGroup: 4, - NVMeoFMaxGatewaysPerCluster: 4, + NVMeoFMaxGatewaysPerGroup: 8, + NVMeoFMaxGatewaysPerCluster: 32, NVMeoFHighGatewayCPU: 80, NVMeoFMaxSubsystemsPerGateway: 128, - NVMeoFMaxNamespaces: 1024, - NVMeoFHighClientCount: 32, + NVMeoFMaxNamespaces: 2048, + NVMeoFHighClientCount: 128, NVMeoFHighHostCPU: 80, // // Read/Write latency is defined in ms diff --git a/monitoring/ceph-mixin/prometheus_alerts.yml b/monitoring/ceph-mixin/prometheus_alerts.yml index 3440d761351..7c0da4d51a4 100644 --- a/monitoring/ceph-mixin/prometheus_alerts.yml +++ b/monitoring/ceph-mixin/prometheus_alerts.yml @@ -776,18 +776,18 @@ groups: type: "ceph_default" - alert: "NVMeoFTooManyGateways" annotations: - description: "You may create many gateways, but 4 is the tested limit" + description: "You may create many gateways, but 32 is the tested limit" summary: "Max supported gateways exceeded on cluster {{ $labels.cluster }}" - expr: "count(ceph_nvmeof_gateway_info) by (cluster) > 4.00" + expr: "count(ceph_nvmeof_gateway_info) by (cluster) > 32.00" for: "1m" labels: severity: "warning" type: "ceph_default" - alert: "NVMeoFMaxGatewayGroupSize" annotations: - description: "You may create many gateways in a gateway group, but 4 is the tested limit" + description: "You may create many gateways in a gateway group, but 8 is the tested limit" summary: "Max gateways within a gateway group ({{ $labels.group }}) exceeded on cluster {{ $labels.cluster }}" - expr: "count(ceph_nvmeof_gateway_info) by (cluster,group) > 4.00" + expr: "count(ceph_nvmeof_gateway_info) by (cluster,group) > 8.00" for: "1m" labels: severity: "warning" @@ -832,7 +832,7 @@ groups: annotations: description: "Although you may continue to create namespaces in {{ $labels.gateway_host }}, the configuration may not be supported" summary: "The number of namespaces defined to the gateway exceeds supported values on cluster {{ $labels.cluster }}" - expr: "sum by(gateway_host, cluster) (label_replace(ceph_nvmeof_subsystem_namespace_count,\"gateway_host\",\"$1\",\"instance\",\"(.*?)(?::.*)?\")) > 1024.00" + expr: "sum by(gateway_host, cluster) (label_replace(ceph_nvmeof_subsystem_namespace_count,\"gateway_host\",\"$1\",\"instance\",\"(.*?)(?::.*)?\")) > 2048.00" for: "1m" labels: severity: "warning" @@ -848,9 +848,9 @@ groups: type: "ceph_default" - alert: "NVMeoFHighClientCount" annotations: - description: "The supported limit for clients connecting to a subsystem is 32" + description: "The supported limit for clients connecting to a subsystem is 128" summary: "The number of clients connected to {{ $labels.nqn }} is too high on cluster {{ $labels.cluster }}" - expr: "ceph_nvmeof_subsystem_host_count > 32.00" + expr: "ceph_nvmeof_subsystem_host_count > 128.00" for: "1m" labels: severity: "warning" diff --git a/monitoring/ceph-mixin/tests_alerts/test_alerts.yml b/monitoring/ceph-mixin/tests_alerts/test_alerts.yml index b3b29308d08..83b4ff80375 100644 --- a/monitoring/ceph-mixin/tests_alerts/test_alerts.yml +++ b/monitoring/ceph-mixin/tests_alerts/test_alerts.yml @@ -2331,12 +2331,69 @@ tests: values: '1+0x20' - series: 'ceph_nvmeof_gateway_info{addr="1.1.1.5",cluster="mycluster"}' values: '1+0x20' + - series: 'ceph_nvmeof_gateway_info{addr="1.1.1.6",cluster="mycluster"}' + values: '1+0x20' + - series: 'ceph_nvmeof_gateway_info{addr="1.1.1.7",cluster="mycluster"}' + values: '1+0x20' + - series: 'ceph_nvmeof_gateway_info{addr="1.1.1.8",cluster="mycluster"}' + values: '1+0x20' + - series: 'ceph_nvmeof_gateway_info{addr="1.1.1.9",cluster="mycluster"}' + values: '1+0x20' + - series: 'ceph_nvmeof_gateway_info{addr="1.1.1.10",cluster="mycluster"}' + values: '1+0x20' + - series: 'ceph_nvmeof_gateway_info{addr="1.1.1.11",cluster="mycluster"}' + values: '1+0x20' + - series: 'ceph_nvmeof_gateway_info{addr="1.1.1.12",cluster="mycluster"}' + values: '1+0x20' + - series: 'ceph_nvmeof_gateway_info{addr="1.1.1.13",cluster="mycluster"}' + values: '1+0x20' + - series: 'ceph_nvmeof_gateway_info{addr="1.1.1.14",cluster="mycluster"}' + values: '1+0x20' + - series: 'ceph_nvmeof_gateway_info{addr="1.1.1.15",cluster="mycluster"}' + values: '1+0x20' + - series: 'ceph_nvmeof_gateway_info{addr="1.1.1.16",cluster="mycluster"}' + values: '1+0x20' + - series: 'ceph_nvmeof_gateway_info{addr="1.1.1.17",cluster="mycluster"}' + values: '1+0x20' + - series: 'ceph_nvmeof_gateway_info{addr="1.1.1.18",cluster="mycluster"}' + values: '1+0x20' + - series: 'ceph_nvmeof_gateway_info{addr="1.1.1.19",cluster="mycluster"}' + values: '1+0x20' + - series: 'ceph_nvmeof_gateway_info{addr="1.1.1.20",cluster="mycluster"}' + values: '1+0x20' + - series: 'ceph_nvmeof_gateway_info{addr="1.1.1.21",cluster="mycluster"}' + values: '1+0x20' + - series: 'ceph_nvmeof_gateway_info{addr="1.1.1.22",cluster="mycluster"}' + values: '1+0x20' + - series: 'ceph_nvmeof_gateway_info{addr="1.1.1.23",cluster="mycluster"}' + values: '1+0x20' + - series: 'ceph_nvmeof_gateway_info{addr="1.1.1.24",cluster="mycluster"}' + values: '1+0x20' + - series: 'ceph_nvmeof_gateway_info{addr="1.1.1.25",cluster="mycluster"}' + values: '1+0x20' + - series: 'ceph_nvmeof_gateway_info{addr="1.1.1.26",cluster="mycluster"}' + values: '1+0x20' + - series: 'ceph_nvmeof_gateway_info{addr="1.1.1.27",cluster="mycluster"}' + values: '1+0x20' + - series: 'ceph_nvmeof_gateway_info{addr="1.1.1.28",cluster="mycluster"}' + values: '1+0x20' + - series: 'ceph_nvmeof_gateway_info{addr="1.1.1.29",cluster="mycluster"}' + values: '1+0x20' + - series: 'ceph_nvmeof_gateway_info{addr="1.1.1.30",cluster="mycluster"}' + values: '1+0x20' + - series: 'ceph_nvmeof_gateway_info{addr="1.1.1.31",cluster="mycluster"}' + values: '1+0x20' + - series: 'ceph_nvmeof_gateway_info{addr="1.1.1.32",cluster="mycluster"}' + values: '1+0x20' + - series: 'ceph_nvmeof_gateway_info{addr="1.1.1.33",cluster="mycluster"}' + values: '1+0x20' + promql_expr_test: - - expr: count(ceph_nvmeof_gateway_info) by (cluster) > 4.00 + - expr: count(ceph_nvmeof_gateway_info) by (cluster) > 32.00 eval_time: 1m exp_samples: - labels: '{cluster="mycluster"}' - value: 5 + value: 33 alert_rule_test: - eval_time: 5m alertname: NVMeoFTooManyGateways @@ -2347,7 +2404,7 @@ tests: type: ceph_default exp_annotations: summary: "Max supported gateways exceeded on cluster mycluster" - description: "You may create many gateways, but 4 is the tested limit" + description: "You may create many gateways, but 32 is the tested limit" # NVMeoFMaxGatewayGroupSize - interval: 1m @@ -2362,16 +2419,24 @@ tests: values: '1+0x20' - series: 'ceph_nvmeof_gateway_info{group="group-1",addr="1.1.1.12",cluster="mycluster"}' values: '1+0x20' + - series: 'ceph_nvmeof_gateway_info{group="group-1",addr="1.1.1.10",cluster="mycluster"}' + values: '1+0x20' + - series: 'ceph_nvmeof_gateway_info{group="group-1",addr="1.1.1.14",cluster="mycluster"}' + values: '1+0x20' + - series: 'ceph_nvmeof_gateway_info{group="group-1",addr="1.1.1.11",cluster="mycluster"}' + values: '1+0x20' + - series: 'ceph_nvmeof_gateway_info{group="group-1",addr="1.1.1.13",cluster="mycluster"}' + values: '1+0x20' - series: 'ceph_nvmeof_gateway_info{group="group-2",addr="1.1.1.4",cluster="mycluster"}' values: '1+0x20' - series: 'ceph_nvmeof_gateway_info{group="group-2",addr="1.1.1.5",cluster="mycluster"}' values: '1+0x20' promql_expr_test: - - expr: count(ceph_nvmeof_gateway_info) by (cluster, group) > 4.00 + - expr: count(ceph_nvmeof_gateway_info) by (cluster, group) > 8.00 eval_time: 1m exp_samples: - labels: '{cluster="mycluster",group="group-1"}' - value: 5 + value: 9 alert_rule_test: - eval_time: 5m alertname: NVMeoFMaxGatewayGroupSize @@ -2383,7 +2448,7 @@ tests: type: ceph_default exp_annotations: summary: "Max gateways within a gateway group (group-1) exceeded on cluster mycluster" - description: "You may create many gateways in a gateway group, but 4 is the tested limit" + description: "You may create many gateways in a gateway group, but 8 is the tested limit" # NVMeoFSingleGatewayGroup - interval: 1m @@ -2767,12 +2832,14 @@ tests: values: '200+0x10' - series: 'ceph_nvmeof_subsystem_namespace_count{instance="node-1:10008",nqn="nqn10",cluster="mycluster"}' values: '200+0x10' + - series: 'ceph_nvmeof_subsystem_namespace_count{instance="node-1:10008",nqn="nqn11",cluster="mycluster"}' + values: '200+0x10' promql_expr_test: - - expr: sum by(gateway_host, cluster) (label_replace(ceph_nvmeof_subsystem_namespace_count,"gateway_host","$1","instance","(.*):.*")) > 1024 + - expr: sum by(gateway_host, cluster) (label_replace(ceph_nvmeof_subsystem_namespace_count,"gateway_host","$1","instance","(.*):.*")) > 2048 eval_time: 1m exp_samples: - labels: '{gateway_host="node-1", cluster="mycluster"}' - value: 2000 + value: 2200 alert_rule_test: - eval_time: 5m alertname: NVMeoFTooManyNamespaces @@ -2815,15 +2882,15 @@ tests: - interval: 1m input_series: - series: 'ceph_nvmeof_subsystem_host_count{nqn="nqn1",cluster="mycluster"}' - values: '2 2 2 4 4 8 8 8 10 10 20 20 32 34 34 38 38 40 44 44' + values: '2 4 8 10 20 30 40 50 62 74 80 95 100 110 130 130 130 130 130 130' - series: 'ceph_nvmeof_subsystem_host_count{nqn="nqn2",cluster="mycluster"}' - values: '2 2 2 8 8 8 16 16 16 16 16 16 16 16 16 16 16 16 16 16' + values: '2 8 16 16 16 16 16 16 16 16 20 20 32 34 34 36 37 37 37 37' promql_expr_test: - - expr: ceph_nvmeof_subsystem_host_count > 32.00 + - expr: ceph_nvmeof_subsystem_host_count > 128.00 eval_time: 15m exp_samples: - labels: '{__name__="ceph_nvmeof_subsystem_host_count",nqn="nqn1",cluster="mycluster"}' - value: 38 + value: 130 alert_rule_test: - eval_time: 20m alertname: NVMeoFHighClientCount @@ -2835,7 +2902,7 @@ tests: type: ceph_default exp_annotations: summary: "The number of clients connected to nqn1 is too high on cluster mycluster" - description: "The supported limit for clients connecting to a subsystem is 32" + description: "The supported limit for clients connecting to a subsystem is 128" # NVMeoFMissingListener - interval: 1m diff --git a/qa/crontab/teuthology-cronjobs b/qa/crontab/teuthology-cronjobs index c979e5b105f..c558a1382ef 100644 --- a/qa/crontab/teuthology-cronjobs +++ b/qa/crontab/teuthology-cronjobs @@ -52,7 +52,6 @@ TEUTHOLOGY_SUITE_ARGS="--non-interactive --newest=100 --ceph-repo=https://git.ce 00 05 * * 0,2,4 $CW $SS 1 --ceph main --suite smoke -p 100 --force-priority 08 05 * * 0 $CW $SS 1 --ceph squid --suite smoke -p 100 --force-priority 16 05 * * 0 $CW $SS 1 --ceph reef --suite smoke -p 100 --force-priority -24 05 * * 0 $CW $SS 1 --ceph quincy --suite smoke -p 100 --force-priority ## ********** windows tests on main branch - weekly # 00 03 * * 1 CEPH_BRANCH=main; MACHINE_NAME=smithi; $CW teuthology-suite -v -c $CEPH_BRANCH -n 100 -m $MACHINE_NAME -s windows -k distro -e $CEPH_QA_EMAIL @@ -122,7 +121,6 @@ TEUTHOLOGY_SUITE_ARGS="--non-interactive --newest=100 --ceph-repo=https://git.ce 16 00 * * 1 $CW $SS 1 --ceph quincy --suite upgrade-clients/client-upgrade-pacific-quincy --suite-branch pacific -p 820 24 00 * * 1 $CW $SS 120000 --ceph quincy --suite upgrade:octopus-x -p 820 32 00 * * 1 $CW $SS 120000 --ceph quincy --suite upgrade:pacific-x -p 820 -40 00 * * 1 $CW $SS 1 --ceph quincy --suite upgrade/quincy-p2p -p 820 ### upgrade runs for reef release ###### on smithi diff --git a/qa/standalone/scrub/osd-recovery-scrub.sh b/qa/standalone/scrub/osd-recovery-scrub.sh index 843e9b9901b..7b77a60f35b 100755 --- a/qa/standalone/scrub/osd-recovery-scrub.sh +++ b/qa/standalone/scrub/osd-recovery-scrub.sh @@ -163,7 +163,7 @@ function wait_for_scrub_mod() { fi sleep 1 # are we still the primary? - local current_primary=`bin/ceph pg $pgid query | jq '.acting[0]' ` + local current_primary=`./bin/ceph pg $pgid query | jq '.acting[0]' ` if [ $orig_primary != $current_primary ]; then echo $orig_primary no longer primary for $pgid return 0 @@ -194,7 +194,7 @@ function pg_scrub_mod() { local last_scrub=$(get_last_scrub_stamp $pgid) # locate the primary - local my_primary=`bin/ceph pg $pgid query | jq '.acting[0]' ` + local my_primary=`./bin/ceph pg $pgid query | jq '.acting[0]' ` local recovery=false ceph pg scrub $pgid #ceph --format json pg dump pgs | jq ".pg_stats | .[] | select(.pgid == \"$pgid\") | .state" diff --git a/qa/standalone/scrub/osd-scrub-test.sh b/qa/standalone/scrub/osd-scrub-test.sh index 85a45a421c1..385479258f2 100755 --- a/qa/standalone/scrub/osd-scrub-test.sh +++ b/qa/standalone/scrub/osd-scrub-test.sh @@ -603,17 +603,16 @@ function TEST_dump_scrub_schedule() { declare -A expct_dmp_duration=( ['dmp_last_duration']="0" ['dmp_last_duration_neg']="not0" ) wait_any_cond $pgid 10 $saved_last_stamp expct_dmp_duration "WaitingAfterScrub_dmp " sched_data || return 1 - sleep 2 - # # step 2: set noscrub and request a "periodic scrub". Watch for the change in the 'is the scrub # scheduled for the future' value # - ceph tell osd.* config set osd_shallow_scrub_chunk_max "3" || return 1 - ceph tell osd.* config set osd_scrub_sleep "2.0" || return 1 ceph osd set noscrub || return 1 sleep 2 + ceph tell osd.* config set osd_shallow_scrub_chunk_max "3" || return 1 + ceph tell osd.* config set osd_scrub_sleep "2.0" || return 1 + sleep 8 saved_last_stamp=${sched_data['query_last_stamp']} ceph tell $pgid schedule-scrub @@ -692,28 +691,28 @@ function wait_initial_scrubs() { # set a long schedule for the periodic scrubs. Wait for the # initial 'no previous scrub is known' scrubs to finish for all PGs. - bin/ceph tell osd.* config set osd_scrub_min_interval 7200 - bin/ceph tell osd.* config set osd_deep_scrub_interval 14400 - bin/ceph tell osd.* config set osd_max_scrubs 32 - bin/ceph tell osd.* config set osd_scrub_sleep 0 - bin/ceph tell osd.* config set osd_shallow_scrub_chunk_max 10 - bin/ceph tell osd.* config set osd_scrub_chunk_max 10 + ceph tell osd.* config set osd_scrub_min_interval 7200 + ceph tell osd.* config set osd_deep_scrub_interval 14400 + ceph tell osd.* config set osd_max_scrubs 32 + ceph tell osd.* config set osd_scrub_sleep 0 + ceph tell osd.* config set osd_shallow_scrub_chunk_max 10 + ceph tell osd.* config set osd_scrub_chunk_max 10 for pg in "${!pg_to_prim_dict[@]}"; do (( extr_dbg >= 1 )) && echo "Scheduling initial scrub for $pg" - bin/ceph tell $pg scrub || return 1 + ceph tell $pg scrub || return 1 done sleep 1 - (( extr_dbg >= 1 )) && bin/ceph pg dump pgs --format=json-pretty | \ + (( extr_dbg >= 1 )) && ceph pg dump pgs --format=json-pretty | \ jq '.pg_stats | map(select(.last_scrub_duration == 0)) | map({pgid: .pgid, last_scrub_duration: .last_scrub_duration})' tout=20 while [ $tout -gt 0 ] ; do sleep 0.5 - (( extr_dbg >= 2 )) && bin/ceph pg dump pgs --format=json-pretty | \ + (( extr_dbg >= 2 )) && ceph pg dump pgs --format=json-pretty | \ jq '.pg_stats | map(select(.last_scrub_duration == 0)) | map({pgid: .pgid, last_scrub_duration: .last_scrub_duration})' - not_done=$(bin/ceph pg dump pgs --format=json-pretty | \ + not_done=$(ceph pg dump pgs --format=json-pretty | \ jq '.pg_stats | map(select(.last_scrub_duration == 0)) | map({pgid: .pgid, last_scrub_duration: .last_scrub_duration})' | wc -l ) # note that we should ignore a header line if [ "$not_done" -le 1 ]; then @@ -782,14 +781,14 @@ function TEST_abort_periodic_for_operator() { wait_initial_scrubs pg_pr || return 1 # limit all OSDs to one scrub at a time - bin/ceph tell osd.* config set osd_max_scrubs 1 - bin/ceph tell osd.* config set osd_stats_update_period_not_scrubbing 1 + ceph tell osd.* config set osd_max_scrubs 1 + ceph tell osd.* config set osd_stats_update_period_not_scrubbing 1 # configure for slow scrubs - bin/ceph tell osd.* config set osd_scrub_sleep 3 - bin/ceph tell osd.* config set osd_shallow_scrub_chunk_max 2 - bin/ceph tell osd.* config set osd_scrub_chunk_max 2 - (( extr_dbg >= 2 )) && bin/ceph tell osd.2 dump_scrub_reservations --format=json-pretty + ceph tell osd.* config set osd_scrub_sleep 3 + ceph tell osd.* config set osd_shallow_scrub_chunk_max 2 + ceph tell osd.* config set osd_scrub_chunk_max 2 + (( extr_dbg >= 2 )) && ceph tell osd.2 dump_scrub_reservations --format=json-pretty # the first PG to work with: local pg1="1.0" @@ -812,7 +811,7 @@ function TEST_abort_periodic_for_operator() { fi # the common primary is allowed two concurrent scrubs - bin/ceph tell osd."${pg_pr[$pg1]}" config set osd_max_scrubs 2 + ceph tell osd."${pg_pr[$pg1]}" config set osd_max_scrubs 2 echo "The two PGs to manipulate are $pg1 and $pg2" set_query_debug "$pg1" @@ -821,31 +820,31 @@ function TEST_abort_periodic_for_operator() { local is_act for i in $( seq 1 3 ) do - is_act=$(bin/ceph pg "$pg1" query | jq '.scrubber.active') + is_act=$(ceph pg "$pg1" query | jq '.scrubber.active') if [[ "$is_act" = "false" ]]; then break fi echo "Still waiting for pg $pg1 to finish scrubbing" sleep 0.7 done - bin/ceph pg dump pgs + ceph pg dump pgs if [[ "$is_act" != "false" ]]; then - bin/ceph pg "$pg1" query + ceph pg "$pg1" query echo "PG $pg1 appears to be still scrubbing" return 1 fi sleep 0.5 echo "Initiating a periodic scrub of $pg1" - (( extr_dbg >= 2 )) && bin/ceph pg "$pg1" query -f json-pretty | jq '.scrubber' - bin/ceph tell $pg1 schedule-deep-scrub || return 1 + (( extr_dbg >= 2 )) && ceph pg "$pg1" query -f json-pretty | jq '.scrubber' + ceph tell $pg1 schedule-deep-scrub || return 1 sleep 1 - (( extr_dbg >= 2 )) && bin/ceph pg "$pg1" query -f json-pretty | jq '.scrubber' + (( extr_dbg >= 2 )) && ceph pg "$pg1" query -f json-pretty | jq '.scrubber' for i in $( seq 1 14 ) do sleep 0.5 - stt=$(bin/ceph pg "$pg1" query | jq '.scrubber') + stt=$(ceph pg "$pg1" query | jq '.scrubber') is_active=$(echo $stt | jq '.active') is_reserving_replicas=$(echo $stt | jq '.is_reserving_replicas') if [[ "$is_active" = "true" && "$is_reserving_replicas" = "false" ]]; then @@ -854,49 +853,49 @@ function TEST_abort_periodic_for_operator() { echo "Still waiting for pg $pg1 to start scrubbing: $stt" done if [[ "$is_active" != "true" || "$is_reserving_replicas" != "false" ]]; then - bin/ceph pg "$pg1" query -f json-pretty | jq '.scrubber' + ceph pg "$pg1" query -f json-pretty | jq '.scrubber' echo "The scrub is not active or is reserving replicas" return 1 fi - (( extr_dbg >= 2 )) && bin/ceph pg "$pg1" query -f json-pretty | jq '.scrubber' + (( extr_dbg >= 2 )) && ceph pg "$pg1" query -f json-pretty | jq '.scrubber' # PG 1 is scrubbing, and has reserved the replicas - soem of which are shared # by PG 2. As the max-scrubs was set to 1, that should prevent PG 2 from # reserving its replicas. - (( extr_dbg >= 1 )) && bin/ceph tell osd.* dump_scrub_reservations --format=json-pretty + (( extr_dbg >= 1 )) && ceph tell osd.* dump_scrub_reservations --format=json-pretty # now - the 2'nd scrub - which should be blocked on reserving set_query_debug "$pg2" - bin/ceph tell "$pg2" schedule-deep-scrub + ceph tell "$pg2" schedule-deep-scrub sleep 0.5 (( extr_dbg >= 2 )) && echo "====================================================================================" - (( extr_dbg >= 2 )) && bin/ceph pg "$pg2" query -f json-pretty | jq '.scrubber' - (( extr_dbg >= 2 )) && bin/ceph pg "$pg1" query -f json-pretty | jq '.scrubber' + (( extr_dbg >= 2 )) && ceph pg "$pg2" query -f json-pretty | jq '.scrubber' + (( extr_dbg >= 2 )) && ceph pg "$pg1" query -f json-pretty | jq '.scrubber' sleep 1 (( extr_dbg >= 2 )) && echo "====================================================================================" - (( extr_dbg >= 2 )) && bin/ceph pg "$pg2" query -f json-pretty | jq '.scrubber' - (( extr_dbg >= 2 )) && bin/ceph pg "$pg1" query -f json-pretty | jq '.scrubber' + (( extr_dbg >= 2 )) && ceph pg "$pg2" query -f json-pretty | jq '.scrubber' + (( extr_dbg >= 2 )) && ceph pg "$pg1" query -f json-pretty | jq '.scrubber' # make sure pg2 scrub is stuck in the reserving state - local stt2=$(bin/ceph pg "$pg2" query | jq '.scrubber') + local stt2=$(ceph pg "$pg2" query | jq '.scrubber') local pg2_is_reserving pg2_is_reserving=$(echo $stt2 | jq '.is_reserving_replicas') if [[ "$pg2_is_reserving" != "true" ]]; then echo "The scheduled scrub for $pg2 should have been stuck" - bin/ceph pg dump pgs + ceph pg dump pgs return 1 fi # now - issue an operator-initiated scrub on pg2. # The periodic scrub should be aborted, and the operator-initiated scrub should start. echo "Instructing $pg2 to perform a high-priority scrub" - bin/ceph tell "$pg2" scrub + ceph tell "$pg2" scrub for i in $( seq 1 10 ) do sleep 0.5 - stt2=$(bin/ceph pg "$pg2" query | jq '.scrubber') + stt2=$(ceph pg "$pg2" query | jq '.scrubber') pg2_is_active=$(echo $stt2 | jq '.active') pg2_is_reserving=$(echo $stt2 | jq '.is_reserving_replicas') if [[ "$pg2_is_active" = "true" && "$pg2_is_reserving" != "true" ]]; then diff --git a/qa/standalone/scrub/scrub-helpers.sh b/qa/standalone/scrub/scrub-helpers.sh index 0b14d6028b6..dd37b643e08 100644 --- a/qa/standalone/scrub/scrub-helpers.sh +++ b/qa/standalone/scrub/scrub-helpers.sh @@ -320,7 +320,7 @@ function build_pg_dicts { # if the infile name is '-', fetch the dump directly from the ceph cluster if [[ $infile == "-" ]]; then - local -r ceph_cmd="bin/ceph pg dump pgs_brief -f=json-pretty" + local -r ceph_cmd="ceph pg dump pgs_brief -f=json-pretty" local -r ceph_cmd_out=$(eval $ceph_cmd) local -r ceph_cmd_rc=$? if [[ $ceph_cmd_rc -ne 0 ]]; then diff --git a/qa/suites/rados/monthrash/workloads/rados_mon_osdmap_prune.yaml b/qa/suites/rados/monthrash/workloads/rados_mon_osdmap_prune.yaml index 372bf2561fa..8b3c4c11ac6 100644 --- a/qa/suites/rados/monthrash/workloads/rados_mon_osdmap_prune.yaml +++ b/qa/suites/rados/monthrash/workloads/rados_mon_osdmap_prune.yaml @@ -15,6 +15,7 @@ overrides: # causing tests to fail due to health warns, even if # the tests themselves are successful. - \(OSDMAP_FLAGS\) + - \(PG_DEGRADED\) tasks: - workunit: clients: diff --git a/qa/suites/rados/verify/validater/valgrind.yaml b/qa/suites/rados/verify/validater/valgrind.yaml index e2dc29b5f7e..17cf141b0cd 100644 --- a/qa/suites/rados/verify/validater/valgrind.yaml +++ b/qa/suites/rados/verify/validater/valgrind.yaml @@ -27,6 +27,7 @@ overrides: - \(SLOW_OPS\) - slow request - OSD bench result + - OSD_DOWN valgrind: mon: [--tool=memcheck, --leak-check=full, --show-reachable=yes] osd: [--tool=memcheck] diff --git a/src/ceph-volume/ceph_volume/main.py b/src/ceph-volume/ceph_volume/main.py index f8eca65ec49..4f27f429e89 100644 --- a/src/ceph-volume/ceph_volume/main.py +++ b/src/ceph-volume/ceph_volume/main.py @@ -11,8 +11,16 @@ try: from importlib.metadata import entry_points def get_entry_points(group: str): # type: ignore - return entry_points().get(group, []) # type: ignore + eps = entry_points() + if hasattr(eps, 'select'): + # New importlib.metadata uses .select() + return eps.select(group=group) + else: + # Fallback to older EntryPoints that returns dicts + return eps.get(group, []) # type: ignore + except ImportError: + # Fallback to `pkg_resources` for older versions from pkg_resources import iter_entry_points as entry_points # type: ignore def get_entry_points(group: str): # type: ignore diff --git a/src/cephadm/cephadm.py b/src/cephadm/cephadm.py index d2ddf564116..a8616980e4d 100755 --- a/src/cephadm/cephadm.py +++ b/src/cephadm/cephadm.py @@ -111,6 +111,7 @@ from cephadmlib.file_utils import ( unlink_file, write_new, write_tmp, + update_meta_file, ) from cephadmlib.net_utils import ( build_addrv_params, @@ -3453,6 +3454,7 @@ def list_daemons( detail: bool = True, legacy_dir: Optional[str] = None, daemon_name: Optional[str] = None, + type_of_daemon: Optional[str] = None, ) -> List[Dict[str, str]]: host_version: Optional[str] = None ls = [] @@ -3489,6 +3491,8 @@ def list_daemons( if os.path.exists(data_dir): for i in os.listdir(data_dir): if i in ['mon', 'osd', 'mds', 'mgr', 'rgw']: + if type_of_daemon and type_of_daemon != i: + continue daemon_type = i for j in os.listdir(os.path.join(data_dir, i)): if '-' not in j: @@ -3525,6 +3529,8 @@ def list_daemons( if daemon_name and name != daemon_name: continue (daemon_type, daemon_id) = j.split('.', 1) + if type_of_daemon and type_of_daemon != daemon_type: + continue unit_name = get_unit_name(fsid, daemon_type, daemon_id) @@ -4705,6 +4711,34 @@ def command_list_images(ctx: CephadmContext) -> None: # print default images cp_obj.write(sys.stdout) + +def update_service_for_daemon(ctx: CephadmContext, + available_daemons: list, + update_daemons: list) -> None: + """ Update the unit.meta file of daemon with required service name for valid daemons""" + + data = {'service_name': ctx.service_name} + # check if all the daemon names are valid + if not set(update_daemons).issubset(set(available_daemons)): + raise Error(f'Error EINVAL: one or more daemons of {update_daemons} does not exist on this host') + for name in update_daemons: + path = os.path.join(ctx.data_dir, ctx.fsid, name, 'unit.meta') + update_meta_file(path, data) + print(f'Successfully updated daemon {name} with service {ctx.service_name}') + + +@infer_fsid +def command_update_osd_service(ctx: CephadmContext) -> int: + """update service for provided daemon""" + update_daemons = [f'osd.{osd_id}' for osd_id in ctx.osd_ids.split(',')] + daemons = list_daemons(ctx, detail=False, type_of_daemon='osd') + if not daemons: + raise Error(f'Daemon {ctx.osd_ids} does not exists on this host') + available_daemons = [d['name'] for d in daemons] + update_service_for_daemon(ctx, available_daemons, update_daemons) + return 0 + + ################################## @@ -5571,6 +5605,14 @@ def _get_parser(): parser_list_images = subparsers.add_parser( 'list-images', help='list all the default images') parser_list_images.set_defaults(func=command_list_images) + + parser_update_service = subparsers.add_parser( + 'update-osd-service', help='update service for provided daemon') + parser_update_service.set_defaults(func=command_update_osd_service) + parser_update_service.add_argument('--fsid', help='cluster FSID') + parser_update_service.add_argument('--osd-ids', required=True, help='Comma-separated OSD IDs') + parser_update_service.add_argument('--service-name', required=True, help='OSD service name') + return parser diff --git a/src/cephadm/cephadmlib/daemons/monitoring.py b/src/cephadm/cephadmlib/daemons/monitoring.py index 9a9402632b0..4ba00daaefb 100644 --- a/src/cephadm/cephadmlib/daemons/monitoring.py +++ b/src/cephadm/cephadmlib/daemons/monitoring.py @@ -16,7 +16,13 @@ from ..daemon_form import register as register_daemon_form from ..daemon_identity import DaemonIdentity from ..deployment_utils import to_deployment_container from ..exceptions import Error -from ..net_utils import get_fqdn, get_hostname, get_ip_addresses, wrap_ipv6 +from ..net_utils import ( + get_fqdn, + get_hostname, + get_ip_addresses, + wrap_ipv6, + EndPoint, +) @register_daemon_form @@ -89,11 +95,6 @@ class Monitoring(ContainerDaemonForm): 'image': DefaultImages.ALERTMANAGER.image_ref, 'cpus': '2', 'memory': '2GB', - 'args': [ - '--cluster.listen-address=:{}'.format( - port_map['alertmanager'][1] - ), - ], 'config-json-files': [ 'alertmanager.yml', ], @@ -248,11 +249,14 @@ class Monitoring(ContainerDaemonForm): ip = meta['ip'] if 'ports' in meta and meta['ports']: port = meta['ports'][0] - if daemon_type == 'prometheus': - config = fetch_configs(ctx) + config = fetch_configs(ctx) + if daemon_type in ['prometheus', 'alertmanager']: ip_to_bind_to = config.get('ip_to_bind_to', '') if ip_to_bind_to: ip = ip_to_bind_to + web_listen_addr = str(EndPoint(ip, port)) + r += [f'--web.listen-address={web_listen_addr}'] + if daemon_type == 'prometheus': retention_time = config.get('retention_time', '15d') retention_size = config.get( 'retention_size', '0' @@ -276,9 +280,11 @@ class Monitoring(ContainerDaemonForm): r += ['--web.route-prefix=/prometheus/'] else: r += [f'--web.external-url={scheme}://{host}:{port}'] - r += [f'--web.listen-address={ip}:{port}'] if daemon_type == 'alertmanager': - config = fetch_configs(ctx) + clus_listen_addr = str( + EndPoint(ip, self.port_map[daemon_type][1]) + ) + r += [f'--cluster.listen-address={clus_listen_addr}'] use_url_prefix = config.get('use_url_prefix', False) peers = config.get('peers', list()) # type: ignore for peer in peers: @@ -294,13 +300,11 @@ class Monitoring(ContainerDaemonForm): if daemon_type == 'promtail': r += ['--config.expand-env'] if daemon_type == 'prometheus': - config = fetch_configs(ctx) try: r += [f'--web.config.file={config["web_config"]}'] except KeyError: pass if daemon_type == 'node-exporter': - config = fetch_configs(ctx) try: r += [f'--web.config.file={config["web_config"]}'] except KeyError: diff --git a/src/cephadm/cephadmlib/file_utils.py b/src/cephadm/cephadmlib/file_utils.py index 27e70e31756..4dd88cc3671 100644 --- a/src/cephadm/cephadmlib/file_utils.py +++ b/src/cephadm/cephadmlib/file_utils.py @@ -5,6 +5,7 @@ import datetime import logging import os import tempfile +import json from contextlib import contextmanager from pathlib import Path @@ -157,3 +158,26 @@ def unlink_file( except Exception: if not ignore_errors: raise + + +def update_meta_file(file_path: str, update_key_val: dict) -> None: + """Update key in the file with provided value""" + try: + with open(file_path, 'r') as fh: + data = json.load(fh) + file_stat = os.stat(file_path) + except FileNotFoundError: + raise + except Exception: + logger.exception(f'Failed to update {file_path}') + raise + data.update( + {key: value for key, value in update_key_val.items() if key in data} + ) + + with write_new( + file_path, + owner=(file_stat.st_uid, file_stat.st_gid), + perms=(file_stat.st_mode & 0o777), + ) as fh: + fh.write(json.dumps(data, indent=4) + '\n') diff --git a/src/cephadm/cephadmlib/net_utils.py b/src/cephadm/cephadmlib/net_utils.py index 9a7f138b1c6..bfa61d933ef 100644 --- a/src/cephadm/cephadmlib/net_utils.py +++ b/src/cephadm/cephadmlib/net_utils.py @@ -24,12 +24,22 @@ class EndPoint: def __init__(self, ip: str, port: int) -> None: self.ip = ip self.port = port + self.is_ipv4 = True + try: + if ip and ipaddress.ip_network(ip).version == 6: + self.is_ipv4 = False + except Exception: + logger.exception('Failed to check ip address version') def __str__(self) -> str: - return f'{self.ip}:{self.port}' + if self.is_ipv4: + return f'{self.ip}:{self.port}' + return f'[{self.ip}]:{self.port}' def __repr__(self) -> str: - return f'{self.ip}:{self.port}' + if self.is_ipv4: + return f'{self.ip}:{self.port}' + return f'[{self.ip}]:{self.port}' def attempt_bind(ctx, s, address, port): diff --git a/src/cephadm/tests/test_deploy.py b/src/cephadm/tests/test_deploy.py index c5094db335f..1736639ed55 100644 --- a/src/cephadm/tests/test_deploy.py +++ b/src/cephadm/tests/test_deploy.py @@ -316,7 +316,7 @@ def test_deploy_a_monitoring_container(cephadm_fs, funkypatch): runfile_lines = f.read().splitlines() assert 'podman' in runfile_lines[-1] assert runfile_lines[-1].endswith( - 'quay.io/titans/prometheus:latest --config.file=/etc/prometheus/prometheus.yml --storage.tsdb.path=/prometheus --storage.tsdb.retention.time=15d --storage.tsdb.retention.size=0 --web.external-url=http://10.10.10.10:9095 --web.listen-address=1.2.3.4:9095' + 'quay.io/titans/prometheus:latest --config.file=/etc/prometheus/prometheus.yml --storage.tsdb.path=/prometheus --web.listen-address=1.2.3.4:9095 --storage.tsdb.retention.time=15d --storage.tsdb.retention.size=0 --web.external-url=http://10.10.10.10:9095' ) assert '--user 8765' in runfile_lines[-1] assert f'-v /var/lib/ceph/{fsid}/prometheus.fire/etc/prometheus:/etc/prometheus:Z' in runfile_lines[-1] diff --git a/src/common/bit_vector.hpp b/src/common/bit_vector.hpp index 961d9a0192e..c5fd491ed29 100644 --- a/src/common/bit_vector.hpp +++ b/src/common/bit_vector.hpp @@ -29,8 +29,8 @@ private: static const uint8_t MASK = static_cast<uint8_t>((1 << _bit_count) - 1); // must be power of 2 - BOOST_STATIC_ASSERT((_bit_count != 0) && !(_bit_count & (_bit_count - 1))); - BOOST_STATIC_ASSERT(_bit_count <= BITS_PER_BYTE); + static_assert((_bit_count != 0) && !(_bit_count & (_bit_count - 1))); + static_assert(_bit_count <= BITS_PER_BYTE); template <typename DataIterator> class ReferenceImpl { diff --git a/src/common/ceph_time.h b/src/common/ceph_time.h index 01feff4c063..0b05be5372e 100644 --- a/src/common/ceph_time.h +++ b/src/common/ceph_time.h @@ -342,6 +342,23 @@ public: } }; +// Please note time_guard is not thread safety -- multiple threads +// updating same diff_accumulator can corrupt it. +template <class ClockT = mono_clock> +class time_guard { + const typename ClockT::time_point start; + timespan& diff_accumulator; + +public: + time_guard(timespan& diff_accumulator) + : start(ClockT::now()), + diff_accumulator(diff_accumulator) { + } + ~time_guard() { + diff_accumulator += ClockT::now() - start; + } +}; + namespace time_detail { // So that our subtractions produce negative spans rather than // arithmetic underflow. diff --git a/src/common/config_cacher.h b/src/common/config_cacher.h index 91b8152dde1..f23195955a1 100644 --- a/src/common/config_cacher.h +++ b/src/common/config_cacher.h @@ -18,21 +18,30 @@ #include "common/config_obs.h" #include "common/config.h" +/** + * A simple class to cache a single configuration value. + * Points to note: + * - as get_tracked_conf_keys() must return a pointer to a null-terminated + * array of C-strings, 'keys' - an array - is used to hold the sole key + * that this observer is interested in. + * - the const cast should be removed once we change the + * get_tracked_conf_keys() to return const char* const * (or something + * similar). + */ template <typename ValueT> class md_config_cacher_t : public md_config_obs_t { ConfigProxy& conf; - const char* const option_name; + const char* keys[2]; std::atomic<ValueT> value_cache; const char** get_tracked_conf_keys() const override { - const static char* keys[] = { option_name, nullptr }; - return keys; + return const_cast<const char**>(keys); } void handle_conf_change(const ConfigProxy& conf, const std::set<std::string>& changed) override { - if (changed.count(option_name)) { - value_cache.store(conf.get_val<ValueT>(option_name)); + if (changed.contains(keys[0])) { + value_cache.store(conf.get_val<ValueT>(keys[0])); } } @@ -40,10 +49,10 @@ public: md_config_cacher_t(ConfigProxy& conf, const char* const option_name) : conf(conf), - option_name(option_name) { + keys{option_name, nullptr} { conf.add_observer(this); std::atomic_init(&value_cache, - conf.get_val<ValueT>(option_name)); + conf.get_val<ValueT>(keys[0])); } ~md_config_cacher_t() { diff --git a/src/common/io_exerciser/RadosIo.cc b/src/common/io_exerciser/RadosIo.cc index 4451900b7bb..a78c074228b 100644 --- a/src/common/io_exerciser/RadosIo.cc +++ b/src/common/io_exerciser/RadosIo.cc @@ -11,6 +11,32 @@ using RadosIo = ceph::io_exerciser::RadosIo; +namespace { +template <typename S> +int send_osd_command(int osd, S& s, librados::Rados& rados, const char* name, + ceph::buffer::list& inbl, ceph::buffer::list* outbl, + Formatter* f) { + encode_json(name, s, f); + + std::ostringstream oss; + f->flush(oss); + int rc = rados.osd_command(osd, oss.str(), inbl, outbl, nullptr); + return rc; +} + +template <typename S> +int send_mon_command(S& s, librados::Rados& rados, const char* name, + ceph::buffer::list& inbl, ceph::buffer::list* outbl, + Formatter* f) { + encode_json(name, s, f); + + std::ostringstream oss; + f->flush(oss); + int rc = rados.mon_command(oss.str(), inbl, outbl, nullptr); + return rc; +} +} // namespace + RadosIo::RadosIo(librados::Rados& rados, boost::asio::io_context& asio, const std::string& pool, const std::string& oid, const std::optional<std::vector<int>>& cached_shard_order, @@ -293,15 +319,13 @@ void RadosIo::applyReadWriteOp(IoOp& op) { void RadosIo::applyInjectOp(IoOp& op) { bufferlist osdmap_inbl, inject_inbl, osdmap_outbl, inject_outbl; auto formatter = std::make_unique<JSONFormatter>(false); - std::ostringstream oss; int osd = -1; std::vector<int> shard_order; ceph::messaging::osd::OSDMapRequest osdMapRequest{pool, get_oid(), ""}; - encode_json("OSDMapRequest", osdMapRequest, formatter.get()); - formatter->flush(oss); - int rc = rados.mon_command(oss.str(), osdmap_inbl, &osdmap_outbl, nullptr); + int rc = send_mon_command(osdMapRequest, rados, "OSDMapRequest", osdmap_inbl, + &osdmap_outbl, formatter.get()); ceph_assert(rc == 0); JSONParser p; @@ -322,22 +346,22 @@ void RadosIo::applyInjectOp(IoOp& op) { ceph::messaging::osd::InjectECErrorRequest<InjectOpType::ReadEIO> injectErrorRequest{pool, oid, errorOp.shard, errorOp.type, errorOp.when, errorOp.duration}; - encode_json("InjectECErrorRequest", injectErrorRequest, - formatter.get()); + int rc = send_osd_command(osd, injectErrorRequest, rados, + "InjectECErrorRequest", inject_inbl, + &inject_outbl, formatter.get()); + ceph_assert(rc == 0); } else if (errorOp.type == 1) { ceph::messaging::osd::InjectECErrorRequest< InjectOpType::ReadMissingShard> injectErrorRequest{pool, oid, errorOp.shard, errorOp.type, errorOp.when, errorOp.duration}; - encode_json("InjectECErrorRequest", injectErrorRequest, - formatter.get()); + int rc = send_osd_command(osd, injectErrorRequest, rados, + "InjectECErrorRequest", inject_inbl, + &inject_outbl, formatter.get()); + ceph_assert(rc == 0); } else { ceph_abort_msg("Unsupported inject type"); } - formatter->flush(oss); - int rc = rados.osd_command(osd, oss.str(), inject_inbl, &inject_outbl, - nullptr); - ceph_assert(rc == 0); break; } case OpType::InjectWriteError: { @@ -348,14 +372,18 @@ void RadosIo::applyInjectOp(IoOp& op) { InjectOpType::WriteFailAndRollback> injectErrorRequest{pool, oid, errorOp.shard, errorOp.type, errorOp.when, errorOp.duration}; - encode_json("InjectECErrorRequest", injectErrorRequest, - formatter.get()); + int rc = send_osd_command(osd, injectErrorRequest, rados, + "InjectECErrorRequest", inject_inbl, + &inject_outbl, formatter.get()); + ceph_assert(rc == 0); } else if (errorOp.type == 3) { ceph::messaging::osd::InjectECErrorRequest<InjectOpType::WriteOSDAbort> injectErrorRequest{pool, oid, errorOp.shard, errorOp.type, errorOp.when, errorOp.duration}; - encode_json("InjectECErrorRequest", injectErrorRequest, - formatter.get()); + int rc = send_osd_command(osd, injectErrorRequest, rados, + "InjectECErrorRequest", inject_inbl, + &inject_outbl, formatter.get()); + ceph_assert(rc == 0); // This inject is sent directly to the shard we want to inject the error // on @@ -364,10 +392,6 @@ void RadosIo::applyInjectOp(IoOp& op) { ceph_abort("Unsupported inject type"); } - formatter->flush(oss); - int rc = rados.osd_command(osd, oss.str(), inject_inbl, &inject_outbl, - nullptr); - ceph_assert(rc == 0); break; } case OpType::ClearReadErrorInject: { @@ -377,22 +401,22 @@ void RadosIo::applyInjectOp(IoOp& op) { if (errorOp.type == 0) { ceph::messaging::osd::InjectECClearErrorRequest<InjectOpType::ReadEIO> clearErrorInject{pool, oid, errorOp.shard, errorOp.type}; - encode_json("InjectECClearErrorRequest", clearErrorInject, - formatter.get()); + int rc = send_osd_command(osd, clearErrorInject, rados, + "InjectECClearErrorRequest", inject_inbl, + &inject_outbl, formatter.get()); + ceph_assert(rc == 0); } else if (errorOp.type == 1) { ceph::messaging::osd::InjectECClearErrorRequest< InjectOpType::ReadMissingShard> clearErrorInject{pool, oid, errorOp.shard, errorOp.type}; - encode_json("InjectECClearErrorRequest", clearErrorInject, - formatter.get()); + int rc = send_osd_command(osd, clearErrorInject, rados, + "InjectECClearErrorRequest", inject_inbl, + &inject_outbl, formatter.get()); + ceph_assert(rc == 0); } else { ceph_abort("Unsupported inject type"); } - formatter->flush(oss); - int rc = rados.osd_command(osd, oss.str(), inject_inbl, &inject_outbl, - nullptr); - ceph_assert(rc == 0); break; } case OpType::ClearWriteErrorInject: { @@ -403,22 +427,22 @@ void RadosIo::applyInjectOp(IoOp& op) { ceph::messaging::osd::InjectECClearErrorRequest< InjectOpType::WriteFailAndRollback> clearErrorInject{pool, oid, errorOp.shard, errorOp.type}; - encode_json("InjectECClearErrorRequest", clearErrorInject, - formatter.get()); + int rc = send_osd_command(osd, clearErrorInject, rados, + "InjectECClearErrorRequest", inject_inbl, + &inject_outbl, formatter.get()); + ceph_assert(rc == 0); } else if (errorOp.type == 3) { ceph::messaging::osd::InjectECClearErrorRequest< InjectOpType::WriteOSDAbort> clearErrorInject{pool, oid, errorOp.shard, errorOp.type}; - encode_json("InjectECClearErrorRequest", clearErrorInject, - formatter.get()); + int rc = send_osd_command(osd, clearErrorInject, rados, + "InjectECClearErrorRequest", inject_inbl, + &inject_outbl, formatter.get()); + ceph_assert(rc == 0); } else { ceph_abort("Unsupported inject type"); } - formatter->flush(oss); - int rc = rados.osd_command(osd, oss.str(), inject_inbl, &inject_outbl, - nullptr); - ceph_assert(rc == 0); break; } default: diff --git a/src/crimson/os/alienstore/alien_store.cc b/src/crimson/os/alienstore/alien_store.cc index a9c69f4660e..db6decd84f9 100644 --- a/src/crimson/os/alienstore/alien_store.cc +++ b/src/crimson/os/alienstore/alien_store.cc @@ -435,8 +435,21 @@ auto AlienStore::omap_get_values(CollectionRef ch, return do_with_op_gate(omap_values_t{}, [=, this] (auto &values) { return tp->submit(ch->get_cid().hash_to_shard(tp->size()), [=, this, &values] { auto c = static_cast<AlienCollection*>(ch.get()); - return store->omap_get_values(c->collection, oid, start, - reinterpret_cast<map<string, bufferlist>*>(&values)); + return store->omap_iterate( + c->collection, oid, + ObjectStore::omap_iter_seek_t{ + .seek_position = start.value_or(std::string{}), + // FIXME: classical OSDs begins iteration from LOWER_BOUND + // (or UPPER_BOUND if filter_prefix > start). However, these + // bits are not implemented yet + .seek_type = ObjectStore::omap_iter_seek_t::UPPER_BOUND + }, + [&values] + (std::string_view key, std::string_view value) mutable { + values[std::string{key}].append(value); + // FIXME: there is limit on number of entries yet + return ObjectStore::omap_iter_ret_t::NEXT; + }); }).then([&values] (int r) -> read_errorator::future<std::tuple<bool, omap_values_t>> { if (r == -ENOENT) { diff --git a/src/crimson/os/seastore/CMakeLists.txt b/src/crimson/os/seastore/CMakeLists.txt index e5b8960c38c..3da5e65ceec 100644 --- a/src/crimson/os/seastore/CMakeLists.txt +++ b/src/crimson/os/seastore/CMakeLists.txt @@ -1,5 +1,6 @@ set(crimson_seastore_srcs cached_extent.cc + lba_mapping.cc seastore_types.cc segment_manager.cc segment_manager/ephemeral.cc @@ -19,7 +20,6 @@ set(crimson_seastore_srcs omap_manager.cc omap_manager/btree/btree_omap_manager.cc omap_manager/btree/omap_btree_node_impl.cc - btree/btree_range_pin.cc btree/fixed_kv_node.cc onode.cc onode_manager/staged-fltree/node.cc diff --git a/src/crimson/os/seastore/async_cleaner.h b/src/crimson/os/seastore/async_cleaner.h index 01ab44c4c7c..1cef771aeb8 100644 --- a/src/crimson/os/seastore/async_cleaner.h +++ b/src/crimson/os/seastore/async_cleaner.h @@ -17,6 +17,7 @@ #include "crimson/os/seastore/randomblock_manager_group.h" #include "crimson/os/seastore/transaction.h" #include "crimson/os/seastore/segment_seq_allocator.h" +#include "crimson/os/seastore/backref_mapping.h" namespace crimson::os::seastore { diff --git a/src/crimson/os/seastore/backref/btree_backref_manager.h b/src/crimson/os/seastore/backref/btree_backref_manager.h index 38084bb00e6..24897dd55da 100644 --- a/src/crimson/os/seastore/backref/btree_backref_manager.h +++ b/src/crimson/os/seastore/backref/btree_backref_manager.h @@ -9,44 +9,28 @@ namespace crimson::os::seastore::backref { -constexpr size_t BACKREF_BLOCK_SIZE = 4096; - -class BtreeBackrefMapping : public BtreeNodeMapping<paddr_t, laddr_t> { - extent_types_t type; +class BtreeBackrefMapping : public BackrefMapping { public: BtreeBackrefMapping(op_context_t<paddr_t> ctx) - : BtreeNodeMapping(ctx) {} + : BackrefMapping(ctx) {} BtreeBackrefMapping( op_context_t<paddr_t> ctx, CachedExtentRef parent, uint16_t pos, backref_map_val_t &val, backref_node_meta_t &&meta) - : BtreeNodeMapping( + : BackrefMapping( + val.type, ctx, parent, pos, val.laddr, val.len, - std::forward<backref_node_meta_t>(meta)), - type(val.type) - {} - extent_types_t get_type() const final { - return type; - } - - bool is_clone() const final { - return false; - } - -protected: - std::unique_ptr<BtreeNodeMapping<paddr_t, laddr_t>> _duplicate( - op_context_t<paddr_t> ctx) const final { - return std::unique_ptr<BtreeNodeMapping<paddr_t, laddr_t>>( - new BtreeBackrefMapping(ctx)); - } + std::forward<backref_node_meta_t>(meta)) {} }; +constexpr size_t BACKREF_BLOCK_SIZE = 4096; + using BackrefBtree = FixedKVBtree< paddr_t, backref_map_val_t, BackrefInternalNode, BackrefLeafNode, BtreeBackrefMapping, BACKREF_BLOCK_SIZE, false>; diff --git a/src/crimson/os/seastore/backref_manager.h b/src/crimson/os/seastore/backref_manager.h index 3feedb997b4..8c746b571b2 100644 --- a/src/crimson/os/seastore/backref_manager.h +++ b/src/crimson/os/seastore/backref_manager.h @@ -6,6 +6,7 @@ #include "crimson/os/seastore/cache.h" #include "crimson/os/seastore/cached_extent.h" #include "crimson/os/seastore/transaction.h" +#include "crimson/os/seastore/backref_mapping.h" namespace crimson::os::seastore { diff --git a/src/crimson/os/seastore/backref_mapping.h b/src/crimson/os/seastore/backref_mapping.h new file mode 100644 index 00000000000..d0a6a0ea6ff --- /dev/null +++ b/src/crimson/os/seastore/backref_mapping.h @@ -0,0 +1,27 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#pragma once + +#include "crimson/os/seastore/btree/btree_range_pin.h" + +namespace crimson::os::seastore { + +class BackrefMapping : public BtreeNodeMapping<paddr_t, laddr_t> { + extent_types_t type; +public: + BackrefMapping(op_context_t<paddr_t> ctx) + : BtreeNodeMapping(ctx) {} + template <typename... T> + BackrefMapping(extent_types_t type, T&&... t) + : BtreeNodeMapping(std::forward<T>(t)...), + type(type) {} + extent_types_t get_type() const { + return type; + } +}; + +using BackrefMappingRef = std::unique_ptr<BackrefMapping>; +using backref_pin_list_t = std::list<BackrefMappingRef>; + +} // namespace crimson::os::seastore diff --git a/src/crimson/os/seastore/btree/btree_range_pin.cc b/src/crimson/os/seastore/btree/btree_range_pin.cc deleted file mode 100644 index f0d507a24c4..00000000000 --- a/src/crimson/os/seastore/btree/btree_range_pin.cc +++ /dev/null @@ -1,54 +0,0 @@ -// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- -// vim: ts=8 sw=2 smarttab - -#include "crimson/os/seastore/btree/btree_range_pin.h" -#include "crimson/os/seastore/btree/fixed_kv_node.h" - -namespace crimson::os::seastore { - -template <typename key_t, typename val_t> -get_child_ret_t<LogicalCachedExtent> -BtreeNodeMapping<key_t, val_t>::get_logical_extent( - Transaction &t) -{ - ceph_assert(is_parent_viewable()); - assert(pos != std::numeric_limits<uint16_t>::max()); - ceph_assert(t.get_trans_id() == ctx.trans.get_trans_id()); - auto &p = (FixedKVNode<key_t>&)*parent; - auto k = this->is_indirect() - ? this->get_intermediate_base() - : get_key(); - auto v = p.template get_child<LogicalCachedExtent>(ctx, pos, k); - if (!v.has_child()) { - this->child_pos = v.get_child_pos(); - } - return v; -} - -template <typename key_t, typename val_t> -bool BtreeNodeMapping<key_t, val_t>::is_stable() const -{ - assert(!this->parent_modified()); - assert(pos != std::numeric_limits<uint16_t>::max()); - auto &p = (FixedKVNode<key_t>&)*parent; - auto k = this->is_indirect() - ? this->get_intermediate_base() - : get_key(); - return p.is_child_stable(ctx, pos, k); -} - -template <typename key_t, typename val_t> -bool BtreeNodeMapping<key_t, val_t>::is_data_stable() const -{ - assert(!this->parent_modified()); - assert(pos != std::numeric_limits<uint16_t>::max()); - auto &p = (FixedKVNode<key_t>&)*parent; - auto k = this->is_indirect() - ? this->get_intermediate_base() - : get_key(); - return p.is_child_data_stable(ctx, pos, k); -} - -template class BtreeNodeMapping<laddr_t, paddr_t>; -template class BtreeNodeMapping<paddr_t, laddr_t>; -} // namespace crimson::os::seastore diff --git a/src/crimson/os/seastore/btree/btree_range_pin.h b/src/crimson/os/seastore/btree/btree_range_pin.h index 91751801e5d..bfd350a8bed 100644 --- a/src/crimson/os/seastore/btree/btree_range_pin.h +++ b/src/crimson/os/seastore/btree/btree_range_pin.h @@ -7,11 +7,12 @@ #include "crimson/common/log.h" -#include "crimson/os/seastore/cache.h" #include "crimson/os/seastore/cached_extent.h" #include "crimson/os/seastore/seastore_types.h" +#include "crimson/os/seastore/transaction.h" namespace crimson::os::seastore { +class Cache; template <typename node_key_t> struct op_context_t { @@ -116,8 +117,6 @@ protected: extent_len_t len = 0; fixed_kv_node_meta_t<key_t> range; uint16_t pos = std::numeric_limits<uint16_t>::max(); - - virtual std::unique_ptr<BtreeNodeMapping> _duplicate(op_context_t<key_t>) const = 0; fixed_kv_node_meta_t<key_t> _get_pin_range() const { return range; } @@ -139,11 +138,7 @@ public: len(len), range(meta), pos(pos) - { - if (!parent->is_pending()) { - this->child_pos = {parent, pos}; - } - } + {} CachedExtentRef get_parent() const final { return parent; @@ -162,11 +157,6 @@ public: return len; } - extent_types_t get_type() const override { - ceph_abort("should never happen"); - return extent_types_t::ROOT; - } - val_t get_val() const final { if constexpr (std::is_same_v<val_t, paddr_t>) { return value.get_paddr(); @@ -180,16 +170,6 @@ public: return range.begin; } - PhysicalNodeMappingRef<key_t, val_t> duplicate() const final { - auto ret = _duplicate(ctx); - ret->range = range; - ret->value = value; - ret->parent = parent; - ret->len = len; - ret->pos = pos; - return ret; - } - bool has_been_invalidated() const final { return parent->has_been_invalidated(); } @@ -215,9 +195,6 @@ public: return unviewable; } - get_child_ret_t<LogicalCachedExtent> get_logical_extent(Transaction&) final; - bool is_stable() const final; - bool is_data_stable() const final; bool is_parent_viewable() const final { ceph_assert(parent); if (!parent->is_valid()) { diff --git a/src/crimson/os/seastore/cached_extent.cc b/src/crimson/os/seastore/cached_extent.cc index ab2492f5bb6..49fede1d9a8 100644 --- a/src/crimson/os/seastore/cached_extent.cc +++ b/src/crimson/os/seastore/cached_extent.cc @@ -7,6 +7,7 @@ #include "crimson/common/log.h" #include "crimson/os/seastore/btree/fixed_kv_node.h" +#include "crimson/os/seastore/lba_mapping.h" namespace { [[maybe_unused]] seastar::logger& logger() { @@ -142,6 +143,12 @@ void LogicalCachedExtent::on_replace_prior() { parent->children[off] = this; } +void LogicalCachedExtent::maybe_set_intermediate_laddr(LBAMapping &mapping) { + laddr = mapping.is_indirect() + ? mapping.get_intermediate_base() + : mapping.get_key(); +} + parent_tracker_t::~parent_tracker_t() { // this is parent's tracker, reset it auto &p = (FixedKVNode<laddr_t>&)*parent; @@ -150,32 +157,6 @@ parent_tracker_t::~parent_tracker_t() { } } -std::ostream &operator<<(std::ostream &out, const LBAMapping &rhs) -{ - out << "LBAMapping(" << rhs.get_key() - << "~0x" << std::hex << rhs.get_length() << std::dec - << "->" << rhs.get_val(); - if (rhs.is_indirect()) { - out << ",indirect(" << rhs.get_intermediate_base() - << "~0x" << std::hex << rhs.get_intermediate_length() - << "@0x" << rhs.get_intermediate_offset() << std::dec - << ")"; - } - out << ")"; - return out; -} - -std::ostream &operator<<(std::ostream &out, const lba_pin_list_t &rhs) -{ - bool first = true; - out << '['; - for (const auto &i: rhs) { - out << (first ? "" : ",") << *i; - first = false; - } - return out << ']'; -} - bool BufferSpace::is_range_loaded(extent_len_t offset, extent_len_t length) const { assert(length > 0); diff --git a/src/crimson/os/seastore/cached_extent.h b/src/crimson/os/seastore/cached_extent.h index f9356f40b83..9dc60d719eb 100644 --- a/src/crimson/os/seastore/cached_extent.h +++ b/src/crimson/os/seastore/cached_extent.h @@ -1279,7 +1279,6 @@ private: }; class ChildableCachedExtent; -class LogicalCachedExtent; class child_pos_t { public: @@ -1337,48 +1336,18 @@ using PhysicalNodeMappingRef = std::unique_ptr<PhysicalNodeMapping<key_t, val_t> template <typename key_t, typename val_t> class PhysicalNodeMapping { public: + PhysicalNodeMapping() = default; + PhysicalNodeMapping(const PhysicalNodeMapping&) = delete; virtual extent_len_t get_length() const = 0; - virtual extent_types_t get_type() const = 0; virtual val_t get_val() const = 0; virtual key_t get_key() const = 0; - virtual PhysicalNodeMappingRef<key_t, val_t> duplicate() const = 0; - virtual PhysicalNodeMappingRef<key_t, val_t> refresh_with_pending_parent() { - ceph_abort("impossible"); - return {}; - } virtual bool has_been_invalidated() const = 0; virtual CachedExtentRef get_parent() const = 0; virtual uint16_t get_pos() const = 0; - // An lba pin may be indirect, see comments in lba_manager/btree/btree_lba_manager.h - virtual bool is_indirect() const { return false; } - virtual key_t get_intermediate_key() const { return min_max_t<key_t>::null; } - virtual key_t get_intermediate_base() const { return min_max_t<key_t>::null; } - virtual extent_len_t get_intermediate_length() const { return 0; } virtual uint32_t get_checksum() const { ceph_abort("impossible"); return 0; } - // The start offset of the pin, must be 0 if the pin is not indirect - virtual extent_len_t get_intermediate_offset() const { - return std::numeric_limits<extent_len_t>::max(); - } - - virtual get_child_ret_t<LogicalCachedExtent> - get_logical_extent(Transaction &t) = 0; - - void link_child(ChildableCachedExtent *c) { - ceph_assert(child_pos); - child_pos->link_child(c); - } - - // For reserved mappings, the return values are - // undefined although it won't crash - virtual bool is_stable() const = 0; - virtual bool is_data_stable() const = 0; - virtual bool is_clone() const = 0; - bool is_zero_reserved() const { - return !get_val().is_real(); - } virtual bool is_parent_viewable() const = 0; virtual bool is_parent_valid() const = 0; virtual bool parent_modified() const { @@ -1391,24 +1360,8 @@ public: } virtual ~PhysicalNodeMapping() {} -protected: - std::optional<child_pos_t> child_pos = std::nullopt; }; -using LBAMapping = PhysicalNodeMapping<laddr_t, paddr_t>; -using LBAMappingRef = PhysicalNodeMappingRef<laddr_t, paddr_t>; - -std::ostream &operator<<(std::ostream &out, const LBAMapping &rhs); - -using lba_pin_list_t = std::list<LBAMappingRef>; - -std::ostream &operator<<(std::ostream &out, const lba_pin_list_t &rhs); - -using BackrefMapping = PhysicalNodeMapping<paddr_t, laddr_t>; -using BackrefMappingRef = PhysicalNodeMappingRef<paddr_t, laddr_t>; - -using backref_pin_list_t = std::list<BackrefMappingRef>; - /** * RetiredExtentPlaceholder * @@ -1522,6 +1475,8 @@ private: return out; } }; + +class LBAMapping; /** * LogicalCachedExtent * @@ -1556,11 +1511,7 @@ public: laddr = nladdr; } - void maybe_set_intermediate_laddr(LBAMapping &mapping) { - laddr = mapping.is_indirect() - ? mapping.get_intermediate_base() - : mapping.get_key(); - } + void maybe_set_intermediate_laddr(LBAMapping &mapping); void apply_delta_and_adjust_crc( paddr_t base, const ceph::bufferlist &bl) final { @@ -1660,8 +1611,6 @@ using lextent_list_t = addr_extent_list_base_t< } #if FMT_VERSION >= 90000 -template <> struct fmt::formatter<crimson::os::seastore::lba_pin_list_t> : fmt::ostream_formatter {}; template <> struct fmt::formatter<crimson::os::seastore::CachedExtent> : fmt::ostream_formatter {}; template <> struct fmt::formatter<crimson::os::seastore::LogicalCachedExtent> : fmt::ostream_formatter {}; -template <> struct fmt::formatter<crimson::os::seastore::LBAMapping> : fmt::ostream_formatter {}; #endif diff --git a/src/crimson/os/seastore/lba_manager.h b/src/crimson/os/seastore/lba_manager.h index a050b2cdf47..9a34bf56157 100644 --- a/src/crimson/os/seastore/lba_manager.h +++ b/src/crimson/os/seastore/lba_manager.h @@ -19,6 +19,7 @@ #include "crimson/os/seastore/cache.h" #include "crimson/os/seastore/seastore_types.h" +#include "crimson/os/seastore/lba_mapping.h" namespace crimson::os::seastore { diff --git a/src/crimson/os/seastore/lba_manager/btree/btree_lba_manager.cc b/src/crimson/os/seastore/lba_manager/btree/btree_lba_manager.cc index 007737ff450..888d3c359ac 100644 --- a/src/crimson/os/seastore/lba_manager/btree/btree_lba_manager.cc +++ b/src/crimson/os/seastore/lba_manager/btree/btree_lba_manager.cc @@ -94,6 +94,45 @@ void unlink_phy_tree_root_node<laddr_t>(RootBlockRef &root_block) { namespace crimson::os::seastore::lba_manager::btree { +get_child_ret_t<LogicalCachedExtent> +BtreeLBAMapping::get_logical_extent(Transaction &t) +{ + ceph_assert(is_parent_viewable()); + assert(pos != std::numeric_limits<uint16_t>::max()); + ceph_assert(t.get_trans_id() == ctx.trans.get_trans_id()); + auto &p = static_cast<LBALeafNode&>(*parent); + auto k = this->is_indirect() + ? this->get_intermediate_base() + : get_key(); + auto v = p.template get_child<LogicalCachedExtent>(ctx, pos, k); + if (!v.has_child()) { + this->child_pos = v.get_child_pos(); + } + return v; +} + +bool BtreeLBAMapping::is_stable() const +{ + assert(!this->parent_modified()); + assert(pos != std::numeric_limits<uint16_t>::max()); + auto &p = static_cast<LBALeafNode&>(*parent); + auto k = this->is_indirect() + ? this->get_intermediate_base() + : get_key(); + return p.is_child_stable(ctx, pos, k); +} + +bool BtreeLBAMapping::is_data_stable() const +{ + assert(!this->parent_modified()); + assert(pos != std::numeric_limits<uint16_t>::max()); + auto &p = static_cast<LBALeafNode&>(*parent); + auto k = this->is_indirect() + ? this->get_intermediate_base() + : get_key(); + return p.is_child_data_stable(ctx, pos, k); +} + BtreeLBAManager::mkfs_ret BtreeLBAManager::mkfs( Transaction &t) diff --git a/src/crimson/os/seastore/lba_manager/btree/btree_lba_manager.h b/src/crimson/os/seastore/lba_manager/btree/btree_lba_manager.h index ef10ff9623b..e0902053d0e 100644 --- a/src/crimson/os/seastore/lba_manager/btree/btree_lba_manager.h +++ b/src/crimson/os/seastore/lba_manager/btree/btree_lba_manager.h @@ -23,11 +23,15 @@ #include "crimson/os/seastore/lba_manager/btree/lba_btree_node.h" #include "crimson/os/seastore/btree/btree_range_pin.h" +namespace crimson::os::seastore { +class LogicalCachedExtent; +} + namespace crimson::os::seastore::lba_manager::btree { struct LBALeafNode; -class BtreeLBAMapping : public BtreeNodeMapping<laddr_t, paddr_t> { +class BtreeLBAMapping : public LBAMapping { // To support cloning, there are two kinds of lba mappings: // 1. physical lba mapping: the pladdr in the value of which is the paddr of // the corresponding extent; @@ -61,14 +65,14 @@ class BtreeLBAMapping : public BtreeNodeMapping<laddr_t, paddr_t> { // their keys. public: BtreeLBAMapping(op_context_t<laddr_t> ctx) - : BtreeNodeMapping(ctx) {} + : LBAMapping(ctx) {} BtreeLBAMapping( op_context_t<laddr_t> c, LBALeafNodeRef parent, uint16_t pos, lba_map_val_t &val, lba_node_meta_t meta) - : BtreeNodeMapping( + : LBAMapping( c, parent, pos, @@ -190,8 +194,12 @@ public: SUBDEBUGT(seastore_lba, "new pin {}", ctx.trans, static_cast<LBAMapping&>(*new_pin)); return new_pin; } + bool is_stable() const final; + bool is_data_stable() const final; + get_child_ret_t<LogicalCachedExtent> get_logical_extent(Transaction &t); + protected: - std::unique_ptr<BtreeNodeMapping<laddr_t, paddr_t>> _duplicate( + LBAMappingRef _duplicate( op_context_t<laddr_t> ctx) const final { auto pin = std::unique_ptr<BtreeLBAMapping>(new BtreeLBAMapping(ctx)); pin->key = key; diff --git a/src/crimson/os/seastore/lba_mapping.cc b/src/crimson/os/seastore/lba_mapping.cc new file mode 100644 index 00000000000..90fae09ce21 --- /dev/null +++ b/src/crimson/os/seastore/lba_mapping.cc @@ -0,0 +1,44 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "lba_mapping.h" + +namespace crimson::os::seastore { + +std::ostream &operator<<(std::ostream &out, const LBAMapping &rhs) +{ + out << "LBAMapping(" << rhs.get_key() + << "~0x" << std::hex << rhs.get_length() << std::dec + << "->" << rhs.get_val(); + if (rhs.is_indirect()) { + out << ",indirect(" << rhs.get_intermediate_base() + << "~0x" << std::hex << rhs.get_intermediate_length() + << "@0x" << rhs.get_intermediate_offset() << std::dec + << ")"; + } + out << ")"; + return out; +} + +std::ostream &operator<<(std::ostream &out, const lba_pin_list_t &rhs) +{ + bool first = true; + out << '['; + for (const auto &i: rhs) { + out << (first ? "" : ",") << *i; + first = false; + } + return out << ']'; +} + +LBAMappingRef LBAMapping::duplicate() const { + auto ret = _duplicate(ctx); + ret->range = range; + ret->value = value; + ret->parent = parent; + ret->len = len; + ret->pos = pos; + return ret; +} + +} // namespace crimson::os::seastore diff --git a/src/crimson/os/seastore/lba_mapping.h b/src/crimson/os/seastore/lba_mapping.h new file mode 100644 index 00000000000..338d4d53f55 --- /dev/null +++ b/src/crimson/os/seastore/lba_mapping.h @@ -0,0 +1,73 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#pragma once + +#include "crimson/os/seastore/cached_extent.h" +#include "crimson/os/seastore/btree/btree_range_pin.h" + +namespace crimson::os::seastore { + +class LBAMapping; +using LBAMappingRef = std::unique_ptr<LBAMapping>; + +class LogicalCachedExtent; + +class LBAMapping : public BtreeNodeMapping<laddr_t, paddr_t> { +public: + LBAMapping(op_context_t<laddr_t> ctx) + : BtreeNodeMapping<laddr_t, paddr_t>(ctx) {} + template <typename... T> + LBAMapping(T&&... t) + : BtreeNodeMapping<laddr_t, paddr_t>(std::forward<T>(t)...) + { + if (!parent->is_pending()) { + this->child_pos = {parent, pos}; + } + } + + // An lba pin may be indirect, see comments in lba_manager/btree/btree_lba_manager.h + virtual bool is_indirect() const = 0; + virtual laddr_t get_intermediate_key() const = 0; + virtual laddr_t get_intermediate_base() const = 0; + virtual extent_len_t get_intermediate_length() const = 0; + // The start offset of the pin, must be 0 if the pin is not indirect + virtual extent_len_t get_intermediate_offset() const = 0; + + virtual get_child_ret_t<LogicalCachedExtent> + get_logical_extent(Transaction &t) = 0; + + void link_child(ChildableCachedExtent *c) { + ceph_assert(child_pos); + child_pos->link_child(c); + } + virtual LBAMappingRef refresh_with_pending_parent() = 0; + + // For reserved mappings, the return values are + // undefined although it won't crash + virtual bool is_stable() const = 0; + virtual bool is_data_stable() const = 0; + virtual bool is_clone() const = 0; + bool is_zero_reserved() const { + return !get_val().is_real(); + } + + LBAMappingRef duplicate() const; + + virtual ~LBAMapping() {} +protected: + virtual LBAMappingRef _duplicate(op_context_t<laddr_t>) const = 0; + std::optional<child_pos_t> child_pos = std::nullopt; +}; + +std::ostream &operator<<(std::ostream &out, const LBAMapping &rhs); +using lba_pin_list_t = std::list<LBAMappingRef>; + +std::ostream &operator<<(std::ostream &out, const lba_pin_list_t &rhs); + +} // namespace crimson::os::seastore + +#if FMT_VERSION >= 90000 +template <> struct fmt::formatter<crimson::os::seastore::LBAMapping> : fmt::ostream_formatter {}; +template <> struct fmt::formatter<crimson::os::seastore::lba_pin_list_t> : fmt::ostream_formatter {}; +#endif diff --git a/src/include/random.h b/src/include/random.h index f2e3e37bcd7..6b7c9405efd 100644 --- a/src/include/random.h +++ b/src/include/random.h @@ -16,9 +16,9 @@ #define CEPH_RANDOM_H 1 #include <mutex> +#include <optional> #include <random> #include <type_traits> -#include <boost/optional.hpp> // Workaround for https://gcc.gnu.org/bugzilla/show_bug.cgi?id=85494 #ifdef __MINGW32__ @@ -123,7 +123,7 @@ void randomize_rng() template <typename EngineT> EngineT& engine() { - thread_local boost::optional<EngineT> rng_engine; + thread_local std::optional<EngineT> rng_engine; if (!rng_engine) { rng_engine.emplace(EngineT()); diff --git a/src/kv/KeyValueDB.h b/src/kv/KeyValueDB.h index 858742d511e..d926840180e 100644 --- a/src/kv/KeyValueDB.h +++ b/src/kv/KeyValueDB.h @@ -9,6 +9,7 @@ #include <map> #include <optional> #include <string> +#include <string_view> #include <boost/scoped_ptr.hpp> #include "include/encoding.h" #include "common/Formatter.h" @@ -211,6 +212,10 @@ public: return ""; } virtual ceph::buffer::list value() = 0; + // When valid() returns true, value returned as string-view + // is guaranteed to be valid until iterator is moved to another + // position; that is until call to next() / seek_to_first() / etc. + virtual std::string_view value_as_sv() = 0; virtual int status() = 0; virtual ~SimplestIteratorImpl() {} }; @@ -220,7 +225,12 @@ public: virtual ~IteratorImpl() {} virtual int seek_to_last() = 0; virtual int prev() = 0; + // When valid() returns true, key returned as string-view + // is guaranteed to be valid until iterator is moved to another + // position; that is until call to next() / seek_to_first() / etc. + virtual std::string_view key_as_sv() = 0; virtual std::pair<std::string, std::string> raw_key() = 0; + virtual std::pair<std::string_view, std::string_view> raw_key_as_sv() = 0; virtual ceph::buffer::ptr value_as_ptr() { ceph::buffer::list bl = value(); if (bl.length() == 1) { @@ -247,7 +257,9 @@ public: virtual int next() = 0; virtual int prev() = 0; virtual std::string key() = 0; + virtual std::string_view key_as_sv() = 0; virtual std::pair<std::string,std::string> raw_key() = 0; + virtual std::pair<std::string_view, std::string_view> raw_key_as_sv() = 0; virtual bool raw_key_is_prefixed(const std::string &prefix) = 0; virtual ceph::buffer::list value() = 0; virtual ceph::buffer::ptr value_as_ptr() { @@ -258,6 +270,7 @@ public: return ceph::buffer::ptr(); } } + virtual std::string_view value_as_sv() = 0; virtual int status() = 0; virtual size_t key_size() { return 0; @@ -315,15 +328,24 @@ private: std::string key() override { return generic_iter->key(); } + std::string_view key_as_sv() override { + return generic_iter->key_as_sv(); + } std::pair<std::string, std::string> raw_key() override { return generic_iter->raw_key(); } + std::pair<std::string_view, std::string_view> raw_key_as_sv() override { + return generic_iter->raw_key_as_sv(); + } ceph::buffer::list value() override { return generic_iter->value(); } ceph::buffer::ptr value_as_ptr() override { return generic_iter->value_as_ptr(); } + std::string_view value_as_sv() override { + return generic_iter->value_as_sv(); + } int status() override { return generic_iter->status(); } diff --git a/src/kv/RocksDBStore.cc b/src/kv/RocksDBStore.cc index ca63ea06484..51d224b67c0 100644 --- a/src/kv/RocksDBStore.cc +++ b/src/kv/RocksDBStore.cc @@ -6,6 +6,7 @@ #include <memory> #include <set> #include <string> +#include <string_view> #include <errno.h> #include <unistd.h> #include <sys/types.h> @@ -47,6 +48,7 @@ using std::ostream; using std::pair; using std::set; using std::string; +using std::string_view; using std::unique_ptr; using std::vector; @@ -1992,7 +1994,7 @@ int RocksDBStore::split_key(rocksdb::Slice in, string *prefix, string *key) // Find separator inside Slice char* separator = (char*) memchr(in.data(), 0, in.size()); - if (separator == NULL) + if (separator == nullptr) return -EINVAL; prefix_len = size_t(separator - in.data()); if (prefix_len >= in.size()) @@ -2006,6 +2008,27 @@ int RocksDBStore::split_key(rocksdb::Slice in, string *prefix, string *key) return 0; } +// TODO: deduplicate the code, preferrably by removing the string variant +int RocksDBStore::split_key(rocksdb::Slice in, string_view *prefix, string_view *key) +{ + size_t prefix_len = 0; + + // Find separator inside Slice + char* separator = (char*) memchr(in.data(), 0, in.size()); + if (separator == nullptr) + return -EINVAL; + prefix_len = size_t(separator - in.data()); + if (prefix_len >= in.size()) + return -EINVAL; + + // Fetch prefix and/or key directly from Slice + if (prefix) + *prefix = string_view(in.data(), prefix_len); + if (key) + *key = string_view(separator + 1, in.size() - prefix_len - 1); + return 0; +} + void RocksDBStore::compact() { dout(2) << __func__ << " starting" << dendl; @@ -2226,7 +2249,13 @@ int RocksDBStore::RocksDBWholeSpaceIteratorImpl::prev() string RocksDBStore::RocksDBWholeSpaceIteratorImpl::key() { string out_key; - split_key(dbiter->key(), 0, &out_key); + split_key(dbiter->key(), nullptr, &out_key); + return out_key; +} +string_view RocksDBStore::RocksDBWholeSpaceIteratorImpl::key_as_sv() +{ + string_view out_key; + split_key(dbiter->key(), nullptr, &out_key); return out_key; } pair<string,string> RocksDBStore::RocksDBWholeSpaceIteratorImpl::raw_key() @@ -2235,6 +2264,12 @@ pair<string,string> RocksDBStore::RocksDBWholeSpaceIteratorImpl::raw_key() split_key(dbiter->key(), &prefix, &key); return make_pair(prefix, key); } +pair<string_view,string_view> RocksDBStore::RocksDBWholeSpaceIteratorImpl::raw_key_as_sv() +{ + string_view prefix, key; + split_key(dbiter->key(), &prefix, &key); + return make_pair(prefix, key); +} bool RocksDBStore::RocksDBWholeSpaceIteratorImpl::raw_key_is_prefixed(const string &prefix) { // Look for "prefix\0" right in rocksb::Slice @@ -2267,6 +2302,12 @@ bufferptr RocksDBStore::RocksDBWholeSpaceIteratorImpl::value_as_ptr() return bufferptr(val.data(), val.size()); } +std::string_view RocksDBStore::RocksDBWholeSpaceIteratorImpl::value_as_sv() +{ + rocksdb::Slice val = dbiter->value(); + return std::string_view{val.data(), val.size()}; +} + int RocksDBStore::RocksDBWholeSpaceIteratorImpl::status() { return dbiter->status().ok() ? 0 : -1; @@ -2348,9 +2389,15 @@ public: string key() override { return dbiter->key().ToString(); } + string_view key_as_sv() override { + return dbiter->key().ToStringView(); + } std::pair<std::string, std::string> raw_key() override { return make_pair(prefix, key()); } + std::pair<std::string_view, std::string_view> raw_key_as_sv() override { + return make_pair(prefix, dbiter->key().ToStringView()); + } bufferlist value() override { return to_bufferlist(dbiter->value()); } @@ -2358,6 +2405,10 @@ public: rocksdb::Slice val = dbiter->value(); return bufferptr(val.data(), val.size()); } + std::string_view value_as_sv() override { + rocksdb::Slice val = dbiter->value(); + return std::string_view{val.data(), val.size()}; + } int status() override { return dbiter->status().ok() ? 0 : -1; } @@ -2668,6 +2719,15 @@ public: } } + std::string_view key_as_sv() override + { + if (smaller == on_main) { + return main->key_as_sv(); + } else { + return current_shard->second->key_as_sv(); + } + } + std::pair<std::string,std::string> raw_key() override { if (smaller == on_main) { @@ -2677,6 +2737,15 @@ public: } } + std::pair<std::string_view,std::string_view> raw_key_as_sv() override + { + if (smaller == on_main) { + return main->raw_key_as_sv(); + } else { + return { current_shard->first, current_shard->second->key_as_sv() }; + } + } + bool raw_key_is_prefixed(const std::string &prefix) override { if (smaller == on_main) { @@ -2695,6 +2764,15 @@ public: } } + std::string_view value_as_sv() override + { + if (smaller == on_main) { + return main->value_as_sv(); + } else { + return current_shard->second->value_as_sv(); + } + } + int status() override { //because we already had to inspect key, it must be ok @@ -3017,9 +3095,15 @@ public: string key() override { return iters[0]->key().ToString(); } + string_view key_as_sv() override { + return iters[0]->key().ToStringView(); + } std::pair<std::string, std::string> raw_key() override { return make_pair(prefix, key()); } + std::pair<std::string_view, std::string_view> raw_key_as_sv() override { + return make_pair(prefix, iters[0]->key().ToStringView()); + } bufferlist value() override { return to_bufferlist(iters[0]->value()); } @@ -3027,6 +3111,10 @@ public: rocksdb::Slice val = iters[0]->value(); return bufferptr(val.data(), val.size()); } + std::string_view value_as_sv() override { + rocksdb::Slice val = iters[0]->value(); + return std::string_view{val.data(), val.size()}; + } int status() override { return iters[0]->status().ok() ? 0 : -1; } diff --git a/src/kv/RocksDBStore.h b/src/kv/RocksDBStore.h index 477b209854c..50b91be2bf6 100644 --- a/src/kv/RocksDBStore.h +++ b/src/kv/RocksDBStore.h @@ -386,10 +386,13 @@ public: int next() override; int prev() override; std::string key() override; + std::string_view key_as_sv() override; std::pair<std::string,std::string> raw_key() override; + std::pair<std::string_view,std::string_view> raw_key_as_sv() override; bool raw_key_is_prefixed(const std::string &prefix) override; ceph::bufferlist value() override; ceph::bufferptr value_as_ptr() override; + std::string_view value_as_sv() override; int status() override; size_t key_size() override; size_t value_size() override; @@ -419,6 +422,7 @@ public: } static int split_key(rocksdb::Slice in, std::string *prefix, std::string *key); + static int split_key(rocksdb::Slice in, std::string_view *prefix, std::string_view *key); static std::string past_prefix(const std::string &prefix); diff --git a/src/librbd/migration/HttpClient.cc b/src/librbd/migration/HttpClient.cc index 09fe91da02a..d212981a917 100644 --- a/src/librbd/migration/HttpClient.cc +++ b/src/librbd/migration/HttpClient.cc @@ -193,7 +193,7 @@ protected: ldout(cct, 15) << dendl; boost::system::error_code ec; - boost::beast::get_lowest_layer(derived().stream()).socket().close(ec); + derived().stream().lowest_layer().close(ec); } private: @@ -357,8 +357,7 @@ private: } int shutdown_socket() { - if (!boost::beast::get_lowest_layer( - derived().stream()).socket().is_open()) { + if (!derived().stream().lowest_layer().is_open()) { return 0; } @@ -366,7 +365,7 @@ private: ldout(cct, 15) << dendl; boost::system::error_code ec; - boost::beast::get_lowest_layer(derived().stream()).socket().shutdown( + derived().stream().lowest_layer().shutdown( boost::asio::ip::tcp::socket::shutdown_both, ec); if (ec && ec != boost::beast::errc::not_connected) { @@ -595,7 +594,7 @@ public: this->close_socket(); } - inline boost::beast::tcp_stream& + inline boost::asio::ip::tcp::socket& stream() { return m_stream; } @@ -607,12 +606,13 @@ protected: auto cct = http_client->m_cct; ldout(cct, 15) << dendl; - ceph_assert(!m_stream.socket().is_open()); - m_stream.async_connect( - results, - [on_finish](boost::system::error_code ec, const auto& endpoint) { - on_finish->complete(-ec.value()); - }); + ceph_assert(!m_stream.is_open()); + boost::asio::async_connect(m_stream, + results, + [on_finish](boost::system::error_code ec, + const auto& endpoint) { + on_finish->complete(-ec.value()); + }); } void disconnect(Context* on_finish) override { @@ -624,7 +624,7 @@ protected: } private: - boost::beast::tcp_stream m_stream; + boost::asio::ip::tcp::socket m_stream; }; #undef dout_prefix @@ -643,7 +643,7 @@ public: this->close_socket(); } - inline boost::beast::ssl_stream<boost::beast::tcp_stream>& + inline boost::asio::ssl::stream<boost::asio::ip::tcp::socket>& stream() { return m_stream; } @@ -655,8 +655,9 @@ protected: auto cct = http_client->m_cct; ldout(cct, 15) << dendl; - ceph_assert(!boost::beast::get_lowest_layer(m_stream).socket().is_open()); - boost::beast::get_lowest_layer(m_stream).async_connect( + ceph_assert(!m_stream.lowest_layer().is_open()); + async_connect( + m_stream.lowest_layer(), results, [this, on_finish](boost::system::error_code ec, const auto& endpoint) { handle_connect(-ec.value(), on_finish); @@ -681,12 +682,12 @@ protected: // ssl_stream object can't be reused after shut down -- move-in // a freshly constructed instance - m_stream = boost::beast::ssl_stream<boost::beast::tcp_stream>( + m_stream = boost::asio::ssl::stream<boost::asio::ip::tcp::socket>( http_client->m_strand, http_client->m_ssl_context); } private: - boost::beast::ssl_stream<boost::beast::tcp_stream> m_stream; + boost::asio::ssl::stream<boost::asio::ip::tcp::socket> m_stream; void handle_connect(int r, Context* on_finish) { auto http_client = this->m_http_client; diff --git a/src/librbd/migration/HttpClient.h b/src/librbd/migration/HttpClient.h index 3997e6159e7..5844f918693 100644 --- a/src/librbd/migration/HttpClient.h +++ b/src/librbd/migration/HttpClient.h @@ -13,13 +13,12 @@ #include <boost/asio/strand.hpp> #include <boost/asio/ip/tcp.hpp> #include <boost/asio/ssl/context.hpp> +#include <boost/asio/ssl/stream.hpp> #include <boost/beast/version.hpp> -#include <boost/beast/core/tcp_stream.hpp> #include <boost/beast/http/empty_body.hpp> #include <boost/beast/http/message.hpp> #include <boost/beast/http/string_body.hpp> #include <boost/beast/http/write.hpp> -#include <boost/beast/ssl/ssl_stream.hpp> #include <functional> #include <memory> #include <string> @@ -97,7 +96,7 @@ public: completion(r, std::move(response)); } - void operator()(boost::beast::tcp_stream& stream) override { + void operator()(boost::asio::ip::tcp::socket& stream) override { preprocess_request(); boost::beast::http::async_write( @@ -110,7 +109,7 @@ public: } void operator()( - boost::beast::ssl_stream<boost::beast::tcp_stream>& stream) override { + boost::asio::ssl::stream<boost::asio::ip::tcp::socket>& stream) override { preprocess_request(); boost::beast::http::async_write( @@ -152,9 +151,9 @@ private: virtual bool need_eof() const = 0; virtual bool header_only() const = 0; virtual void complete(int r, Response&&) = 0; - virtual void operator()(boost::beast::tcp_stream& stream) = 0; + virtual void operator()(boost::asio::ip::tcp::socket& stream) = 0; virtual void operator()( - boost::beast::ssl_stream<boost::beast::tcp_stream>& stream) = 0; + boost::asio::ssl::stream<boost::asio::ip::tcp::socket>& stream) = 0; }; template <typename D> struct HttpSession; diff --git a/src/mds/Server.cc b/src/mds/Server.cc index 5874a3dce56..e66b5aa08c7 100644 --- a/src/mds/Server.cc +++ b/src/mds/Server.cc @@ -4167,7 +4167,7 @@ void Server::handle_client_getattr(const MDRequestRef& mdr, bool is_lookup) if (r < 0) { // fall-thru. let rdlock_path_pin_ref() check again. - } else if (is_lookup) { + } else if (is_lookup && mdr->dn[0].size()) { CDentry* dn = mdr->dn[0].back(); mdr->pin(dn); auto em = dn->batch_ops.emplace(std::piecewise_construct, std::forward_as_tuple(mask), std::forward_as_tuple()); @@ -4274,7 +4274,7 @@ void Server::handle_client_getattr(const MDRequestRef& mdr, bool is_lookup) // reply dout(10) << "reply to stat on " << *req << dendl; mdr->tracei = ref; - if (is_lookup) + if (is_lookup && mdr->dn[0].size()) mdr->tracedn = mdr->dn[0].back(); respond_to_request(mdr, 0); } diff --git a/src/mgr/PyModule.cc b/src/mgr/PyModule.cc index cff63ef4a6b..4f996489ba0 100644 --- a/src/mgr/PyModule.cc +++ b/src/mgr/PyModule.cc @@ -38,6 +38,18 @@ std::string PyModule::mgr_store_prefix = "mgr/"; #define BOOST_BIND_GLOBAL_PLACEHOLDERS // Boost apparently can't be bothered to fix its own usage of its own // deprecated features. + +// Fix instances of "'BOOST_PP_ITERATION_02' was not declared in this scope; did +// you mean 'BOOST_PP_ITERATION_05'" and related macro error bullshit that spans +// 300 lines of errors +// +// Apparently you can't include boost/python stuff _and_ have this header +// defined +// +// Thanks to the ceph-aur folks for the fix at: +// https://github.com/bazaah/aur-ceph/commit/8c5cc7d8deec002f7596b6d0860859a0a718f12b +#undef BOOST_MPL_CFG_NO_PREPROCESSED_HEADERS + #include <boost/python/extract.hpp> #include <boost/python/import.hpp> #include <boost/python/object.hpp> diff --git a/src/mgr/PyModule.h b/src/mgr/PyModule.h index 177447c2cb3..a47db3a47ef 100644 --- a/src/mgr/PyModule.h +++ b/src/mgr/PyModule.h @@ -161,9 +161,9 @@ public: } const std::string &get_name() const { - std::lock_guard l(lock) ; return module_name; + return module_name; } - const std::string &get_error_string() const { + std::string get_error_string() const { std::lock_guard l(lock) ; return error_string; } bool get_can_run() const { diff --git a/src/mon/Monitor.cc b/src/mon/Monitor.cc index 7332ec3edb1..833bdddc71b 100644 --- a/src/mon/Monitor.cc +++ b/src/mon/Monitor.cc @@ -4024,7 +4024,7 @@ void Monitor::handle_command(MonOpRequestRef op) for (auto& p : mgrstatmon()->get_service_map().services) { auto &service = p.first; - if (ServiceMap::is_normal_ceph_entity(service)) { + if (ServiceMap::is_normal_ceph_entity(service) || service == "nvmeof") { continue; } f->open_object_section(service.c_str()); diff --git a/src/msg/async/AsyncMessenger.cc b/src/msg/async/AsyncMessenger.cc index 2e38bd434a8..6b3a8c3f6dc 100644 --- a/src/msg/async/AsyncMessenger.cc +++ b/src/msg/async/AsyncMessenger.cc @@ -207,22 +207,22 @@ void Processor::accept() } else if (r == -EAGAIN) { break; } else if (r == -EMFILE || r == -ENFILE) { - lderr(msgr->cct) << __func__ << " open file descriptions limit reached sd = " << listen_socket.fd() + lderr(msgr->cct) << __func__ << " open file descriptors limit reached fd = " << listen_socket.fd() << " errno " << r << " " << cpp_strerror(r) << dendl; if (++accept_error_num > msgr->cct->_conf->ms_max_accept_failures) { - lderr(msgr->cct) << "Proccessor accept has encountered enough error numbers, just do ceph_abort()." << dendl; + lderr(msgr->cct) << "Proccessor accept has encountered too many errors, just do ceph_abort()." << dendl; ceph_abort(); } continue; } else if (r == -ECONNABORTED) { - ldout(msgr->cct, 0) << __func__ << " it was closed because of rst arrived sd = " << listen_socket.fd() + ldout(msgr->cct, 0) << __func__ << " closed because of rst arrival fd = " << listen_socket.fd() << " errno " << r << " " << cpp_strerror(r) << dendl; continue; } else { lderr(msgr->cct) << __func__ << " no incoming connection?" << " errno " << r << " " << cpp_strerror(r) << dendl; if (++accept_error_num > msgr->cct->_conf->ms_max_accept_failures) { - lderr(msgr->cct) << "Proccessor accept has encountered enough error numbers, just do ceph_abort()." << dendl; + lderr(msgr->cct) << "Proccessor accept has encountered too many errors, just do ceph_abort()." << dendl; ceph_abort(); } continue; diff --git a/src/os/DBObjectMap.cc b/src/os/DBObjectMap.cc index 7da9a67be62..65627b5f818 100644 --- a/src/os/DBObjectMap.cc +++ b/src/os/DBObjectMap.cc @@ -519,6 +519,11 @@ bufferlist DBObjectMap::DBObjectMapIteratorImpl::value() return cur_iter->value(); } +std::string_view DBObjectMap::DBObjectMapIteratorImpl::value_as_sv() +{ + return cur_iter->value_as_sv(); +} + int DBObjectMap::DBObjectMapIteratorImpl::status() { return r; diff --git a/src/os/DBObjectMap.h b/src/os/DBObjectMap.h index 444f21eb815..1e1452010e7 100644 --- a/src/os/DBObjectMap.h +++ b/src/os/DBObjectMap.h @@ -393,6 +393,7 @@ private: int next() override { ceph_abort(); return 0; } std::string key() override { ceph_abort(); return ""; } ceph::buffer::list value() override { ceph_abort(); return ceph::buffer::list(); } + std::string_view value_as_sv() override { ceph_abort(); return std::string_view(); } int status() override { return 0; } }; @@ -431,6 +432,7 @@ private: int next() override; std::string key() override; ceph::buffer::list value() override; + std::string_view value_as_sv() override; int status() override; bool on_parent() { diff --git a/src/os/ObjectStore.h b/src/os/ObjectStore.h index 521435b6c31..df3ae920a2f 100644 --- a/src/os/ObjectStore.h +++ b/src/os/ObjectStore.h @@ -29,6 +29,7 @@ #include <errno.h> #include <sys/stat.h> +#include <functional> #include <map> #include <memory> #include <vector> @@ -735,15 +736,6 @@ public: std::map<std::string, ceph::buffer::list> *out ///< [out] Returned keys and values ) = 0; -#ifdef WITH_SEASTAR - virtual int omap_get_values( - CollectionHandle &c, ///< [in] Collection containing oid - const ghobject_t &oid, ///< [in] Object containing omap - const std::optional<std::string> &start_after, ///< [in] Keys to get - std::map<std::string, ceph::buffer::list> *out ///< [out] Returned keys and values - ) = 0; -#endif - /// Filters keys into out which are defined on oid virtual int omap_check_keys( CollectionHandle &c, ///< [in] Collection containing oid @@ -766,6 +758,48 @@ public: const ghobject_t &oid ///< [in] object ) = 0; + struct omap_iter_seek_t { + std::string seek_position; + enum { + // start with provided key (seek_position), if it exists + LOWER_BOUND, + // skip provided key (seek_position) even if it exists + UPPER_BOUND + } seek_type = LOWER_BOUND; + static omap_iter_seek_t min_lower_bound() { return {}; } + }; + enum class omap_iter_ret_t { + STOP, + NEXT + }; + /** + * Iterate over object map with user-provided callable + * + * Warning! The callable is executed under lock on bluestore + * operations in c. Do not use bluestore methods on c while + * iterating. (Filling in a transaction is no problem). + * + * @param c collection + * @param oid object + * @param start_from where the iterator should point to at + * the beginning + * @param visitor callable that takes OMAP key and corresponding + * value as string_views and controls iteration + * by the return. It is executed for every object's + * OMAP entry from `start_from` till end of the + * object's OMAP or till the iteration is stopped + * by `STOP`. Please note that if there is no such + * entry, `visitor` will be called 0 times. + * @return error code, zero on success + */ + virtual int omap_iterate( + CollectionHandle &c, + const ghobject_t &oid, + omap_iter_seek_t start_from, + std::function<omap_iter_ret_t(std::string_view, + std::string_view)> visitor + ) = 0; + virtual int flush_journal() { return -EOPNOTSUPP; } virtual int dump_journal(std::ostream& out) { return -EOPNOTSUPP; } diff --git a/src/os/bluestore/BlueFS.cc b/src/os/bluestore/BlueFS.cc index e123a0a200a..2f88acdc93b 100644 --- a/src/os/bluestore/BlueFS.cc +++ b/src/os/bluestore/BlueFS.cc @@ -1706,7 +1706,8 @@ int BlueFS::_replay(bool noop, bool to_stdout) << " fnode=" << fnode << " delta=" << delta << dendl; - ceph_assert(delta.offset == fnode.allocated); + // be leanient, if there is no extents just produce error message + ceph_assert(delta.offset == fnode.allocated || delta.extents.empty()); } if (cct->_conf->bluefs_log_replay_check_allocations) { int r = _check_allocations(fnode, @@ -3830,6 +3831,7 @@ int BlueFS::truncate(FileWriter *h, uint64_t offset)/*_WF_L*/ fnode.size = offset; fnode.allocated = new_allocated; fnode.reset_delta(); + fnode.recalc_allocated(); log.t.op_file_update(fnode); // sad, but is_dirty must be set to signal flushing of the log h->file->is_dirty = true; diff --git a/src/os/bluestore/BlueStore.cc b/src/os/bluestore/BlueStore.cc index a024a0c2105..25e6c4fe596 100644 --- a/src/os/bluestore/BlueStore.cc +++ b/src/os/bluestore/BlueStore.cc @@ -4830,7 +4830,7 @@ void BlueStore::Onode::rewrite_omap_key(const string& old, string *out) out->append(old.c_str() + out->length(), old.size() - out->length()); } -void BlueStore::Onode::decode_omap_key(const string& key, string *user_key) +size_t BlueStore::Onode::calc_userkey_offset_in_omap_key() const { size_t pos = sizeof(uint64_t) + 1; if (!onode.is_pgmeta_omap()) { @@ -4840,9 +4840,15 @@ void BlueStore::Onode::decode_omap_key(const string& key, string *user_key) pos += sizeof(uint64_t); } } - *user_key = key.substr(pos); + return pos; } +void BlueStore::Onode::decode_omap_key(const string& key, string *user_key) +{ + *user_key = key.substr(calc_userkey_offset_in_omap_key()); +} + + void BlueStore::Onode::finish_write(TransContext* txc, uint32_t offset, uint32_t length) { while (true) { @@ -5519,7 +5525,13 @@ BlueStore::OmapIteratorImpl::OmapIteratorImpl( if (o->onode.has_omap()) { o->get_omap_key(string(), &head); o->get_omap_tail(&tail); + auto start1 = mono_clock::now(); it->lower_bound(head); + c->store->log_latency( + __func__, + l_bluestore_omap_seek_to_first_lat, + mono_clock::now() - start1, + c->store->cct->_conf->bluestore_log_omap_iterator_age); } } BlueStore::OmapIteratorImpl::~OmapIteratorImpl() @@ -5654,6 +5666,13 @@ bufferlist BlueStore::OmapIteratorImpl::value() return it->value(); } +std::string_view BlueStore::OmapIteratorImpl::value_as_sv() +{ + std::shared_lock l(c->lock); + ceph_assert(it->valid()); + return it->value_as_sv(); +} + // ===================================== @@ -13601,52 +13620,6 @@ int BlueStore::omap_get_values( return r; } -#ifdef WITH_SEASTAR -int BlueStore::omap_get_values( - CollectionHandle &c_, ///< [in] Collection containing oid - const ghobject_t &oid, ///< [in] Object containing omap - const std::optional<string> &start_after, ///< [in] Keys to get - map<string, bufferlist> *output ///< [out] Returned keys and values - ) -{ - Collection *c = static_cast<Collection *>(c_.get()); - dout(15) << __func__ << " " << c->get_cid() << " oid " << oid << dendl; - if (!c->exists) - return -ENOENT; - std::shared_lock l(c->lock); - int r = 0; - OnodeRef o = c->get_onode(oid, false); - if (!o || !o->exists) { - r = -ENOENT; - goto out; - } - if (!o->onode.has_omap()) { - goto out; - } - o->flush(); - { - ObjectMap::ObjectMapIterator iter = get_omap_iterator(c_, oid); - if (!iter) { - r = -ENOENT; - goto out; - } - if (start_after) { - iter->upper_bound(*start_after); - } else { - iter->seek_to_first(); - } - for (; iter->valid(); iter->next()) { - output->insert(make_pair(iter->key(), iter->value())); - } - } - -out: - dout(10) << __func__ << " " << c->get_cid() << " oid " << oid << " = " << r - << dendl; - return r; -} -#endif - int BlueStore::omap_check_keys( CollectionHandle &c_, ///< [in] Collection containing oid const ghobject_t &oid, ///< [in] Object containing omap @@ -13724,6 +13697,94 @@ ObjectMap::ObjectMapIterator BlueStore::get_omap_iterator( return ObjectMap::ObjectMapIterator(new OmapIteratorImpl(logger,c, o, it)); } +int BlueStore::omap_iterate( + CollectionHandle &c_, ///< [in] collection + const ghobject_t &oid, ///< [in] object + ObjectStore::omap_iter_seek_t start_from, ///< [in] where the iterator should point to at the beginning + std::function<omap_iter_ret_t(std::string_view, std::string_view)> f + ) +{ + Collection *c = static_cast<Collection *>(c_.get()); + dout(10) << __func__ << " " << c->get_cid() << " " << oid << dendl; + if (!c->exists) { + return -ENOENT; + } + std::shared_lock l(c->lock); + OnodeRef o = c->get_onode(oid, false); + if (!o || !o->exists) { + dout(10) << __func__ << " " << oid << "doesn't exist" <<dendl; + return -ENOENT; + } + o->flush(); + dout(10) << __func__ << " has_omap = " << (int)o->onode.has_omap() <<dendl; + if (!o->onode.has_omap()) { + // nothing to do + return 0; + } + + KeyValueDB::Iterator it; + { + auto bounds = KeyValueDB::IteratorBounds(); + std::string lower_bound, upper_bound; + o->get_omap_key(string(), &lower_bound); + o->get_omap_tail(&upper_bound); + bounds.lower_bound = std::move(lower_bound); + bounds.upper_bound = std::move(upper_bound); + it = db->get_iterator(o->get_omap_prefix(), 0, std::move(bounds)); + } + + // seek the iterator + { + std::string key; + o->get_omap_key(start_from.seek_position, &key); + auto start = ceph::mono_clock::now(); + if (start_from.seek_type == omap_iter_seek_t::LOWER_BOUND) { + it->lower_bound(key); + c->store->log_latency( + __func__, + l_bluestore_omap_lower_bound_lat, + ceph::mono_clock::now() - start, + c->store->cct->_conf->bluestore_log_omap_iterator_age); + } else { + it->upper_bound(key); + c->store->log_latency( + __func__, + l_bluestore_omap_upper_bound_lat, + ceph::mono_clock::now() - start, + c->store->cct->_conf->bluestore_log_omap_iterator_age); + } + } + + // iterate! + std::string tail; + o->get_omap_tail(&tail); + const std::string_view::size_type userkey_offset_in_dbkey = + o->calc_userkey_offset_in_omap_key(); + ceph::timespan next_lat_acc{0}; + while (it->valid()) { + const auto& db_key = it->raw_key_as_sv().second; + if (db_key >= tail) { + break; + } + std::string_view user_key = db_key.substr(userkey_offset_in_dbkey); + omap_iter_ret_t ret = f(user_key, it->value_as_sv()); + if (ret == omap_iter_ret_t::STOP) { + break; + } else if (ret == omap_iter_ret_t::NEXT) { + ceph::time_guard<ceph::mono_clock>{next_lat_acc}; + it->next(); + } else { + ceph_abort(); + } + } + c->store->log_latency( + __func__, + l_bluestore_omap_next_lat, + next_lat_acc, + c->store->cct->_conf->bluestore_log_omap_iterator_age); + return 0; +} + // ----------------- // write helpers diff --git a/src/os/bluestore/BlueStore.h b/src/os/bluestore/BlueStore.h index 99f8d057cf0..5549f97ffea 100644 --- a/src/os/bluestore/BlueStore.h +++ b/src/os/bluestore/BlueStore.h @@ -1457,6 +1457,7 @@ public: } void rewrite_omap_key(const std::string& old, std::string *out); + size_t calc_userkey_offset_in_omap_key() const; void decode_omap_key(const std::string& key, std::string *user_key); void finish_write(TransContext* txc, uint32_t offset, uint32_t length); @@ -1753,6 +1754,7 @@ public: int next() override; std::string key() override; ceph::buffer::list value() override; + std::string_view value_as_sv() override; std::string tail_key() override { return tail; } @@ -3416,15 +3418,6 @@ public: std::map<std::string, ceph::buffer::list> *out ///< [out] Returned keys and values ) override; -#ifdef WITH_SEASTAR - int omap_get_values( - CollectionHandle &c, ///< [in] Collection containing oid - const ghobject_t &oid, ///< [in] Object containing omap - const std::optional<std::string> &start_after, ///< [in] Keys to get - std::map<std::string, ceph::buffer::list> *out ///< [out] Returned keys and values - ) override; -#endif - /// Filters keys into out which are defined on oid int omap_check_keys( CollectionHandle &c, ///< [in] Collection containing oid @@ -3438,6 +3431,13 @@ public: const ghobject_t &oid ///< [in] object ) override; + int omap_iterate( + CollectionHandle &c, ///< [in] collection + const ghobject_t &oid, ///< [in] object + omap_iter_seek_t start_from, ///< [in] where the iterator should point to at the beginning + std::function<omap_iter_ret_t(std::string_view, std::string_view)> f + ) override; + void set_fsid(uuid_d u) override { fsid = u; } diff --git a/src/os/bluestore/bluefs_types.cc b/src/os/bluestore/bluefs_types.cc index e18dd490140..fe77f7f74d8 100644 --- a/src/os/bluestore/bluefs_types.cc +++ b/src/os/bluestore/bluefs_types.cc @@ -154,7 +154,9 @@ mempool::bluefs::vector<bluefs_extent_t>::iterator bluefs_fnode_t::seek( assert(it != extents_index.begin()); --it; assert(offset >= *it); - p += it - extents_index.begin(); + uint32_t skip = it - extents_index.begin(); + ceph_assert(skip <= extents.size()); + p += skip; offset -= *it; } diff --git a/src/os/bluestore/bluefs_types.h b/src/os/bluestore/bluefs_types.h index 627118c12f8..08b3ca0cf41 100644 --- a/src/os/bluestore/bluefs_types.h +++ b/src/os/bluestore/bluefs_types.h @@ -89,6 +89,7 @@ struct bluefs_fnode_t { void recalc_allocated() { allocated = 0; extents_index.reserve(extents.size()); + extents_index.clear(); for (auto& p : extents) { extents_index.emplace_back(allocated); allocated += p.length; diff --git a/src/os/kstore/KStore.cc b/src/os/kstore/KStore.cc index 7158486ca38..a069d429155 100644 --- a/src/os/kstore/KStore.cc +++ b/src/os/kstore/KStore.cc @@ -1651,6 +1651,13 @@ bufferlist KStore::OmapIteratorImpl::value() return it->value(); } +std::string_view KStore::OmapIteratorImpl::value_as_sv() +{ + std::shared_lock l{c->lock}; + ceph_assert(it->valid()); + return it->value_as_sv(); +} + int KStore::omap_get( CollectionHandle& ch, ///< [in] Collection containing oid const ghobject_t &oid, ///< [in] Object containing omap @@ -1866,6 +1873,71 @@ ObjectMap::ObjectMapIterator KStore::get_omap_iterator( return ObjectMap::ObjectMapIterator(new OmapIteratorImpl(c, o, it)); } +int KStore::omap_iterate( + CollectionHandle &ch, ///< [in] collection + const ghobject_t &oid, ///< [in] object + ObjectStore::omap_iter_seek_t start_from, ///< [in] where the iterator should point to at the beginning + std::function<omap_iter_ret_t(std::string_view, std::string_view)> f) +{ + dout(10) << __func__ << " " << ch->cid << " " << oid << dendl; + Collection *c = static_cast<Collection*>(ch.get()); + { + std::shared_lock l{c->lock}; + + OnodeRef o = c->get_onode(oid, false); + if (!o || !o->exists) { + dout(10) << __func__ << " " << oid << "doesn't exist" <<dendl; + return -ENOENT; + } + o->flush(); + dout(10) << __func__ << " header = " << o->onode.omap_head <<dendl; + + KeyValueDB::Iterator it = db->get_iterator(PREFIX_OMAP); + std::string tail; + std::string seek_key; + if (o->onode.omap_head) { + return 0; // nothing to do + } + + // acquire data depedencies for seek & iterate + get_omap_key(o->onode.omap_head, start_from.seek_position, &seek_key); + get_omap_tail(o->onode.omap_head, &tail); + + // acquire the iterator + { + it = db->get_iterator(PREFIX_OMAP); + } + + // seek the iterator + { + if (start_from.seek_type == omap_iter_seek_t::LOWER_BOUND) { + it->lower_bound(seek_key); + } else { + it->upper_bound(seek_key); + } + } + + // iterate! + while (it->valid()) { + std::string user_key; + if (const auto& db_key = it->raw_key().second; db_key >= tail) { + break; + } else { + decode_omap_key(db_key, &user_key); + } + omap_iter_ret_t ret = f(user_key, it->value_as_sv()); + if (ret == omap_iter_ret_t::STOP) { + break; + } else if (ret == omap_iter_ret_t::NEXT) { + it->next(); + } else { + ceph_abort(); + } + } + } + return 0; +} + // ----------------- // write helpers diff --git a/src/os/kstore/KStore.h b/src/os/kstore/KStore.h index 9a9d413c66a..06115d3cab7 100644 --- a/src/os/kstore/KStore.h +++ b/src/os/kstore/KStore.h @@ -180,6 +180,7 @@ public: int next() override; std::string key() override; ceph::buffer::list value() override; + std::string_view value_as_sv() override; int status() override { return 0; } @@ -553,6 +554,13 @@ public: const ghobject_t &oid ///< [in] object ) override; + int omap_iterate( + CollectionHandle &c, ///< [in] collection + const ghobject_t &oid, ///< [in] object + omap_iter_seek_t start_from, ///< [in] where the iterator should point to at the beginning + std::function<omap_iter_ret_t(std::string_view, std::string_view)> f + ) override; + void set_fsid(uuid_d u) override { fsid = u; } diff --git a/src/os/memstore/MemStore.cc b/src/os/memstore/MemStore.cc index 89cb09361cf..f9d3bf0d8a2 100644 --- a/src/os/memstore/MemStore.cc +++ b/src/os/memstore/MemStore.cc @@ -537,30 +537,6 @@ int MemStore::omap_get_values( return 0; } -#ifdef WITH_SEASTAR -int MemStore::omap_get_values( - CollectionHandle& ch, ///< [in] Collection containing oid - const ghobject_t &oid, ///< [in] Object containing omap - const std::optional<std::string> &start_after, ///< [in] Keys to get - std::map<std::string, ceph::buffer::list> *out ///< [out] Returned keys and values - ) -{ - dout(10) << __func__ << " " << ch->cid << " " << oid << dendl; - Collection *c = static_cast<Collection*>(ch.get()); - ObjectRef o = c->get_object(oid); - if (!o) - return -ENOENT; - assert(start_after); - std::lock_guard lock{o->omap_mutex}; - for (auto it = o->omap.upper_bound(*start_after); - it != std::end(o->omap); - ++it) { - out->insert(*it); - } - return 0; -} -#endif - int MemStore::omap_check_keys( CollectionHandle& ch, ///< [in] Collection containing oid const ghobject_t &oid, ///< [in] Object containing omap @@ -622,6 +598,10 @@ public: std::lock_guard lock{o->omap_mutex}; return it->second; } + std::string_view value_as_sv() override { + std::lock_guard lock{o->omap_mutex}; + return std::string_view{it->second.c_str(), it->second.length()}; + } int status() override { return 0; } @@ -639,6 +619,48 @@ ObjectMap::ObjectMapIterator MemStore::get_omap_iterator( return ObjectMap::ObjectMapIterator(new OmapIteratorImpl(c, o)); } +int MemStore::omap_iterate( + CollectionHandle &ch, ///< [in] collection + const ghobject_t &oid, ///< [in] object + ObjectStore::omap_iter_seek_t start_from, ///< [in] where the iterator should point to at the beginning + std::function<omap_iter_ret_t(std::string_view, std::string_view)> f) +{ + Collection *c = static_cast<Collection*>(ch.get()); + ObjectRef o = c->get_object(oid); + if (!o) { + return -ENOENT; + } + + { + std::lock_guard lock{o->omap_mutex}; + + // obtain seek the iterator + decltype(o->omap)::iterator it; + { + if (start_from.seek_type == omap_iter_seek_t::LOWER_BOUND) { + it = o->omap.lower_bound(start_from.seek_position); + } else { + it = o->omap.upper_bound(start_from.seek_position); + } + } + + // iterate! + while (it != o->omap.end()) { + // potentially rectifying memcpy but who cares for memstore? + omap_iter_ret_t ret = + f(it->first, std::string_view{it->second.c_str(), it->second.length()}); + if (ret == omap_iter_ret_t::STOP) { + break; + } else if (ret == omap_iter_ret_t::NEXT) { + ++it; + } else { + ceph_abort(); + } + } + } + return 0; +} + // --------------- // write operations diff --git a/src/os/memstore/MemStore.h b/src/os/memstore/MemStore.h index 2abe552891f..9621773598f 100644 --- a/src/os/memstore/MemStore.h +++ b/src/os/memstore/MemStore.h @@ -363,14 +363,6 @@ public: const std::set<std::string> &keys, ///< [in] Keys to get std::map<std::string, ceph::buffer::list> *out ///< [out] Returned keys and values ) override; -#ifdef WITH_SEASTAR - int omap_get_values( - CollectionHandle &c, ///< [in] Collection containing oid - const ghobject_t &oid, ///< [in] Object containing omap - const std::optional<std::string> &start_after, ///< [in] Keys to get - std::map<std::string, ceph::buffer::list> *out ///< [out] Returned keys and values - ) override; -#endif using ObjectStore::omap_check_keys; /// Filters keys into out which are defined on oid @@ -387,6 +379,13 @@ public: const ghobject_t &oid ///< [in] object ) override; + int omap_iterate( + CollectionHandle &c, ///< [in] collection + const ghobject_t &oid, ///< [in] object + omap_iter_seek_t start_from, ///< [in] where the iterator should point to at the beginning + std::function<omap_iter_ret_t(std::string_view, std::string_view)> f + ) override; + void set_fsid(uuid_d u) override; uuid_d get_fsid() override; diff --git a/src/osd/ExtentCache.h b/src/osd/ExtentCache.h index 972228cd077..7dc1d4f7263 100644 --- a/src/osd/ExtentCache.h +++ b/src/osd/ExtentCache.h @@ -363,7 +363,7 @@ private: extent, boost::intrusive::list_member_hook<>, &extent::pin_list_member>; - using list = boost::intrusive::list<extent, list_member_options>; + using list = boost::intrusive::list<extent, boost::intrusive::constant_time_size<false>, list_member_options>; list pin_list; ~pin_state() { ceph_assert(pin_list.empty()); diff --git a/src/osd/PrimaryLogPG.cc b/src/osd/PrimaryLogPG.cc index 75484edb75d..3324ba9dc91 100644 --- a/src/osd/PrimaryLogPG.cc +++ b/src/osd/PrimaryLogPG.cc @@ -7786,27 +7786,34 @@ int PrimaryLogPG::do_osd_ops(OpContext *ctx, vector<OSDOp>& ops) bool truncated = false; bufferlist bl; if (oi.is_omap()) { - ObjectMap::ObjectMapIterator iter = osd->store->get_omap_iterator( - ch, ghobject_t(soid) - ); - if (!iter) { - result = -ENOENT; - goto fail; - } - iter->upper_bound(start_after); - if (filter_prefix > start_after) iter->lower_bound(filter_prefix); - for (num = 0; - iter->valid() && - iter->key().substr(0, filter_prefix.size()) == filter_prefix; - ++num, iter->next()) { - dout(20) << "Found key " << iter->key() << dendl; - if (num >= max_return || - bl.length() >= cct->_conf->osd_max_omap_bytes_per_request) { - truncated = true; - break; - } - encode(iter->key(), bl); - encode(iter->value(), bl); + using omap_iter_seek_t = ObjectStore::omap_iter_seek_t; + result = osd->store->omap_iterate( + ch, ghobject_t(soid), + // try to seek as many keys-at-once as possible for the sake of performance. + // note complexity should be logarithmic, so seek(n/2) + seek(n/2) is worse + // than just seek(n). + ObjectStore::omap_iter_seek_t{ + .seek_position = std::max(start_after, filter_prefix), + .seek_type = filter_prefix > start_after ? omap_iter_seek_t::LOWER_BOUND + : omap_iter_seek_t::UPPER_BOUND + }, + [&bl, &truncated, &filter_prefix, &num, max_return, + max_bytes=cct->_conf->osd_max_omap_bytes_per_request] + (std::string_view key, std::string_view value) mutable { + if (key.substr(0, filter_prefix.size()) != filter_prefix) { + return ObjectStore::omap_iter_ret_t::STOP; + } + if (num >= max_return || bl.length() >= max_bytes) { + truncated = true; + return ObjectStore::omap_iter_ret_t::STOP; + } + encode(key, bl); + encode(value, bl); + ++num; + return ObjectStore::omap_iter_ret_t::NEXT; + }); + if (result < 0) { + goto fail; } } // else return empty out_set encode(num, osd_op.outdata); diff --git a/src/osd/Session.h b/src/osd/Session.h index 9fa9c655456..05a0119d31e 100644 --- a/src/osd/Session.h +++ b/src/osd/Session.h @@ -136,7 +136,7 @@ struct Session : public RefCountedObject { ceph::mutex session_dispatch_lock = ceph::make_mutex("Session::session_dispatch_lock"); - boost::intrusive::list<OpRequest> waiting_on_map; + boost::intrusive::list<OpRequest, boost::intrusive::constant_time_size<false>> waiting_on_map; ceph::spinlock projected_epoch_lock; epoch_t projected_epoch = 0; diff --git a/src/osd/osd_types.h b/src/osd/osd_types.h index 1e92d5cd3d6..485fddead7a 100644 --- a/src/osd/osd_types.h +++ b/src/osd/osd_types.h @@ -1151,9 +1151,8 @@ public: bool is_set(key_t key) const; template<typename T> - void set(key_t key, const T &val) { - value_t value = val; - opts[key] = value; + void set(key_t key, T &&val) { + opts.insert_or_assign(key, std::forward<T>(val)); } template<typename T> diff --git a/src/osdc/Objecter.cc b/src/osdc/Objecter.cc index 087b623333b..82d43bb3dde 100644 --- a/src/osdc/Objecter.cc +++ b/src/osdc/Objecter.cc @@ -1393,7 +1393,7 @@ void Objecter::handle_osd_map(MOSDMap *m) for (auto& [c, ec] : p->second) { asio::post(service.get_executor(), asio::append(std::move(c), ec)); } - waiting_for_map.erase(p++); + p = waiting_for_map.erase(p); } monc->sub_got("osdmap", osdmap->get_epoch()); diff --git a/src/pybind/mgr/cephadm/inventory.py b/src/pybind/mgr/cephadm/inventory.py index f1c56d75378..550604fc55b 100644 --- a/src/pybind/mgr/cephadm/inventory.py +++ b/src/pybind/mgr/cephadm/inventory.py @@ -2036,8 +2036,8 @@ class CertKeyStore(): var = service_name if entity in self.service_name_cert else host j = {} self.known_certs[entity][var] = cert_obj - for service_name in self.known_certs[entity].keys(): - j[var] = Cert.to_json(self.known_certs[entity][var]) + for cert_key in self.known_certs[entity]: + j[cert_key] = Cert.to_json(self.known_certs[entity][cert_key]) else: self.known_certs[entity] = cert_obj j = Cert.to_json(cert_obj) diff --git a/src/pybind/mgr/cephadm/module.py b/src/pybind/mgr/cephadm/module.py index bf14f8d1715..6690153d435 100644 --- a/src/pybind/mgr/cephadm/module.py +++ b/src/pybind/mgr/cephadm/module.py @@ -2460,7 +2460,7 @@ Then run the following: @handle_orch_error def service_action(self, action: str, service_name: str) -> List[str]: - if service_name not in self.spec_store.all_specs.keys(): + if service_name not in self.spec_store.all_specs.keys() and service_name != 'osd': raise OrchestratorError(f'Invalid service name "{service_name}".' + ' View currently running services using "ceph orch ls"') dds: List[DaemonDescription] = self.cache.get_daemons_by_service(service_name) @@ -3925,6 +3925,50 @@ Then run the following: return self.to_remove_osds.all_osds() @handle_orch_error + def set_osd_spec(self, service_name: str, osd_ids: List[str]) -> str: + """ + Update unit.meta file for osd with service name + """ + if service_name not in self.spec_store: + raise OrchestratorError(f"Cannot find service '{service_name}' in the inventory. " + "Please try again after applying an OSD service that matches " + "the service name to which you want to attach OSDs.") + + daemons: List[orchestrator.DaemonDescription] = self.cache.get_daemons_by_type('osd') + update_osd = defaultdict(list) + for daemon in daemons: + if daemon.daemon_id in osd_ids and daemon.hostname: + update_osd[daemon.hostname].append(daemon.daemon_id) + + if not update_osd: + raise OrchestratorError(f"Unable to find OSDs: {osd_ids}") + + failed_osds = [] + success_osds = [] + for host in update_osd: + osds = ",".join(update_osd[host]) + # run cephadm command with all host osds on specific host, + # if it fails, continue with other hosts + try: + with self.async_timeout_handler(host): + outs, errs, _code = self.wait_async( + CephadmServe(self)._run_cephadm(host, + cephadmNoImage, + 'update-osd-service', + ['--service-name', service_name, '--osd-ids', osds])) + if _code: + self.log.error(f"Failed to update service for {osds} osd. Cephadm error: {errs}") + failed_osds.extend(update_osd[host]) + else: + success_osds.extend(update_osd[host]) + except Exception: + self.log.exception(f"Failed to set service name for {osds}") + failed_osds.extend(update_osd[host]) + self.cache.invalidate_host_daemons(host) + self._kick_serve_loop() + return f"Updated service for osd {','.join(success_osds)}" + (f" and failed for {','.join(failed_osds)}" if failed_osds else "") + + @handle_orch_error @host_exists() def drain_host(self, hostname: str, force: bool = False, keep_conf_keyring: bool = False, zap_osd_devices: bool = False) -> str: """ diff --git a/src/pybind/mgr/cephadm/schedule.py b/src/pybind/mgr/cephadm/schedule.py index 98d2fe99897..04d3712c50a 100644 --- a/src/pybind/mgr/cephadm/schedule.py +++ b/src/pybind/mgr/cephadm/schedule.py @@ -385,6 +385,8 @@ class HostAssignment(object): def find_ip_on_host(self, hostname: str, subnets: List[str]) -> Optional[str]: for subnet in subnets: + # to normalize subnet + subnet = str(ipaddress.ip_network(subnet)) ips: List[str] = [] # following is to allow loopback interfaces for both ipv4 and ipv6. Since we # only have the subnet (and no IP) we assume default loopback IP address. diff --git a/src/pybind/mgr/cephadm/services/cephadmservice.py b/src/pybind/mgr/cephadm/services/cephadmservice.py index 04f5af28a9b..4f83d7bb0fb 100644 --- a/src/pybind/mgr/cephadm/services/cephadmservice.py +++ b/src/pybind/mgr/cephadm/services/cephadmservice.py @@ -1157,6 +1157,14 @@ class RgwService(CephService): 'value': str(spec.rgw_bucket_counters_cache_size), }) + if getattr(spec, 'disable_multisite_sync_traffic', None) is not None: + ret, out, err = self.mgr.check_mon_command({ + 'prefix': 'config set', + 'who': daemon_name, + 'name': 'rgw_run_sync_thread', + 'value': 'false' if spec.disable_multisite_sync_traffic else 'true', + }) + daemon_spec.keyring = keyring daemon_spec.final_config, daemon_spec.deps = self.generate_config(daemon_spec) diff --git a/src/pybind/mgr/cephadm/services/monitoring.py b/src/pybind/mgr/cephadm/services/monitoring.py index 1b9cf618570..9c5b5a112f3 100644 --- a/src/pybind/mgr/cephadm/services/monitoring.py +++ b/src/pybind/mgr/cephadm/services/monitoring.py @@ -3,6 +3,7 @@ import logging import os import socket from typing import List, Any, Tuple, Dict, Optional, cast +import ipaddress from mgr_module import HandleCommandResult @@ -57,6 +58,8 @@ class GrafanaService(CephadmService): if ip_to_bind_to: daemon_spec.port_ips = {str(grafana_port): ip_to_bind_to} grafana_ip = ip_to_bind_to + if ipaddress.ip_network(grafana_ip).version == 6: + grafana_ip = f"[{grafana_ip}]" domain = self.mgr.get_fqdn(daemon_spec.host) mgmt_gw_ips = [] @@ -354,6 +357,13 @@ class AlertmanagerService(CephadmService): addr = self.mgr.get_fqdn(dd.hostname) peers.append(build_url(host=addr, port=port).lstrip('/')) + ip_to_bind_to = '' + if spec.only_bind_port_on_networks and spec.networks: + assert daemon_spec.host is not None + ip_to_bind_to = self.mgr.get_first_matching_network_ip(daemon_spec.host, spec) or '' + if ip_to_bind_to: + daemon_spec.port_ips = {str(port): ip_to_bind_to} + deps.append(f'secure_monitoring_stack:{self.mgr.secure_monitoring_stack}') if security_enabled: alertmanager_user, alertmanager_password = self.mgr._get_alertmanager_credentials() @@ -376,7 +386,8 @@ class AlertmanagerService(CephadmService): }, 'peers': peers, 'web_config': '/etc/alertmanager/web.yml', - 'use_url_prefix': mgmt_gw_enabled + 'use_url_prefix': mgmt_gw_enabled, + 'ip_to_bind_to': ip_to_bind_to }, sorted(deps) else: return { @@ -384,7 +395,8 @@ class AlertmanagerService(CephadmService): "alertmanager.yml": yml }, "peers": peers, - 'use_url_prefix': mgmt_gw_enabled + 'use_url_prefix': mgmt_gw_enabled, + 'ip_to_bind_to': ip_to_bind_to }, sorted(deps) def get_active_daemon(self, daemon_descrs: List[DaemonDescription]) -> DaemonDescription: diff --git a/src/pybind/mgr/cephadm/ssh.py b/src/pybind/mgr/cephadm/ssh.py index 1622cb001ab..acb5a77c51b 100644 --- a/src/pybind/mgr/cephadm/ssh.py +++ b/src/pybind/mgr/cephadm/ssh.py @@ -358,7 +358,7 @@ class SSHManager: await self._check_execute_command(host, chown, addr=addr) chmod = RemoteCommand(Executables.CHMOD, [oct(mode)[2:], tmp_path]) await self._check_execute_command(host, chmod, addr=addr) - mv = RemoteCommand(Executables.MV, [tmp_path, path]) + mv = RemoteCommand(Executables.MV, ['-Z', tmp_path, path]) await self._check_execute_command(host, mv, addr=addr) except Exception as e: msg = f"Unable to write {host}:{path}: {e}" diff --git a/src/pybind/mgr/cephadm/templates/services/alertmanager/alertmanager.yml.j2 b/src/pybind/mgr/cephadm/templates/services/alertmanager/alertmanager.yml.j2 index de993cb6ce3..b6955caf616 100644 --- a/src/pybind/mgr/cephadm/templates/services/alertmanager/alertmanager.yml.j2 +++ b/src/pybind/mgr/cephadm/templates/services/alertmanager/alertmanager.yml.j2 @@ -8,6 +8,8 @@ global: tls_config: {% if security_enabled %} ca_file: root_cert.pem + cert_file: alertmanager.crt + key_file: alertmanager.key {% else %} insecure_skip_verify: true {% endif %} diff --git a/src/pybind/mgr/cephadm/templates/services/mgmt-gateway/nginx.conf.j2 b/src/pybind/mgr/cephadm/templates/services/mgmt-gateway/nginx.conf.j2 index b9773ceeeb3..14af0fd48ca 100644 --- a/src/pybind/mgr/cephadm/templates/services/mgmt-gateway/nginx.conf.j2 +++ b/src/pybind/mgr/cephadm/templates/services/mgmt-gateway/nginx.conf.j2 @@ -9,6 +9,7 @@ events { http { #access_log /dev/stdout; + error_log /dev/stderr info; client_header_buffer_size 32K; large_client_header_buffers 4 32k; proxy_busy_buffers_size 512k; diff --git a/src/pybind/mgr/cephadm/templates/services/prometheus/prometheus.yml.j2 b/src/pybind/mgr/cephadm/templates/services/prometheus/prometheus.yml.j2 index ecfd899af71..961da145dac 100644 --- a/src/pybind/mgr/cephadm/templates/services/prometheus/prometheus.yml.j2 +++ b/src/pybind/mgr/cephadm/templates/services/prometheus/prometheus.yml.j2 @@ -28,6 +28,8 @@ alerting: password: {{ service_discovery_password }} tls_config: ca_file: root_cert.pem + cert_file: prometheus.crt + key_file: prometheus.key {% else %} - scheme: http http_sd_configs: @@ -56,6 +58,8 @@ scrape_configs: password: {{ service_discovery_password }} tls_config: ca_file: root_cert.pem + cert_file: prometheus.crt + key_file: prometheus.key {% else %} honor_labels: true http_sd_configs: @@ -81,6 +85,8 @@ scrape_configs: password: {{ service_discovery_password }} tls_config: ca_file: root_cert.pem + cert_file: prometheus.crt + key_file: prometheus.key {% else %} http_sd_configs: - url: {{ node_exporter_sd_url }} @@ -104,6 +110,8 @@ scrape_configs: password: {{ service_discovery_password }} tls_config: ca_file: root_cert.pem + cert_file: prometheus.crt + key_file: prometheus.key {% else %} http_sd_configs: - url: {{ haproxy_sd_url }} @@ -128,6 +136,8 @@ scrape_configs: password: {{ service_discovery_password }} tls_config: ca_file: root_cert.pem + cert_file: prometheus.crt + key_file: prometheus.key {% else %} honor_labels: true http_sd_configs: @@ -149,6 +159,8 @@ scrape_configs: password: {{ service_discovery_password }} tls_config: ca_file: root_cert.pem + cert_file: prometheus.crt + key_file: prometheus.key {% else %} http_sd_configs: - url: {{ nvmeof_sd_url }} @@ -169,6 +181,8 @@ scrape_configs: password: {{ service_discovery_password }} tls_config: ca_file: root_cert.pem + cert_file: prometheus.crt + key_file: prometheus.key {% else %} http_sd_configs: - url: {{ nfs_sd_url }} @@ -189,6 +203,8 @@ scrape_configs: password: {{ service_discovery_password }} tls_config: ca_file: root_cert.pem + cert_file: prometheus.crt + key_file: prometheus.key {% else %} http_sd_configs: - url: {{ smb_sd_url }} diff --git a/src/pybind/mgr/cephadm/tests/test_cephadm.py b/src/pybind/mgr/cephadm/tests/test_cephadm.py index b81510504d9..22bd26def91 100644 --- a/src/pybind/mgr/cephadm/tests/test_cephadm.py +++ b/src/pybind/mgr/cephadm/tests/test_cephadm.py @@ -1741,16 +1741,23 @@ class TestCephadm(object): nvmeof_client_cert = 'fake-nvmeof-client-cert' nvmeof_server_cert = 'fake-nvmeof-server-cert' nvmeof_root_ca_cert = 'fake-nvmeof-root-ca-cert' + grafana_cert_host_1 = 'grafana-cert-host-1' + grafana_cert_host_2 = 'grafana-cert-host-2' cephadm_module.cert_key_store.save_cert('rgw_frontend_ssl_cert', rgw_frontend_rgw_foo_host2_cert, service_name='rgw.foo', user_made=True) cephadm_module.cert_key_store.save_cert('nvmeof_server_cert', nvmeof_server_cert, service_name='nvmeof.foo', user_made=True) cephadm_module.cert_key_store.save_cert('nvmeof_client_cert', nvmeof_client_cert, service_name='nvmeof.foo', user_made=True) cephadm_module.cert_key_store.save_cert('nvmeof_root_ca_cert', nvmeof_root_ca_cert, service_name='nvmeof.foo', user_made=True) + cephadm_module.cert_key_store.save_cert('grafana_cert', grafana_cert_host_1, host='host-1', user_made=True) + cephadm_module.cert_key_store.save_cert('grafana_cert', grafana_cert_host_2, host='host-2', user_made=True) expected_calls = [ mock.call(f'{CERT_STORE_CERT_PREFIX}rgw_frontend_ssl_cert', json.dumps({'rgw.foo': Cert(rgw_frontend_rgw_foo_host2_cert, True).to_json()})), mock.call(f'{CERT_STORE_CERT_PREFIX}nvmeof_server_cert', json.dumps({'nvmeof.foo': Cert(nvmeof_server_cert, True).to_json()})), mock.call(f'{CERT_STORE_CERT_PREFIX}nvmeof_client_cert', json.dumps({'nvmeof.foo': Cert(nvmeof_client_cert, True).to_json()})), mock.call(f'{CERT_STORE_CERT_PREFIX}nvmeof_root_ca_cert', json.dumps({'nvmeof.foo': Cert(nvmeof_root_ca_cert, True).to_json()})), + mock.call(f'{CERT_STORE_CERT_PREFIX}grafana_cert', json.dumps({'host-1': Cert(grafana_cert_host_1, True).to_json()})), + mock.call(f'{CERT_STORE_CERT_PREFIX}grafana_cert', json.dumps({'host-1': Cert(grafana_cert_host_1, True).to_json(), + 'host-2': Cert(grafana_cert_host_2, True).to_json()})) ] _set_store.assert_has_calls(expected_calls) @@ -1795,17 +1802,20 @@ class TestCephadm(object): cephadm_module.cert_key_store._init_known_cert_key_dicts() grafana_host1_key = 'fake-grafana-host1-key' + grafana_host2_key = 'fake-grafana-host2-key' nvmeof_client_key = 'nvmeof-client-key' nvmeof_server_key = 'nvmeof-server-key' nvmeof_encryption_key = 'nvmeof-encryption-key' - grafana_host1_key = 'fake-grafana-host1-cert' cephadm_module.cert_key_store.save_key('grafana_key', grafana_host1_key, host='host1') + cephadm_module.cert_key_store.save_key('grafana_key', grafana_host2_key, host='host2') cephadm_module.cert_key_store.save_key('nvmeof_client_key', nvmeof_client_key, service_name='nvmeof.foo') cephadm_module.cert_key_store.save_key('nvmeof_server_key', nvmeof_server_key, service_name='nvmeof.foo') cephadm_module.cert_key_store.save_key('nvmeof_encryption_key', nvmeof_encryption_key, service_name='nvmeof.foo') expected_calls = [ mock.call(f'{CERT_STORE_KEY_PREFIX}grafana_key', json.dumps({'host1': PrivKey(grafana_host1_key).to_json()})), + mock.call(f'{CERT_STORE_KEY_PREFIX}grafana_key', json.dumps({'host1': PrivKey(grafana_host1_key).to_json(), + 'host2': PrivKey(grafana_host2_key).to_json()})), mock.call(f'{CERT_STORE_KEY_PREFIX}nvmeof_client_key', json.dumps({'nvmeof.foo': PrivKey(nvmeof_client_key).to_json()})), mock.call(f'{CERT_STORE_KEY_PREFIX}nvmeof_server_key', json.dumps({'nvmeof.foo': PrivKey(nvmeof_server_key).to_json()})), mock.call(f'{CERT_STORE_KEY_PREFIX}nvmeof_encryption_key', json.dumps({'nvmeof.foo': PrivKey(nvmeof_encryption_key).to_json()})), diff --git a/src/pybind/mgr/cephadm/tests/test_services.py b/src/pybind/mgr/cephadm/tests/test_services.py index 0d89657ac8c..d872219df80 100644 --- a/src/pybind/mgr/cephadm/tests/test_services.py +++ b/src/pybind/mgr/cephadm/tests/test_services.py @@ -581,7 +581,14 @@ class TestMonitoring: mock_getfqdn.return_value = purl.hostname with with_host(cephadm_module, "test"): - with with_service(cephadm_module, AlertManagerSpec()): + cephadm_module.cache.update_host_networks('test', { + '1.2.3.0/24': { + 'if0': ['1.2.3.1'] + }, + }) + with with_service(cephadm_module, AlertManagerSpec('alertmanager', + networks=['1.2.3.0/24'], + only_bind_port_on_networks=True)): y = dedent(self._get_config(expected_yaml_url)).lstrip() _run_cephadm.assert_called_with( 'test', @@ -595,11 +602,12 @@ class TestMonitoring: "deploy_arguments": [], "params": { 'tcp_ports': [9093, 9094], + 'port_ips': {"9094": "1.2.3.1"}, }, "meta": { 'service_name': 'alertmanager', 'ports': [9093, 9094], - 'ip': None, + 'ip': '1.2.3.1', 'deployed_by': [], 'rank': None, 'rank_generation': None, @@ -612,6 +620,7 @@ class TestMonitoring: }, "peers": [], "use_url_prefix": False, + "ip_to_bind_to": "1.2.3.1", } }), error_ok=True, @@ -634,8 +643,16 @@ class TestMonitoring: cephadm_module.secure_monitoring_stack = True cephadm_module.set_store(AlertmanagerService.USER_CFG_KEY, 'alertmanager_user') cephadm_module.set_store(AlertmanagerService.PASS_CFG_KEY, 'alertmanager_plain_password') + + cephadm_module.cache.update_host_networks('test', { + 'fd12:3456:789a::/64': { + 'if0': ['fd12:3456:789a::10'] + }, + }) with with_service(cephadm_module, MgmtGatewaySpec("mgmt-gateway")) as _, \ - with_service(cephadm_module, AlertManagerSpec()): + with_service(cephadm_module, AlertManagerSpec('alertmanager', + networks=['fd12:3456:789a::/64'], + only_bind_port_on_networks=True)): y = dedent(""" # This file is generated by cephadm. @@ -646,6 +663,8 @@ class TestMonitoring: http_config: tls_config: ca_file: root_cert.pem + cert_file: alertmanager.crt + key_file: alertmanager.key route: receiver: 'default' @@ -686,11 +705,12 @@ class TestMonitoring: "deploy_arguments": [], "params": { 'tcp_ports': [9093, 9094], + 'port_ips': {"9094": "fd12:3456:789a::10"} }, "meta": { 'service_name': 'alertmanager', 'ports': [9093, 9094], - 'ip': None, + 'ip': 'fd12:3456:789a::10', 'deployed_by': [], 'rank': None, 'rank_generation': None, @@ -708,6 +728,7 @@ class TestMonitoring: 'peers': [], 'web_config': '/etc/alertmanager/web.yml', "use_url_prefix": True, + "ip_to_bind_to": "fd12:3456:789a::10", } }), error_ok=True, @@ -741,6 +762,8 @@ class TestMonitoring: http_config: tls_config: ca_file: root_cert.pem + cert_file: alertmanager.crt + key_file: alertmanager.key route: receiver: 'default' @@ -801,6 +824,7 @@ class TestMonitoring: 'peers': [], 'web_config': '/etc/alertmanager/web.yml', "use_url_prefix": False, + "ip_to_bind_to": "", } }), error_ok=True, @@ -1170,6 +1194,8 @@ class TestMonitoring: password: sd_password tls_config: ca_file: root_cert.pem + cert_file: prometheus.crt + key_file: prometheus.key scrape_configs: - job_name: 'ceph' @@ -1191,6 +1217,8 @@ class TestMonitoring: password: sd_password tls_config: ca_file: root_cert.pem + cert_file: prometheus.crt + key_file: prometheus.key - job_name: 'node' relabel_configs: @@ -1209,6 +1237,8 @@ class TestMonitoring: password: sd_password tls_config: ca_file: root_cert.pem + cert_file: prometheus.crt + key_file: prometheus.key - job_name: 'haproxy' relabel_configs: @@ -1225,6 +1255,8 @@ class TestMonitoring: password: sd_password tls_config: ca_file: root_cert.pem + cert_file: prometheus.crt + key_file: prometheus.key - job_name: 'ceph-exporter' relabel_configs: @@ -1242,6 +1274,8 @@ class TestMonitoring: password: sd_password tls_config: ca_file: root_cert.pem + cert_file: prometheus.crt + key_file: prometheus.key - job_name: 'nvmeof' honor_labels: true @@ -1255,6 +1289,8 @@ class TestMonitoring: password: sd_password tls_config: ca_file: root_cert.pem + cert_file: prometheus.crt + key_file: prometheus.key - job_name: 'nfs' honor_labels: true @@ -1268,6 +1304,8 @@ class TestMonitoring: password: sd_password tls_config: ca_file: root_cert.pem + cert_file: prometheus.crt + key_file: prometheus.key - job_name: 'smb' honor_labels: true @@ -1281,6 +1319,8 @@ class TestMonitoring: password: sd_password tls_config: ca_file: root_cert.pem + cert_file: prometheus.crt + key_file: prometheus.key """).lstrip() @@ -2071,6 +2111,26 @@ class TestRGWService: }) assert f == expected + @pytest.mark.parametrize( + "disable_sync_traffic", + [ + (True), + (False), + ] + ) + @patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}')) + def test_rgw_disable_sync_traffic(self, disable_sync_traffic, cephadm_module: CephadmOrchestrator): + with with_host(cephadm_module, 'host1'): + s = RGWSpec(service_id="foo", + disable_multisite_sync_traffic=disable_sync_traffic) + with with_service(cephadm_module, s) as dds: + _, f, _ = cephadm_module.check_mon_command({ + 'prefix': 'config get', + 'who': f'client.{dds[0]}', + 'key': 'rgw_run_sync_thread', + }) + assert f == ('false' if disable_sync_traffic else 'true') + class TestMonService: @@ -3874,6 +3934,7 @@ class TestMgmtGateway: http { #access_log /dev/stdout; + error_log /dev/stderr info; client_header_buffer_size 32K; large_client_header_buffers 4 32k; proxy_busy_buffers_size 512k; @@ -4121,6 +4182,7 @@ class TestMgmtGateway: http { #access_log /dev/stdout; + error_log /dev/stderr info; client_header_buffer_size 32K; large_client_header_buffers 4 32k; proxy_busy_buffers_size 512k; diff --git a/src/pybind/mgr/dashboard/HACKING.rst b/src/pybind/mgr/dashboard/HACKING.rst index 39c3d6744b9..6da428a0d5f 100644 --- a/src/pybind/mgr/dashboard/HACKING.rst +++ b/src/pybind/mgr/dashboard/HACKING.rst @@ -4,7 +4,7 @@ Ceph Dashboard Developer Documentation Note: The content of this file has been moved into the Ceph Developer Guide. If you're interested in helping with the development of the dashboard, please -see ``/doc/dev/developer_guide/dash_devel.rst`` or the `online version +see ``/doc/dev/developer_guide/dash-devel.rst`` or the `online version <https://ceph.readthedocs.io/en/latest/dev/developer_guide/dash-devel/>`_ for details on how to set up a development environment and other development-related topics. diff --git a/src/pybind/mgr/orchestrator/_interface.py b/src/pybind/mgr/orchestrator/_interface.py index a505801eea5..4fbc975ae9f 100644 --- a/src/pybind/mgr/orchestrator/_interface.py +++ b/src/pybind/mgr/orchestrator/_interface.py @@ -747,6 +747,10 @@ class Orchestrator(object): """ raise NotImplementedError() + def set_osd_spec(self, service_name: str, osd_ids: List[str]) -> OrchResult: + """ set service of osd """ + raise NotImplementedError() + def blink_device_light(self, ident_fault: str, on: bool, locations: List['DeviceLightLoc']) -> OrchResult[List[str]]: """ Instructs the orchestrator to enable or disable either the ident or the fault LED. diff --git a/src/pybind/mgr/orchestrator/module.py b/src/pybind/mgr/orchestrator/module.py index 332bc75d862..d5a1bb3da2b 100644 --- a/src/pybind/mgr/orchestrator/module.py +++ b/src/pybind/mgr/orchestrator/module.py @@ -1472,6 +1472,14 @@ Usage: return HandleCommandResult(stdout=out) + @_cli_write_command('orch osd set-spec-affinity') + def _osd_set_spec(self, service_name: str, osd_id: List[str]) -> HandleCommandResult: + """Set service spec affinity for osd""" + completion = self.set_osd_spec(service_name, osd_id) + res = raise_if_exception(completion) + + return HandleCommandResult(stdout=res) + @_cli_write_command('orch daemon add') def daemon_add_misc(self, daemon_type: Optional[ServiceType] = None, @@ -1666,7 +1674,13 @@ Usage: specs: List[Union[ServiceSpec, HostSpec]] = [] # YAML '---' document separator with no content generates # None entries in the output. Let's skip them silently. - content = [o for o in yaml_objs if o is not None] + try: + content = [o for o in yaml_objs if o is not None] + except yaml.scanner.ScannerError as e: + msg = f"Invalid YAML received : {str(e)}" + self.log.exception(msg) + return HandleCommandResult(-errno.EINVAL, stderr=msg) + for s in content: try: spec = json_to_generic_spec(s) @@ -2191,7 +2205,13 @@ Usage: specs: List[TunedProfileSpec] = [] # YAML '---' document separator with no content generates # None entries in the output. Let's skip them silently. - content = [o for o in yaml_objs if o is not None] + try: + content = [o for o in yaml_objs if o is not None] + except yaml.scanner.ScannerError as e: + msg = f"Invalid YAML received : {str(e)}" + self.log.exception(msg) + return HandleCommandResult(-errno.EINVAL, stderr=msg) + for spec in content: specs.append(TunedProfileSpec.from_json(spec)) else: diff --git a/src/python-common/ceph/deployment/service_spec.py b/src/python-common/ceph/deployment/service_spec.py index 8a2a38b86ee..1ac9fa49e32 100644 --- a/src/python-common/ceph/deployment/service_spec.py +++ b/src/python-common/ceph/deployment/service_spec.py @@ -1231,6 +1231,7 @@ class RGWSpec(ServiceSpec): rgw_bucket_counters_cache: Optional[bool] = False, rgw_bucket_counters_cache_size: Optional[int] = None, generate_cert: bool = False, + disable_multisite_sync_traffic: Optional[bool] = None, ): assert service_type == 'rgw', service_type @@ -1283,6 +1284,8 @@ class RGWSpec(ServiceSpec): self.rgw_bucket_counters_cache_size = rgw_bucket_counters_cache_size #: Whether we should generate a cert/key for the user if not provided self.generate_cert = generate_cert + #: Used to make RGW not do multisite replication so it can dedicate to IO + self.disable_multisite_sync_traffic = disable_multisite_sync_traffic def get_port_start(self) -> List[int]: return [self.get_port()] @@ -2328,6 +2331,7 @@ class AlertManagerSpec(MonitoringSpec): user_data: Optional[Dict[str, Any]] = None, config: Optional[Dict[str, str]] = None, networks: Optional[List[str]] = None, + only_bind_port_on_networks: bool = False, port: Optional[int] = None, secure: bool = False, extra_container_args: Optional[GeneralArgList] = None, @@ -2358,6 +2362,7 @@ class AlertManagerSpec(MonitoringSpec): # <webhook_configs> configuration. self.user_data = user_data or {} self.secure = secure + self.only_bind_port_on_networks = only_bind_port_on_networks def get_port_start(self) -> List[int]: return [self.get_port(), 9094] @@ -2404,7 +2409,7 @@ class GrafanaSpec(MonitoringSpec): self.protocol = protocol # whether ports daemons for this service bind to should - # bind to only hte networks listed in networks param, or + # bind to only the networks listed in networks param, or # to all networks. Defaults to false which is saying to bind # on all networks. self.only_bind_port_on_networks = only_bind_port_on_networks diff --git a/src/rgw/driver/rados/rgw_sal_rados.cc b/src/rgw/driver/rados/rgw_sal_rados.cc index 4c67d0ee71a..4c05421653b 100644 --- a/src/rgw/driver/rados/rgw_sal_rados.cc +++ b/src/rgw/driver/rados/rgw_sal_rados.cc @@ -429,6 +429,10 @@ int RadosBucket::remove(const DoutPrefixProvider* dpp, ldpp_dout(dpp, -1) << "ERROR: unable to remove notifications from bucket. ret=" << ps_ret << dendl; } + if (ret = rgw::bucketlogging::bucket_deletion_cleanup(dpp, store, this, y); ret < 0) { + ldpp_dout(dpp, 1) << "WARNING: could not cleanup bucket logging configuration and pending objects, ret = " << ret << dendl; + } + ret = store->ctl()->bucket->unlink_bucket(rados, info.owner, info.bucket, y, dpp, false); if (ret < 0) { @@ -1024,15 +1028,15 @@ int RadosBucket::remove_topics(RGWObjVersionTracker* objv_tracker, objv_tracker, y); } -int RadosBucket::get_logging_object_name(std::string& obj_name, - const std::string& prefix, - optional_yield y, +int RadosBucket::get_logging_object_name(std::string& obj_name, + const std::string& prefix, + optional_yield y, const DoutPrefixProvider *dpp, RGWObjVersionTracker* objv_tracker) { rgw_pool data_pool; const auto obj_name_oid = bucketlogging::object_name_oid(this, prefix); if (!store->getRados()->get_obj_data_pool(get_placement_rule(), rgw_obj{get_key(), obj_name_oid}, &data_pool)) { - ldpp_dout(dpp, 1) << "failed to get data pool for bucket '" << get_name() << + ldpp_dout(dpp, 1) << "ERROR: failed to get data pool for bucket '" << get_name() << "' when getting logging object name" << dendl; return -EIO; } @@ -1048,23 +1052,23 @@ int RadosBucket::get_logging_object_name(std::string& obj_name, nullptr, nullptr); if (ret < 0) { - ldpp_dout(dpp, 1) << "failed to get logging object name from '" << obj_name_oid << "'. ret = " << ret << dendl; + ldpp_dout(dpp, 1) << "ERROR: failed to get logging object name from '" << obj_name_oid << "'. ret = " << ret << dendl; return ret; } obj_name = bl.to_str(); return 0; } -int RadosBucket::set_logging_object_name(const std::string& obj_name, - const std::string& prefix, - optional_yield y, - const DoutPrefixProvider *dpp, +int RadosBucket::set_logging_object_name(const std::string& obj_name, + const std::string& prefix, + optional_yield y, + const DoutPrefixProvider *dpp, bool new_obj, RGWObjVersionTracker* objv_tracker) { rgw_pool data_pool; const auto obj_name_oid = bucketlogging::object_name_oid(this, prefix); if (!store->getRados()->get_obj_data_pool(get_placement_rule(), rgw_obj{get_key(), obj_name_oid}, &data_pool)) { - ldpp_dout(dpp, 1) << "failed to get data pool for bucket '" << get_name() << + ldpp_dout(dpp, 1) << "ERROR: failed to get data pool for bucket '" << get_name() << "' when setting logging object name" << dendl; return -EIO; } @@ -1080,28 +1084,65 @@ int RadosBucket::set_logging_object_name(const std::string& obj_name, y, nullptr); if (ret == -EEXIST) { - ldpp_dout(dpp, 20) << "race detected in initializing '" << obj_name_oid << "' with logging object name:'" << obj_name << "'. ret = " << ret << dendl; + ldpp_dout(dpp, 20) << "INFO: race detected in initializing '" << obj_name_oid << "' with logging object name:'" << obj_name << "'. ret = " << ret << dendl; } else if (ret == -ECANCELED) { - ldpp_dout(dpp, 20) << "race detected in updating logging object name '" << obj_name << "' at '" << obj_name_oid << "'. ret = " << ret << dendl; + ldpp_dout(dpp, 20) << "INFO: race detected in updating logging object name '" << obj_name << "' at '" << obj_name_oid << "'. ret = " << ret << dendl; } else if (ret < 0) { - ldpp_dout(dpp, 1) << "failed to set logging object name '" << obj_name << "' at '" << obj_name_oid << "'. ret = " << ret << dendl; + ldpp_dout(dpp, 1) << "ERROR: failed to set logging object name '" << obj_name << "' at '" << obj_name_oid << "'. ret = " << ret << dendl; } return ret; } +int RadosBucket::remove_logging_object_name(const std::string& prefix, + optional_yield y, + const DoutPrefixProvider *dpp, + RGWObjVersionTracker* objv_tracker) { + rgw_pool data_pool; + const auto obj_name_oid = bucketlogging::object_name_oid(this, prefix); + if (!store->getRados()->get_obj_data_pool(get_placement_rule(), rgw_obj{get_key(), obj_name_oid}, &data_pool)) { + ldpp_dout(dpp, 1) << "ERROR: failed to get data pool for bucket '" << get_name() << + "' when setting logging object name" << dendl; + return -EIO; + } + return rgw_delete_system_obj(dpp, store->svc()->sysobj, + data_pool, + obj_name_oid, + objv_tracker, + y); +} + std::string to_temp_object_name(const rgw::sal::Bucket* bucket, const std::string& obj_name) { return fmt::format("{}__shadow_{}0", bucket->get_bucket_id(), obj_name); } +int RadosBucket::remove_logging_object(const std::string& obj_name, optional_yield y, const DoutPrefixProvider *dpp) { + rgw_pool data_pool; + const rgw_obj head_obj{get_key(), obj_name}; + const auto placement_rule = get_placement_rule(); + + if (!store->getRados()->get_obj_data_pool(placement_rule, head_obj, &data_pool)) { + ldpp_dout(dpp, 1) << "ERROR: failed to get data pool for bucket '" << get_name() << + "' when deleting logging object" << dendl; + return -EIO; + } + + const auto temp_obj_name = to_temp_object_name(this, obj_name); + return rgw_delete_system_obj(dpp, store->svc()->sysobj, + data_pool, + temp_obj_name, + nullptr, + y); +} + int RadosBucket::commit_logging_object(const std::string& obj_name, optional_yield y, const DoutPrefixProvider *dpp) { rgw_pool data_pool; const rgw_obj head_obj{get_key(), obj_name}; const auto placement_rule = get_placement_rule(); if (!store->getRados()->get_obj_data_pool(placement_rule, head_obj, &data_pool)) { - ldpp_dout(dpp, 1) << "failed to get data pool for bucket '" << get_name() << + ldpp_dout(dpp, 1) << "ERROR: failed to get data pool for bucket '" << get_name() << "' when comitting logging object" << dendl; return -EIO; } @@ -1110,7 +1151,6 @@ int RadosBucket::commit_logging_object(const std::string& obj_name, optional_yie std::map<string, bufferlist> obj_attrs; ceph::real_time mtime; bufferlist bl_data; - // TODO: this is needed only for etag calculation if (const auto ret = rgw_get_system_obj(store->svc()->sysobj, data_pool, temp_obj_name, @@ -1120,10 +1160,13 @@ int RadosBucket::commit_logging_object(const std::string& obj_name, optional_yie y, dpp, &obj_attrs, - nullptr); ret < 0) { - ldpp_dout(dpp, 1) << "faild to read logging data when comitting to object '" << temp_obj_name + nullptr); ret < 0 && ret != -ENOENT) { + ldpp_dout(dpp, 1) << "ERROR: failed to read logging data when comitting object '" << temp_obj_name << ". error: " << ret << dendl; return ret; + } else if (ret == -ENOENT) { + ldpp_dout(dpp, 1) << "WARNING: temporary logging object '" << temp_obj_name << "' does not exists" << dendl; + return 0; } uint64_t size = bl_data.length(); @@ -1137,13 +1180,13 @@ int RadosBucket::commit_logging_object(const std::string& obj_name, optional_yie nullptr, // no special placment for tail get_key(), head_obj); ret < 0) { - ldpp_dout(dpp, 1) << "failed to create manifest when comitting logging object. error: " << + ldpp_dout(dpp, 1) << "ERROR: failed to create manifest when comitting logging object. error: " << ret << dendl; return ret; } if (const auto ret = manifest_gen.create_next(size); ret < 0) { - ldpp_dout(dpp, 1) << "failed to add object to manifest when comitting logging object. error: " << + ldpp_dout(dpp, 1) << "ERROR: failed to add object to manifest when comitting logging object. error: " << ret << dendl; return ret; } @@ -1151,7 +1194,7 @@ int RadosBucket::commit_logging_object(const std::string& obj_name, optional_yie if (const auto expected_temp_obj = manifest_gen.get_cur_obj(store->getRados()); temp_obj_name != expected_temp_obj.oid) { // TODO: cleanup temporary object, commit would never succeed - ldpp_dout(dpp, 1) << "temporary logging object name mismatch: '" << + ldpp_dout(dpp, 1) << "ERROR: temporary logging object name mismatch: '" << temp_obj_name << "' != '" << expected_temp_obj.oid << "'" << dendl; return -EINVAL; } @@ -1182,11 +1225,11 @@ int RadosBucket::commit_logging_object(const std::string& obj_name, optional_yie const req_context rctx{dpp, y, nullptr}; jspan_context trace{false, false}; if (const auto ret = head_obj_wop.write_meta(0, size, obj_attrs, rctx, trace); ret < 0) { - ldpp_dout(dpp, 1) << "failed to commit logging object '" << temp_obj_name << - "' to bucket id '" << get_bucket_id() <<"'. error: " << ret << dendl; + ldpp_dout(dpp, 1) << "ERROR: failed to commit logging object '" << temp_obj_name << + "' to bucket id '" << get_info().bucket <<"'. error: " << ret << dendl; return ret; } - ldpp_dout(dpp, 20) << "committed logging object '" << temp_obj_name << + ldpp_dout(dpp, 20) << "INFO: committed logging object '" << temp_obj_name << "' with size of " << size << " bytes, to bucket '" << get_key() << "' as '" << obj_name << "'" << dendl; return 0; @@ -1204,30 +1247,30 @@ void bucket_logging_completion(rados_completion_t completion, void* args) { auto* aio_comp = reinterpret_cast<librados::AioCompletionImpl*>(completion); std::unique_ptr<BucketLoggingCompleteArg> logging_args(reinterpret_cast<BucketLoggingCompleteArg*>(args)); if (aio_comp->get_return_value() < 0) { - ldout(logging_args->cct, 1) << "failed to complete append to logging object '" << logging_args->obj_name << + ldout(logging_args->cct, 1) << "ERROR: failed to complete append to logging object '" << logging_args->obj_name << "'. ret = " << aio_comp->get_return_value() << dendl; } else { - ldout(logging_args->cct, 20) << "wrote " << logging_args->size << " bytes to logging object '" << + ldout(logging_args->cct, 20) << "INFO: wrote " << logging_args->size << " bytes to logging object '" << logging_args->obj_name << "'" << dendl; } } -int RadosBucket::write_logging_object(const std::string& obj_name, - const std::string& record, - optional_yield y, +int RadosBucket::write_logging_object(const std::string& obj_name, + const std::string& record, + optional_yield y, const DoutPrefixProvider *dpp, bool async_completion) { const auto temp_obj_name = to_temp_object_name(this, obj_name); rgw_pool data_pool; rgw_obj obj{get_key(), obj_name}; if (!store->getRados()->get_obj_data_pool(get_placement_rule(), obj, &data_pool)) { - ldpp_dout(dpp, 1) << "failed to get data pool for bucket '" << get_name() << + ldpp_dout(dpp, 1) << "ERROR: failed to get data pool for bucket '" << get_name() << "' when writing logging object" << dendl; return -EIO; } librados::IoCtx io_ctx; if (const auto ret = rgw_init_ioctx(dpp, store->getRados()->get_rados_handle(), data_pool, io_ctx); ret < 0) { - ldpp_dout(dpp, 1) << "failed to get IO context for logging object from data pool:" << data_pool.to_str() << dendl; + ldpp_dout(dpp, 1) << "ERROR: failed to get IO context for logging object from data pool:" << data_pool.to_str() << dendl; return -EIO; } bufferlist bl; @@ -1242,7 +1285,7 @@ int RadosBucket::write_logging_object(const std::string& obj_name, auto arg = std::make_unique<BucketLoggingCompleteArg>(temp_obj_name, record.length(), store->ctx()); completion->set_complete_callback(arg.get(), bucket_logging_completion); if (const auto ret = io_ctx.aio_operate(temp_obj_name, completion.get(), &op); ret < 0) { - ldpp_dout(dpp, 1) << "failed to append to logging object '" << temp_obj_name << + ldpp_dout(dpp, 1) << "ERROR: failed to append to logging object '" << temp_obj_name << "'. ret = " << ret << dendl; return ret; } @@ -1251,11 +1294,11 @@ int RadosBucket::write_logging_object(const std::string& obj_name, return 0; } if (const auto ret = rgw_rados_operate(dpp, io_ctx, temp_obj_name, &op, y); ret < 0) { - ldpp_dout(dpp, 1) << "failed to append to logging object '" << temp_obj_name << + ldpp_dout(dpp, 1) << "ERROR: failed to append to logging object '" << temp_obj_name << "'. ret = " << ret << dendl; return ret; } - ldpp_dout(dpp, 20) << "wrote " << record.length() << " bytes to logging object '" << + ldpp_dout(dpp, 20) << "INFO: wrote " << record.length() << " bytes to logging object '" << temp_obj_name << "'" << dendl; return 0; } diff --git a/src/rgw/driver/rados/rgw_sal_rados.h b/src/rgw/driver/rados/rgw_sal_rados.h index 85ea247e345..e65c3c0050e 100644 --- a/src/rgw/driver/rados/rgw_sal_rados.h +++ b/src/rgw/driver/rados/rgw_sal_rados.h @@ -780,18 +780,23 @@ class RadosBucket : public StoreBucket { optional_yield y, const DoutPrefixProvider *dpp) override; int remove_topics(RGWObjVersionTracker* objv_tracker, optional_yield y, const DoutPrefixProvider *dpp) override; - int get_logging_object_name(std::string& obj_name, - const std::string& prefix, - optional_yield y, - const DoutPrefixProvider *dpp, + int get_logging_object_name(std::string& obj_name, + const std::string& prefix, + optional_yield y, + const DoutPrefixProvider *dpp, + RGWObjVersionTracker* objv_tracker) override; + int set_logging_object_name(const std::string& obj_name, + const std::string& prefix, + optional_yield y, + const DoutPrefixProvider *dpp, + bool new_obj, RGWObjVersionTracker* objv_tracker) override; - int set_logging_object_name(const std::string& obj_name, - const std::string& prefix, - optional_yield y, - const DoutPrefixProvider *dpp, - bool new_obj, + int remove_logging_object_name(const std::string& prefix, + optional_yield y, + const DoutPrefixProvider *dpp, RGWObjVersionTracker* objv_tracker) override; int commit_logging_object(const std::string& obj_name, optional_yield y, const DoutPrefixProvider *dpp) override; + int remove_logging_object(const std::string& obj_name, optional_yield y, const DoutPrefixProvider *dpp) override; int write_logging_object(const std::string& obj_name, const std::string& record, optional_yield y, const DoutPrefixProvider *dpp, bool async_completion) override; private: diff --git a/src/rgw/radosgw-admin/radosgw-admin.cc b/src/rgw/radosgw-admin/radosgw-admin.cc index 182e42b8e31..47b68d3f902 100644 --- a/src/rgw/radosgw-admin/radosgw-admin.cc +++ b/src/rgw/radosgw-admin/radosgw-admin.cc @@ -171,7 +171,8 @@ void usage() cout << " bucket sync disable disable bucket sync\n"; cout << " bucket sync enable enable bucket sync\n"; cout << " bucket radoslist list rados objects backing bucket's objects\n"; - cout << " bucket logging flush flush pending log records object of source bucket to the log bucket to bucket\n"; + cout << " bucket logging flush flush pending log records object of source bucket to the log bucket\n"; + cout << " bucket logging info get info on bucket logging configuration on source bucket or list of sources in log bucket\n"; cout << " bi get retrieve bucket index object entries\n"; cout << " bi put store bucket index object entries\n"; cout << " bi list list raw bucket index entries\n"; @@ -701,6 +702,7 @@ enum class OPT { BUCKET_OBJECT_SHARD, BUCKET_RESYNC_ENCRYPTED_MULTIPART, BUCKET_LOGGING_FLUSH, + BUCKET_LOGGING_INFO, POLICY, LOG_LIST, LOG_SHOW, @@ -940,6 +942,7 @@ static SimpleCmd::Commands all_cmds = { { "bucket object shard", OPT::BUCKET_OBJECT_SHARD }, { "bucket resync encrypted multipart", OPT::BUCKET_RESYNC_ENCRYPTED_MULTIPART }, { "bucket logging flush", OPT::BUCKET_LOGGING_FLUSH }, + { "bucket logging info", OPT::BUCKET_LOGGING_INFO }, { "policy", OPT::POLICY }, { "log list", OPT::LOG_LIST }, { "log show", OPT::LOG_SHOW }, @@ -7750,6 +7753,47 @@ int main(int argc, const char **argv) return 0; } + if (opt_cmd == OPT::BUCKET_LOGGING_INFO) { + if (bucket_name.empty()) { + cerr << "ERROR: bucket not specified" << std::endl; + return EINVAL; + } + int ret = init_bucket(tenant, bucket_name, bucket_id, &bucket); + if (ret < 0) { + return -ret; + } + const auto& bucket_attrs = bucket->get_attrs(); + auto iter = bucket_attrs.find(RGW_ATTR_BUCKET_LOGGING); + if (iter != bucket_attrs.end()) { + rgw::bucketlogging::configuration configuration; + try { + configuration.enabled = true; + decode(configuration, iter->second); + } catch (buffer::error& err) { + cerr << "ERROR: failed to decode logging attribute '" << RGW_ATTR_BUCKET_LOGGING + << "'. error: " << err.what() << std::endl; + return EINVAL; + } + encode_json("logging", configuration, formatter.get()); + formatter->flush(cout); + } + iter = bucket_attrs.find(RGW_ATTR_BUCKET_LOGGING_SOURCES); + if (iter != bucket_attrs.end()) { + rgw::bucketlogging::source_buckets sources; + try { + decode(sources, iter->second); + } catch (buffer::error& err) { + cerr << "ERROR: failed to decode logging sources attribute '" << RGW_ATTR_BUCKET_LOGGING_SOURCES + << "'. error: " << err.what() << std::endl; + return EINVAL; + } + encode_json("logging_sources", sources, formatter.get()); + formatter->flush(cout); + } + + return 0; + } + if (opt_cmd == OPT::LOG_LIST) { // filter by date? if (date.size() && date.size() != 10) { diff --git a/src/rgw/rgw_auth.cc b/src/rgw/rgw_auth.cc index f188e7975b3..a0b494eb9c5 100644 --- a/src/rgw/rgw_auth.cc +++ b/src/rgw/rgw_auth.cc @@ -188,7 +188,8 @@ int load_account_and_policies(const DoutPrefixProvider* dpp, static auto transform_old_authinfo(const RGWUserInfo& user, std::optional<RGWAccountInfo> account, - std::vector<IAM::Policy> policies) + std::vector<IAM::Policy> policies, + sal::Driver* driver) -> std::unique_ptr<rgw::auth::Identity> { /* This class is not intended for public use. Should be removed altogether @@ -198,6 +199,7 @@ static auto transform_old_authinfo(const RGWUserInfo& user, /* For this particular case it's OK to use rgw_user structure to convey * the identity info as this was the policy for doing that before the * new auth. */ + sal::Driver* driver; const rgw_user id; const std::string display_name; const std::string path; @@ -208,8 +210,10 @@ static auto transform_old_authinfo(const RGWUserInfo& user, public: DummyIdentityApplier(const RGWUserInfo& user, std::optional<RGWAccountInfo> account, - std::vector<IAM::Policy> policies) - : id(user.user_id), + std::vector<IAM::Policy> policies, + sal::Driver* driver) + : driver(driver), + id(user.user_id), display_name(user.display_name), path(user.path), is_admin(user.admin), @@ -294,9 +298,9 @@ static auto transform_old_authinfo(const RGWUserInfo& user, << ", is_admin=" << is_admin << ")"; } - void load_acct_info(const DoutPrefixProvider* dpp, - RGWUserInfo& user_info) const override { + auto load_acct_info(const DoutPrefixProvider* dpp) const -> std::unique_ptr<rgw::sal::User> override { // noop, this user info was passed in on construction + return driver->get_user(id); } void modify_request_state(const DoutPrefixProvider* dpp, req_state* s) const { @@ -307,7 +311,7 @@ static auto transform_old_authinfo(const RGWUserInfo& user, }; return std::make_unique<DummyIdentityApplier>( - user, std::move(account), std::move(policies)); + user, std::move(account), std::move(policies), driver); } auto transform_old_authinfo(const DoutPrefixProvider* dpp, @@ -332,7 +336,7 @@ auto transform_old_authinfo(const DoutPrefixProvider* dpp, if (policies_) { // return policies to caller if requested *policies_ = policies; } - return transform_old_authinfo(info, std::move(account), std::move(policies)); + return transform_old_authinfo(info, std::move(account), std::move(policies), driver); } } /* namespace auth */ @@ -527,7 +531,7 @@ rgw::auth::Strategy::apply(const DoutPrefixProvider *dpp, const rgw::auth::Strat /* Account used by a given RGWOp is decoupled from identity employed * in the authorization phase (RGWOp::verify_permissions). */ - applier->load_acct_info(dpp, s->user->get_info()); + s->user = applier->load_acct_info(dpp); s->perm_mask = applier->get_perm_mask(); /* This is the single place where we pass req_state as a pointer @@ -635,36 +639,36 @@ void rgw::auth::WebIdentityApplier::create_account(const DoutPrefixProvider* dpp user_info = user->get_info(); } -void rgw::auth::WebIdentityApplier::load_acct_info(const DoutPrefixProvider* dpp, RGWUserInfo& user_info) const { +auto rgw::auth::WebIdentityApplier::load_acct_info(const DoutPrefixProvider* dpp) const -> std::unique_ptr<rgw::sal::User> { rgw_user federated_user; federated_user.id = this->sub; federated_user.tenant = role_tenant; federated_user.ns = "oidc"; + std::unique_ptr<rgw::sal::User> user = driver->get_user(federated_user); if (account) { // we don't need shadow users for account roles because bucket ownership, // quota, and stats are tracked by the account instead of the user - user_info.user_id = std::move(federated_user); + RGWUserInfo& user_info = user->get_info(); user_info.display_name = user_name; user_info.type = TYPE_WEB; - return; + // the user_info.user_id is initialized by driver->get_user(...) + return user; } - std::unique_ptr<rgw::sal::User> user = driver->get_user(federated_user); - //Check in oidc namespace if (user->load_user(dpp, null_yield) >= 0) { /* Succeeded. */ - user_info = user->get_info(); - return; + // the user_info in user is initialized by user->load_user(...) + return user; } user->clear_ns(); //Check for old users which wouldn't have been created in oidc namespace if (user->load_user(dpp, null_yield) >= 0) { /* Succeeded. */ - user_info = user->get_info(); - return; + // the user_info in user is initialized by user->load_user(...) + return user; } //Check if user_id.buckets already exists, may have been from the time, when shadow users didnt exist @@ -675,7 +679,7 @@ void rgw::auth::WebIdentityApplier::load_acct_info(const DoutPrefixProvider* dpp last_synced, last_updated); if (ret < 0 && ret != -ENOENT) { ldpp_dout(dpp, 0) << "ERROR: reading stats for the user returned error " << ret << dendl; - return; + return user; } if (ret == -ENOENT) { /* in case of ENOENT, which means user doesnt have buckets */ //In this case user will be created in oidc namespace @@ -688,7 +692,8 @@ void rgw::auth::WebIdentityApplier::load_acct_info(const DoutPrefixProvider* dpp } ldpp_dout(dpp, 0) << "NOTICE: couldn't map oidc federated user " << federated_user << dendl; - create_account(dpp, federated_user, this->user_name, user_info); + create_account(dpp, federated_user, this->user_name, user->get_info()); + return user; } void rgw::auth::WebIdentityApplier::modify_request_state(const DoutPrefixProvider *dpp, req_state* s) const @@ -940,7 +945,7 @@ void rgw::auth::RemoteApplier::write_ops_log_entry(rgw_log_entry& entry) const } /* TODO(rzarzynski): we need to handle display_name changes. */ -void rgw::auth::RemoteApplier::load_acct_info(const DoutPrefixProvider* dpp, RGWUserInfo& user_info) const /* out */ +auto rgw::auth::RemoteApplier::load_acct_info(const DoutPrefixProvider* dpp) const -> std::unique_ptr<rgw::sal::User> /* out */ { /* It's supposed that RGWRemoteAuthApplier tries to load account info * that belongs to the authenticated identity. Another policy may be @@ -979,9 +984,9 @@ void rgw::auth::RemoteApplier::load_acct_info(const DoutPrefixProvider* dpp, RGW (void) load_account_and_policies(dpp, null_yield, driver, user->get_info(), user->get_attrs(), account, policies); - user_info = std::move(user->get_info()); owner_acct_user = std::move(tenanted_uid); - return; + // the user_info in user is initialized by user->load_user(...) + return user; } } @@ -994,15 +999,16 @@ void rgw::auth::RemoteApplier::load_acct_info(const DoutPrefixProvider* dpp, RGW (void) load_account_and_policies(dpp, null_yield, driver, user->get_info(), user->get_attrs(), account, policies); - user_info = std::move(user->get_info()); owner_acct_user = acct_user; - return; + // the user_info in user is initialized by user->load_user(...) + return user; } ldpp_dout(dpp, 0) << "NOTICE: couldn't map swift user " << acct_user << dendl; - create_account(dpp, acct_user, implicit_tenant, user_info); + create_account(dpp, acct_user, implicit_tenant, user->get_info()); /* Succeeded if we are here (create_account() hasn't throwed). */ + return user; } void rgw::auth::RemoteApplier::modify_request_state(const DoutPrefixProvider* dpp, req_state* s) const @@ -1102,11 +1108,11 @@ uint32_t rgw::auth::LocalApplier::get_perm_mask(const std::string& subuser_name, } } -void rgw::auth::LocalApplier::load_acct_info(const DoutPrefixProvider* dpp, RGWUserInfo& user_info) const /* out */ +auto rgw::auth::LocalApplier::load_acct_info(const DoutPrefixProvider* dpp) const -> std::unique_ptr<rgw::sal::User> /* out */ { /* Load the account that belongs to the authenticated identity. An extra call * to RADOS may be safely skipped in this case. */ - user_info = this->user_info; + return std::unique_ptr<rgw::sal::User>(user.release()); } void rgw::auth::LocalApplier::modify_request_state(const DoutPrefixProvider* dpp, req_state* s) const @@ -1125,6 +1131,22 @@ void rgw::auth::LocalApplier::write_ops_log_entry(rgw_log_entry& entry) const } } +rgw::auth::LocalApplier::LocalApplier(CephContext* const cct, + std::unique_ptr<rgw::sal::User> user, + std::optional<RGWAccountInfo> account, + std::vector<IAM::Policy> policies, + std::string subuser, + const std::optional<uint32_t>& perm_mask, + const std::string access_key_id) + : user_info(user->get_info()), + user(std::move(user)), + account(std::move(account)), + policies(std::move(policies)), + subuser(std::move(subuser)), + perm_mask(perm_mask.value_or(RGW_PERM_INVALID)), + access_key_id(access_key_id) { +} + ACLOwner rgw::auth::RoleApplier::get_aclowner() const { ACLOwner owner; @@ -1187,10 +1209,11 @@ bool rgw::auth::RoleApplier::is_identity(const Principal& p) const { return false; } -void rgw::auth::RoleApplier::load_acct_info(const DoutPrefixProvider* dpp, RGWUserInfo& user_info) const /* out */ +auto rgw::auth::RoleApplier::load_acct_info(const DoutPrefixProvider* dpp) const -> std::unique_ptr<rgw::sal::User> /* out */ { /* Load the user id */ - user_info.user_id = this->token_attrs.user_id; + std::unique_ptr<rgw::sal::User> user = driver->get_user(this->token_attrs.user_id); + return user; } void rgw::auth::RoleApplier::write_ops_log_entry(rgw_log_entry& entry) const @@ -1271,9 +1294,10 @@ rgw::auth::AnonymousEngine::authenticate(const DoutPrefixProvider* dpp, const re } else { RGWUserInfo user_info; rgw_get_anon_user(user_info); - + std::unique_ptr<rgw::sal::User> user = s->user->clone(); + user->get_info() = user_info; auto apl = \ - apl_factory->create_apl_local(cct, s, user_info, std::nullopt, {}, + apl_factory->create_apl_local(cct, s, std::move(user), std::nullopt, {}, rgw::auth::LocalApplier::NO_SUBUSER, std::nullopt, rgw::auth::LocalApplier::NO_ACCESS_KEY); return result_t::grant(std::move(apl)); diff --git a/src/rgw/rgw_auth.h b/src/rgw/rgw_auth.h index dcbd5f70f33..22b0816bac9 100644 --- a/src/rgw/rgw_auth.h +++ b/src/rgw/rgw_auth.h @@ -140,7 +140,7 @@ public: * * XXX: be aware that the "account" term refers to rgw_user. The naming * is legacy. */ - virtual void load_acct_info(const DoutPrefixProvider* dpp, RGWUserInfo& user_info) const = 0; /* out */ + virtual auto load_acct_info(const DoutPrefixProvider* dpp) const -> std::unique_ptr<rgw::sal::User> = 0; /* out */ /* Apply any changes to request state. This method will be most useful for * TempURL of Swift API. */ @@ -485,7 +485,7 @@ public: bool is_identity(const Principal& p) const override; - void load_acct_info(const DoutPrefixProvider* dpp, RGWUserInfo& user_info) const override; + auto load_acct_info(const DoutPrefixProvider* dpp) const -> std::unique_ptr<rgw::sal::User> override; uint32_t get_identity_type() const override { return TYPE_WEB; @@ -657,7 +657,7 @@ public: uint32_t get_perm_mask() const override { return info.perm_mask; } void to_str(std::ostream& out) const override; - void load_acct_info(const DoutPrefixProvider* dpp, RGWUserInfo& user_info) const override; /* out */ + auto load_acct_info(const DoutPrefixProvider* dpp) const -> std::unique_ptr<rgw::sal::User> override; /* out */ void modify_request_state(const DoutPrefixProvider* dpp, req_state* s) const override; void write_ops_log_entry(rgw_log_entry& entry) const override; uint32_t get_identity_type() const override { return info.acct_type; } @@ -684,7 +684,7 @@ public: /* rgw::auth::LocalApplier targets those auth engines that base on the data - * enclosed in the RGWUserInfo control structure. As a side effect of doing + * enclosed in the rgw::sal::User->RGWUserInfo control structure. As a side effect of doing * the authentication process, they must have it loaded. Leveraging this is * a way to avoid unnecessary calls to underlying RADOS store. */ class LocalApplier : public IdentityApplier { @@ -692,6 +692,7 @@ class LocalApplier : public IdentityApplier { protected: const RGWUserInfo user_info; + mutable std::unique_ptr<rgw::sal::User> user; const std::optional<RGWAccountInfo> account; const std::vector<IAM::Policy> policies; const std::string subuser; @@ -706,19 +707,12 @@ public: static const std::string NO_ACCESS_KEY; LocalApplier(CephContext* const cct, - const RGWUserInfo& user_info, + std::unique_ptr<rgw::sal::User> user, std::optional<RGWAccountInfo> account, std::vector<IAM::Policy> policies, std::string subuser, const std::optional<uint32_t>& perm_mask, - const std::string access_key_id) - : user_info(user_info), - account(std::move(account)), - policies(std::move(policies)), - subuser(std::move(subuser)), - perm_mask(perm_mask.value_or(RGW_PERM_INVALID)), - access_key_id(access_key_id) { - } + const std::string access_key_id); ACLOwner get_aclowner() const override; uint32_t get_perms_from_aclspec(const DoutPrefixProvider* dpp, const aclspec_t& aclspec) const override; @@ -733,7 +727,7 @@ public: } } void to_str(std::ostream& out) const override; - void load_acct_info(const DoutPrefixProvider* dpp, RGWUserInfo& user_info) const override; /* out */ + auto load_acct_info(const DoutPrefixProvider* dpp) const -> std::unique_ptr<rgw::sal::User> override; /* out */ void modify_request_state(const DoutPrefixProvider* dpp, req_state* s) const override; uint32_t get_identity_type() const override { return user_info.type; } std::string get_acct_name() const override { return {}; } @@ -751,7 +745,7 @@ public: virtual ~Factory() {} virtual aplptr_t create_apl_local(CephContext* cct, const req_state* s, - const RGWUserInfo& user_info, + std::unique_ptr<rgw::sal::User> user, std::optional<RGWAccountInfo> account, std::vector<IAM::Policy> policies, const std::string& subuser, @@ -780,15 +774,20 @@ public: std::vector<std::pair<std::string, std::string>> principal_tags; }; protected: + CephContext* const cct; + rgw::sal::Driver* driver; Role role; TokenAttrs token_attrs; public: RoleApplier(CephContext* const cct, + rgw::sal::Driver* driver, const Role& role, const TokenAttrs& token_attrs) - : role(role), + : cct(cct), + driver(driver), + role(role), token_attrs(token_attrs) {} ACLOwner get_aclowner() const override; @@ -804,7 +803,7 @@ public: return RGW_PERM_NONE; } void to_str(std::ostream& out) const override; - void load_acct_info(const DoutPrefixProvider* dpp, RGWUserInfo& user_info) const override; /* out */ + auto load_acct_info(const DoutPrefixProvider* dpp) const -> std::unique_ptr<rgw::sal::User> override; /* out */ uint32_t get_identity_type() const override { return TYPE_ROLE; } std::string get_acct_name() const override { return {}; } std::string get_subuser() const override { return {}; } diff --git a/src/rgw/rgw_auth_filters.h b/src/rgw/rgw_auth_filters.h index a93641e8b8e..7d264197c52 100644 --- a/src/rgw/rgw_auth_filters.h +++ b/src/rgw/rgw_auth_filters.h @@ -117,8 +117,8 @@ public: return get_decoratee().get_account(); } - void load_acct_info(const DoutPrefixProvider* dpp, RGWUserInfo& user_info) const override { /* out */ - return get_decoratee().load_acct_info(dpp, user_info); + auto load_acct_info(const DoutPrefixProvider* dpp) const -> std::unique_ptr<rgw::sal::User> override { /* out */ + return get_decoratee().load_acct_info(dpp); } void modify_request_state(const DoutPrefixProvider* dpp, req_state * s) const override { /* in/out */ @@ -152,7 +152,7 @@ public: } void to_str(std::ostream& out) const override; - void load_acct_info(const DoutPrefixProvider* dpp, RGWUserInfo& user_info) const override; /* out */ + auto load_acct_info(const DoutPrefixProvider* dpp) const -> std::unique_ptr<rgw::sal::User> override; /* out */ }; /* static declaration: UNKNOWN_ACCT will be an empty rgw_user that is a result @@ -169,23 +169,25 @@ void ThirdPartyAccountApplier<T>::to_str(std::ostream& out) const } template <typename T> -void ThirdPartyAccountApplier<T>::load_acct_info(const DoutPrefixProvider* dpp, RGWUserInfo& user_info) const +auto ThirdPartyAccountApplier<T>::load_acct_info(const DoutPrefixProvider* dpp) const -> std::unique_ptr<rgw::sal::User> { + std::unique_ptr<rgw::sal::User> luser; if (UNKNOWN_ACCT == acct_user_override) { /* There is no override specified by the upper layer. This means that we'll * load the account owned by the authenticated identity (aka auth_user). */ - DecoratedApplier<T>::load_acct_info(dpp, user_info); + luser = DecoratedApplier<T>::load_acct_info(dpp); } else if (DecoratedApplier<T>::is_owner_of(acct_user_override)) { /* The override has been specified but the account belongs to the authenticated * identity. We may safely forward the call to a next stage. */ - DecoratedApplier<T>::load_acct_info(dpp, user_info); + luser = DecoratedApplier<T>::load_acct_info(dpp); } else if (this->is_anonymous()) { /* If the user was authed by the anonymous engine then scope the ANON user * to the correct tenant */ + luser = driver->get_user(rgw_user(RGW_USER_ANON_ID)); if (acct_user_override.tenant.empty()) - user_info.user_id = rgw_user(acct_user_override.id, RGW_USER_ANON_ID); + luser->get_info().user_id = rgw_user(acct_user_override.id, RGW_USER_ANON_ID); else - user_info.user_id = rgw_user(acct_user_override.tenant, RGW_USER_ANON_ID); + luser->get_info().user_id = rgw_user(acct_user_override.tenant, RGW_USER_ANON_ID); } else { /* Compatibility mechanism for multi-tenancy. For more details refer to * load_acct_info method of rgw::auth::RemoteApplier. */ @@ -196,9 +198,10 @@ void ThirdPartyAccountApplier<T>::load_acct_info(const DoutPrefixProvider* dpp, user = driver->get_user(tenanted_uid); if (user->load_user(dpp, null_yield) >= 0) { - user_info = user->get_info(); + // the user_info in luser is initialized by user->load_user(...) + luser = user->clone(); /* Succeeded. */ - return; + return luser; } } @@ -213,8 +216,10 @@ void ThirdPartyAccountApplier<T>::load_acct_info(const DoutPrefixProvider* dpp, throw ret; } } - user_info = user->get_info(); + // the user_info in luser is initialized by user->load_user(...) + luser = user->clone(); } + return luser; } template <typename T> static inline @@ -248,7 +253,7 @@ public: } void to_str(std::ostream& out) const override; - void load_acct_info(const DoutPrefixProvider* dpp, RGWUserInfo& user_info) const override; /* out */ + auto load_acct_info(const DoutPrefixProvider* dpp) const -> std::unique_ptr<rgw::sal::User> override; /* out */ void modify_request_state(const DoutPrefixProvider* dpp, req_state* s) const override; /* in/out */ ACLOwner get_aclowner() const override { @@ -271,10 +276,10 @@ void SysReqApplier<T>::to_str(std::ostream& out) const } template <typename T> -void SysReqApplier<T>::load_acct_info(const DoutPrefixProvider* dpp, RGWUserInfo& user_info) const +auto SysReqApplier<T>::load_acct_info(const DoutPrefixProvider* dpp) const -> std::unique_ptr<rgw::sal::User> { - DecoratedApplier<T>::load_acct_info(dpp, user_info); - is_system = user_info.system; + std::unique_ptr<rgw::sal::User> user = DecoratedApplier<T>::load_acct_info(dpp); + is_system = user->get_info().system; if (is_system) { //ldpp_dout(dpp, 20) << "system request" << dendl; @@ -285,7 +290,7 @@ void SysReqApplier<T>::load_acct_info(const DoutPrefixProvider* dpp, RGWUserInfo effective_owner->id = parse_owner(str); if (const auto* uid = std::get_if<rgw_user>(&effective_owner->id); uid) { - std::unique_ptr<rgw::sal::User> user = driver->get_user(*uid); + user = driver->get_user(*uid); if (user->load_user(dpp, null_yield) < 0) { //ldpp_dout(dpp, 0) << "User lookup failed!" << dendl; throw -EACCES; @@ -294,14 +299,14 @@ void SysReqApplier<T>::load_acct_info(const DoutPrefixProvider* dpp, RGWUserInfo } } } + return user; } template <typename T> void SysReqApplier<T>::modify_request_state(const DoutPrefixProvider* dpp, req_state* const s) const { if (boost::logic::indeterminate(is_system)) { - RGWUserInfo unused_info; - load_acct_info(dpp, unused_info); + std::unique_ptr<rgw::sal::User> unused_user{ load_acct_info(dpp) }; } if (is_system) { diff --git a/src/rgw/rgw_auth_s3.h b/src/rgw/rgw_auth_s3.h index 2f7fd2d7598..5815a520e02 100644 --- a/src/rgw/rgw_auth_s3.h +++ b/src/rgw/rgw_auth_s3.h @@ -55,14 +55,14 @@ class STSAuthStrategy : public rgw::auth::Strategy, aplptr_t create_apl_local(CephContext* const cct, const req_state* const s, - const RGWUserInfo& user_info, + std::unique_ptr<rgw::sal::User> user, std::optional<RGWAccountInfo> account, std::vector<IAM::Policy> policies, const std::string& subuser, const std::optional<uint32_t>& perm_mask, const std::string& access_key_id) const override { auto apl = rgw::auth::add_sysreq(cct, driver, s, - LocalApplier(cct, user_info, std::move(account), std::move(policies), + LocalApplier(cct, std::move(user), std::move(account), std::move(policies), subuser, perm_mask, access_key_id)); return aplptr_t(new decltype(apl)(std::move(apl))); } @@ -72,7 +72,7 @@ class STSAuthStrategy : public rgw::auth::Strategy, RoleApplier::Role role, RoleApplier::TokenAttrs token_attrs) const override { auto apl = rgw::auth::add_sysreq(cct, driver, s, - rgw::auth::RoleApplier(cct, std::move(role), std::move(token_attrs))); + rgw::auth::RoleApplier(cct, driver, std::move(role), std::move(token_attrs))); return aplptr_t(new decltype(apl)(std::move(apl))); } @@ -176,14 +176,14 @@ class AWSAuthStrategy : public rgw::auth::Strategy, aplptr_t create_apl_local(CephContext* const cct, const req_state* const s, - const RGWUserInfo& user_info, + std::unique_ptr<rgw::sal::User> user, std::optional<RGWAccountInfo> account, std::vector<IAM::Policy> policies, const std::string& subuser, const std::optional<uint32_t>& perm_mask, const std::string& access_key_id) const override { auto apl = rgw::auth::add_sysreq(cct, driver, s, - LocalApplier(cct, user_info, std::move(account), std::move(policies), + LocalApplier(cct, std::move(user), std::move(account), std::move(policies), subuser, perm_mask, access_key_id)); /* TODO(rzarzynski): replace with static_ptr. */ return aplptr_t(new decltype(apl)(std::move(apl))); diff --git a/src/rgw/rgw_bucket_logging.cc b/src/rgw/rgw_bucket_logging.cc index d24a53024f1..dd407f26e8c 100644 --- a/src/rgw/rgw_bucket_logging.cc +++ b/src/rgw/rgw_bucket_logging.cc @@ -192,7 +192,7 @@ ceph::coarse_real_time time_from_name(const std::string& obj_name, const DoutPre ldpp_dout(dpp, 1) << "ERROR: logging object name too short: " << obj_name << dendl; return extracted_time; } - const auto time_start_pos = obj_name_length - (time_format_length + UniqueStringLength + 1); + const auto time_start_pos = obj_name_length - (time_format_length + UniqueStringLength + 1); // note: +1 is for the dash between the timestamp and the unique string std::string time_str = obj_name.substr(time_start_pos, time_format_length); @@ -206,6 +206,13 @@ ceph::coarse_real_time time_from_name(const std::string& obj_name, const DoutPre return extracted_time; } +std::string full_bucket_name(const std::unique_ptr<rgw::sal::Bucket>& bucket) { + if (bucket->get_tenant().empty()) { + return bucket->get_name(); + } + return fmt::format("{}:{}", bucket->get_tenant(), bucket->get_name()); +} + int new_logging_object(const configuration& conf, const std::unique_ptr<rgw::sal::Bucket>& bucket, std::string& obj_name, @@ -235,23 +242,22 @@ int new_logging_object(const configuration& conf, conf.target_prefix, to_string(bucket->get_owner()), source_region, - bucket->get_name(), + full_bucket_name(bucket), t, t, unique); } break; } - int ret = bucket->set_logging_object_name(obj_name, conf.target_prefix, y, dpp, init_obj, objv_tracker); if (ret == -EEXIST || ret == -ECANCELED) { if (ret = bucket->get_logging_object_name(obj_name, conf.target_prefix, y, dpp, nullptr); ret < 0) { ldpp_dout(dpp, 1) << "ERROR: failed to get name of logging object of bucket '" << - conf.target_bucket << "'. ret = " << ret << dendl; + conf.target_bucket << "' and prefix '" << conf.target_prefix << "', ret = " << ret << dendl; return ret; } ldpp_dout(dpp, 20) << "INFO: name already set. got name of logging object '" << obj_name << "' of bucket '" << - conf.target_bucket << "'" << dendl; + conf.target_bucket << "' and prefix '" << conf.target_prefix << "'" << dendl; return -ECANCELED; } else if (ret < 0) { ldpp_dout(dpp, 1) << "ERROR: failed to write name of logging object '" << obj_name << "' of bucket '" << @@ -263,6 +269,44 @@ int new_logging_object(const configuration& conf, return 0; } +int commit_logging_object(const configuration& conf, + const DoutPrefixProvider *dpp, + rgw::sal::Driver* driver, + const std::string& tenant_name, + optional_yield y) { + std::string target_bucket_name; + std::string target_tenant_name; + auto ret = rgw_parse_url_bucket(conf.target_bucket, tenant_name, target_tenant_name, target_bucket_name); + if (ret < 0) { + ldpp_dout(dpp, 1) << "ERROR: failed to parse target bucket '" << conf.target_bucket << "' when commiting logging object, ret = " + << ret << dendl; + return ret; + } + const rgw_bucket target_bucket_id(target_tenant_name, target_bucket_name); + std::unique_ptr<rgw::sal::Bucket> target_bucket; + ret = driver->load_bucket(dpp, target_bucket_id, + &target_bucket, y); + if (ret < 0) { + ldpp_dout(dpp, 1) << "ERROR: failed to get target logging bucket '" << target_bucket_id << "' when commiting logging object, ret = " + << ret << dendl; + return ret; + } + return commit_logging_object(conf, target_bucket, dpp, y); +} + +int commit_logging_object(const configuration& conf, + const std::unique_ptr<rgw::sal::Bucket>& target_bucket, + const DoutPrefixProvider *dpp, + optional_yield y) { + std::string obj_name; + if (const auto ret = target_bucket->get_logging_object_name(obj_name, conf.target_prefix, y, dpp, nullptr); ret < 0) { + ldpp_dout(dpp, 1) << "ERROR: failed to get name of logging object of bucket '" << + target_bucket->get_info().bucket << "'. ret = " << ret << dendl; + return ret; + } + return target_bucket->commit_logging_object(obj_name, y, dpp); +} + int rollover_logging_object(const configuration& conf, const std::unique_ptr<rgw::sal::Bucket>& bucket, std::string& obj_name, @@ -270,12 +314,16 @@ int rollover_logging_object(const configuration& conf, optional_yield y, bool must_commit, RGWObjVersionTracker* objv_tracker) { - if (conf.target_bucket != bucket->get_name()) { - ldpp_dout(dpp, 1) << "ERROR: bucket name mismatch: '" << conf.target_bucket << "' != '" << bucket->get_name() << "'" << dendl; + std::string target_bucket_name; + std::string target_tenant_name; + std::ignore = rgw_parse_url_bucket(conf.target_bucket, bucket->get_tenant(), target_tenant_name, target_bucket_name); + if (target_bucket_name != bucket->get_name() || target_tenant_name != bucket->get_tenant()) { + ldpp_dout(dpp, 1) << "ERROR: bucket name mismatch. conf= '" << conf.target_bucket << + "', bucket= '" << bucket->get_info().bucket << "'" << dendl; return -EINVAL; } const auto old_obj = obj_name; - const auto ret = new_logging_object(conf, bucket, obj_name, dpp, y, false, objv_tracker); + const auto ret = new_logging_object(conf, bucket, obj_name, dpp, y, false, objv_tracker); if (ret == -ECANCELED) { ldpp_dout(dpp, 20) << "INFO: rollover already performed for '" << old_obj << "' to bucket '" << conf.target_bucket << "'. ret = " << ret << dendl; @@ -342,14 +390,14 @@ S3 bucket short (ceph) log record - eTag };*/ -int log_record(rgw::sal::Driver* driver, +int log_record(rgw::sal::Driver* driver, const sal::Object* obj, - const req_state* s, - const std::string& op_name, - const std::string& etag, + const req_state* s, + const std::string& op_name, + const std::string& etag, size_t size, const configuration& conf, - const DoutPrefixProvider *dpp, + const DoutPrefixProvider *dpp, optional_yield y, bool async_completion, bool log_source_bucket) { @@ -357,11 +405,19 @@ int log_record(rgw::sal::Driver* driver, ldpp_dout(dpp, 1) << "ERROR: only bucket operations are logged" << dendl; return -EINVAL; } + std::string target_bucket_name; + std::string target_tenant_name; + auto ret = rgw_parse_url_bucket(conf.target_bucket, s->bucket_tenant, target_tenant_name, target_bucket_name); + if (ret < 0) { + ldpp_dout(dpp, 1) << "ERROR: failed to parse target bucket '" << conf.target_bucket << "', ret = " << ret << dendl; + return ret; + } + const rgw_bucket target_bucket_id(target_tenant_name, target_bucket_name); std::unique_ptr<rgw::sal::Bucket> target_bucket; - auto ret = driver->load_bucket(dpp, rgw_bucket(s->bucket_tenant, conf.target_bucket), + ret = driver->load_bucket(dpp, target_bucket_id, &target_bucket, y); if (ret < 0) { - ldpp_dout(dpp, 1) << "ERROR: failed to get target logging bucket '" << conf.target_bucket << "'. ret = " << ret << dendl; + ldpp_dout(dpp, 1) << "ERROR: failed to get target logging bucket '" << target_bucket_id << "'. ret = " << ret << dendl; return ret; } std::string obj_name; @@ -382,12 +438,14 @@ int log_record(rgw::sal::Driver* driver, // try to create the temporary log object for the first time ret = new_logging_object(conf, target_bucket, obj_name, dpp, y, true, nullptr); if (ret == 0) { - ldpp_dout(dpp, 20) << "INFO: first time logging for bucket '" << conf.target_bucket << "'" << dendl; + ldpp_dout(dpp, 20) << "INFO: first time logging for bucket '" << conf.target_bucket << "' and prefix '" << + conf.target_prefix << "'" << dendl; } else if (ret == -ECANCELED) { - ldpp_dout(dpp, 20) << "INFO: logging object '" << obj_name << "' already exists for bucket '" << conf.target_bucket << "', will be used" << dendl; + ldpp_dout(dpp, 20) << "INFO: logging object '" << obj_name << "' already exists for bucket '" << conf.target_bucket << "' and prefix" << + conf.target_prefix << "'" << dendl; } else { ldpp_dout(dpp, 1) << "ERROR: failed to create logging object of bucket '" << - conf.target_bucket << "' for the first time. ret = " << ret << dendl; + conf.target_bucket << "' and prefix '" << conf.target_prefix << "' for the first time. ret = " << ret << dendl; return ret; } } else { @@ -420,7 +478,7 @@ int log_record(rgw::sal::Driver* driver, bucket_name = s->src_bucket_name; } else { bucket_owner = to_string( s->bucket->get_owner()); - bucket_name = s->bucket->get_name(); + bucket_name = full_bucket_name(s->bucket); } switch (conf.logging_type) { @@ -459,7 +517,7 @@ int log_record(rgw::sal::Driver* driver, case LoggingType::Journal: record = fmt::format("{} {} [{:%d/%b/%Y:%H:%M:%S %z}] {} {} {} {} {}", dash_if_empty(to_string(s->bucket->get_owner())), - dash_if_empty(s->bucket->get_name()), + dash_if_empty(full_bucket_name(s->bucket)), t, op_name, dash_if_empty_or_null(obj, obj->get_name()), @@ -512,12 +570,12 @@ std::string object_name_oid(const rgw::sal::Bucket* bucket, const std::string& p int log_record(rgw::sal::Driver* driver, LoggingType type, const sal::Object* obj, - const req_state* s, - const std::string& op_name, - const std::string& etag, - size_t size, - const DoutPrefixProvider *dpp, - optional_yield y, + const req_state* s, + const std::string& op_name, + const std::string& etag, + size_t size, + const DoutPrefixProvider *dpp, + optional_yield y, bool async_completion, bool log_source_bucket) { if (!s->bucket) { @@ -534,7 +592,7 @@ int log_record(rgw::sal::Driver* driver, try { configuration.enabled = true; auto bl_iter = iter->second.cbegin(); - decode(configuration, bl_iter); + decode(configuration, bl_iter); if (type != LoggingType::Any && configuration.logging_type != type) { return 0; } @@ -543,20 +601,199 @@ int log_record(rgw::sal::Driver* driver, return 0; } } - ldpp_dout(dpp, 20) << "INFO: found matching logging configuration of bucket '" << s->bucket->get_name() << + ldpp_dout(dpp, 20) << "INFO: found matching logging configuration of bucket '" << s->bucket->get_info().bucket << "' configuration: " << configuration.to_json_str() << dendl; - if (auto ret = log_record(driver, obj, s, op_name, etag, size, configuration, dpp, y, async_completion, log_source_bucket); ret < 0) { - ldpp_dout(dpp, 1) << "ERROR: failed to perform logging for bucket '" << s->bucket->get_name() << + if (auto ret = log_record(driver, obj, s, op_name, etag, size, configuration, dpp, y, async_completion, log_source_bucket); ret < 0) { + ldpp_dout(dpp, 1) << "ERROR: failed to perform logging for bucket '" << s->bucket->get_info().bucket << "'. ret=" << ret << dendl; return ret; } } catch (buffer::error& err) { - ldpp_dout(dpp, 1) << "ERROR: failed to decode logging attribute '" << RGW_ATTR_BUCKET_LOGGING + ldpp_dout(dpp, 1) << "ERROR: failed to decode logging attribute '" << RGW_ATTR_BUCKET_LOGGING << "'. error: " << err.what() << dendl; return -EINVAL; } return 0; } +int get_bucket_id(const std::string& bucket_name, const std::string& tenant_name, rgw_bucket& bucket_id) { + std::string parsed_bucket_name; + std::string parsed_tenant_name; + if (const auto ret = rgw_parse_url_bucket(bucket_name, tenant_name, parsed_tenant_name, parsed_bucket_name); ret < 0) { + return ret; + } + bucket_id = rgw_bucket{parsed_tenant_name, parsed_bucket_name}; + return 0; +} + +int update_bucket_logging_sources(const DoutPrefixProvider* dpp, rgw::sal::Driver* driver, const rgw_bucket& target_bucket_id, const rgw_bucket& src_bucket_id, bool add, optional_yield y) { + std::unique_ptr<rgw::sal::Bucket> target_bucket; + const auto ret = driver->load_bucket(dpp, target_bucket_id, &target_bucket, y); + if (ret < 0) { + ldpp_dout(dpp, 1) << "WARNING: failed to get target bucket '" << target_bucket_id << "', ret = " << ret << dendl; + return ret; + } + return update_bucket_logging_sources(dpp, target_bucket, src_bucket_id, add, y); +} + +int update_bucket_logging_sources(const DoutPrefixProvider* dpp, std::unique_ptr<rgw::sal::Bucket>& bucket, const rgw_bucket& src_bucket_id, bool add, optional_yield y) { + return retry_raced_bucket_write(dpp, bucket.get(), [dpp, &bucket, &src_bucket_id, add, y] { + auto& attrs = bucket->get_attrs(); + auto iter = attrs.find(RGW_ATTR_BUCKET_LOGGING_SOURCES); + if (iter == attrs.end()) { + if (!add) { + ldpp_dout(dpp, 20) << "INFO: no logging sources attribute '" << RGW_ATTR_BUCKET_LOGGING_SOURCES + << "' for bucket '" << bucket->get_info().bucket << "', nothing to remove" << dendl; + return 0; + } + source_buckets sources{src_bucket_id}; + bufferlist bl; + ceph::encode(sources, bl); + attrs.insert(std::make_pair(RGW_ATTR_BUCKET_LOGGING_SOURCES, std::move(bl))); + return bucket->merge_and_store_attrs(dpp, attrs, y); + } + try { + source_buckets sources; + ceph::decode(sources, iter->second); + if ((add && sources.insert(src_bucket_id).second) || + (!add && sources.erase(src_bucket_id) > 0)) { + bufferlist bl; + ceph::encode(sources, bl); + iter->second = std::move(bl); + return bucket->merge_and_store_attrs(dpp, attrs, y); + } + } catch (buffer::error& err) { + ldpp_dout(dpp, 1) << "WARNING: failed to decode logging sources attribute '" << RGW_ATTR_BUCKET_LOGGING_SOURCES + << "' for bucket '" << bucket->get_info().bucket << "', error: " << err.what() << dendl; + } + ldpp_dout(dpp, 20) << "INFO: logging source '" << src_bucket_id << "' already " << + (add ? "added to" : "removed from") << " bucket '" << bucket->get_info().bucket << "'" << dendl; + return 0; + }, y); +} + + +int bucket_deletion_cleanup(const DoutPrefixProvider* dpp, + sal::Driver* driver, + sal::Bucket* bucket, + optional_yield y) { + // if the bucket is used a log bucket, we should delete all pending log objects + // and also delete the object holding the pending object name + auto& attrs = bucket->get_attrs(); + if (const auto iter = attrs.find(RGW_ATTR_BUCKET_LOGGING_SOURCES); iter != attrs.end()) { + try { + source_buckets sources; + ceph::decode(sources, iter->second); + for (const auto& source : sources) { + std::unique_ptr<rgw::sal::Bucket> src_bucket; + if (const auto ret = driver->load_bucket(dpp, source, &src_bucket, y); ret < 0) { + ldpp_dout(dpp, 1) << "WARNING: failed to get logging source bucket '" << source << "' for log bucket '" << + bucket->get_info().bucket << "', ret = " << ret << dendl; + continue; + } + auto& src_attrs = src_bucket->get_attrs(); + if (const auto iter = src_attrs.find(RGW_ATTR_BUCKET_LOGGING); iter != src_attrs.end()) { + configuration conf; + try { + auto bl_iter = iter->second.cbegin(); + decode(conf, bl_iter); + std::string obj_name; + RGWObjVersionTracker objv; + if (const auto ret = bucket->get_logging_object_name(obj_name, conf.target_prefix, y, dpp, &objv); ret < 0) { + ldpp_dout(dpp, 1) << "WARNING: failed to get logging object name for log bucket '" << bucket->get_info().bucket << + "', ret = " << ret << dendl; + continue; + } + if (const auto ret = bucket->remove_logging_object(obj_name, y, dpp); ret < 0) { + ldpp_dout(dpp, 1) << "WARNING: failed to delete pending logging object '" << obj_name << "' for log bucket '" << + bucket->get_info().bucket << "', ret = " << ret << dendl; + continue; + } + ldpp_dout(dpp, 20) << "INFO: successfully deleted pending logging object '" << obj_name << "' from deleted log bucket '" << + bucket->get_info().bucket << "'" << dendl; + if (const auto ret = bucket->remove_logging_object_name(conf.target_prefix, y, dpp, &objv); ret < 0) { + ldpp_dout(dpp, 1) << "WARNING: failed to delete object holding bucket logging object name for log bucket '" << + bucket->get_info().bucket << "', ret = " << ret << dendl; + continue; + } + ldpp_dout(dpp, 20) << "INFO: successfully deleted object holding bucket logging object name from deleted log bucket '" << + bucket->get_info().bucket << "'" << dendl; + } catch (buffer::error& err) { + ldpp_dout(dpp, 1) << "WARNING: failed to decode logging attribute '" << RGW_ATTR_BUCKET_LOGGING + << "' of bucket '" << src_bucket->get_info().bucket << "', error: " << err.what() << dendl; + } + } + } + } catch (buffer::error& err) { + ldpp_dout(dpp, 1) << "WARNING: failed to decode logging sources attribute '" << RGW_ATTR_BUCKET_LOGGING_SOURCES + << "' for bucket '" << bucket->get_info().bucket << "', error: " << err.what() << dendl; + return -EIO; + } + } + + return source_bucket_cleanup(dpp, driver, bucket, false, y); +} + +int source_bucket_cleanup(const DoutPrefixProvider* dpp, + sal::Driver* driver, + sal::Bucket* bucket, + bool remove_attr, + optional_yield y) { + std::optional<configuration> conf; + const auto& info = bucket->get_info(); + if (const auto ret = retry_raced_bucket_write(dpp, bucket, [dpp, bucket, &conf, &info, remove_attr, y] { + auto& attrs = bucket->get_attrs(); + if (auto iter = attrs.find(RGW_ATTR_BUCKET_LOGGING); iter != attrs.end()) { + try { + auto bl_iter = iter->second.cbegin(); + configuration tmp_conf; + tmp_conf.enabled = true; + decode(tmp_conf, bl_iter); + conf = std::move(tmp_conf); + } catch (buffer::error& err) { + ldpp_dout(dpp, 1) << "WARNING: failed to decode existing logging attribute '" << RGW_ATTR_BUCKET_LOGGING + << "' of bucket '" << info.bucket << "', error: " << err.what() << dendl; + return -EIO; + } + if (remove_attr) { + attrs.erase(iter); + return bucket->merge_and_store_attrs(dpp, attrs, y); + } + } + // nothing to remove or no need to remove + return 0; + }, y); ret < 0) { + if (remove_attr) { + ldpp_dout(dpp, 1) << "ERROR: failed to remove logging attribute '" << RGW_ATTR_BUCKET_LOGGING << "' from bucket '" << + info.bucket << "', ret = " << ret << dendl; + } + return ret; + } + if (!conf) { + // no logging attribute found + return 0; + } + if (const auto ret = commit_logging_object(*conf, dpp, driver, info.bucket.tenant, y); ret < 0) { + ldpp_dout(dpp, 1) << "WARNING: could not commit pending logging object of bucket '" << + info.bucket << "', ret = " << ret << dendl; + } else { + ldpp_dout(dpp, 20) << "INFO: successfully committed pending logging object of bucket '" << info.bucket << "'" << dendl; + } + rgw_bucket target_bucket_id; + rgw_bucket src_bucket_id{info.bucket.tenant, info.bucket.name}; + if (const auto ret = get_bucket_id(conf->target_bucket, info.bucket.tenant, target_bucket_id); ret < 0) { + ldpp_dout(dpp, 1) << "WARNING: failed to parse target bucket '" << conf->target_bucket << "', ret = " << ret << dendl; + return 0; + } + if (const auto ret = update_bucket_logging_sources(dpp, driver, target_bucket_id, src_bucket_id, false, y); ret < 0) { + ldpp_dout(dpp, 1) << "WARNING: could not update bucket logging source '" << + info.bucket << "', ret = " << ret << dendl; + return 0; + } + ldpp_dout(dpp, 20) << "INFO: successfully updated bucket logging source '" << + info.bucket << "'"<< dendl; + return 0; +} + } // namespace rgw::bucketlogging diff --git a/src/rgw/rgw_bucket_logging.h b/src/rgw/rgw_bucket_logging.h index d4877bafb0f..cbdb8b55f88 100644 --- a/src/rgw/rgw_bucket_logging.h +++ b/src/rgw/rgw_bucket_logging.h @@ -4,7 +4,6 @@ #pragma once #include <string> -#include <optional> #include <cstdint> #include "rgw_sal_fwd.h" #include "include/buffer.h" @@ -16,7 +15,7 @@ class XMLObj; namespace ceph { class Formatter; } class DoutPrefixProvider; struct req_state; -class RGWObjVersionTracker; +struct RGWObjVersionTracker; class RGWOp; namespace rgw::bucketlogging { @@ -66,6 +65,17 @@ enum class LoggingType {Standard, Journal, Any}; enum class PartitionDateSource {DeliveryTime, EventTime}; struct configuration { + bool operator==(const configuration& rhs) const { + return enabled == rhs.enabled && + target_bucket == rhs.target_bucket && + obj_key_format == rhs.obj_key_format && + target_prefix == rhs.target_prefix && + obj_roll_time == rhs.obj_roll_time && + logging_type == rhs.logging_type && + records_batch_size == rhs.records_batch_size && + date_source == rhs.date_source && + key_filter == rhs.key_filter; + } uint32_t default_obj_roll_time = 300; bool enabled = false; std::string target_bucket; @@ -129,6 +139,8 @@ struct configuration { }; WRITE_CLASS_ENCODER(configuration) +using source_buckets = std::set<rgw_bucket>; + constexpr unsigned MAX_BUCKET_LOGGING_BUFFER = 1000; using bucket_logging_records = std::array<std::string, MAX_BUCKET_LOGGING_BUFFER>; @@ -155,7 +167,7 @@ int log_record(rgw::sal::Driver* driver, bool async_completion, bool log_source_bucket); -// commit the pending log objec tto the log bucket +// commit the pending log objec to the log bucket // and create a new pending log object // if "must_commit" is "false" the function will return success even if the pending log object was not committed int rollover_logging_object(const configuration& conf, @@ -166,6 +178,23 @@ int rollover_logging_object(const configuration& conf, bool must_commit, RGWObjVersionTracker* objv_tracker); +// commit the pending log object to the log bucket +// use this for cleanup, when new pending object is not needed +// and target bucket is known +int commit_logging_object(const configuration& conf, + const std::unique_ptr<rgw::sal::Bucket>& target_bucket, + const DoutPrefixProvider *dpp, + optional_yield y); + +// commit the pending log object to the log bucket +// use this for cleanup, when new pending object is not needed +// and target bucket shoud be loaded based on the configuration +int commit_logging_object(const configuration& conf, + const DoutPrefixProvider *dpp, + rgw::sal::Driver* driver, + const std::string& tenant_name, + optional_yield y); + // return the oid of the object holding the name of the temporary logging object // bucket - log bucket // prefix - logging prefix from configuration. should be used when multiple buckets log into the same log bucket @@ -185,5 +214,37 @@ int log_record(rgw::sal::Driver* driver, optional_yield y, bool async_completion, bool log_source_bucket); + +// return (by ref) an rgw_bucket object with the bucket name and tenant name +// fails if the bucket name is not in the format: [tenant name:]<bucket name> +int get_bucket_id(const std::string& bucket_name, const std::string& tenant_name, rgw_bucket& bucket_id); + +// update (add or remove) a source bucket from the list of source buckets in the target bucket +// use this function when the target bucket is already loaded +int update_bucket_logging_sources(const DoutPrefixProvider* dpp, std::unique_ptr<rgw::sal::Bucket>& bucket, + const rgw_bucket& src_bucket, bool add, optional_yield y); + +// update (add or remove) a source bucket from the list of source buckets in the target bucket +// use this function when the target bucket is not known and needs to be loaded +int update_bucket_logging_sources(const DoutPrefixProvider* dpp, rgw::sal::Driver* driver, const rgw_bucket& target_bucket_id, + const rgw_bucket& src_bucket_id, bool add, optional_yield y); + +// when source bucket is deleted, all pending log objects should be comitted to the log bucket +// when the target bucket is deleted, all pending log objects should be deleted, as well as the object holding the pending log object name +int bucket_deletion_cleanup(const DoutPrefixProvider* dpp, + sal::Driver* driver, + sal::Bucket* bucket, + optional_yield y); + +// if bucket has bucket logging configuration associated with it then: +// if "remove_attr" is true, the bucket logging configuration should be removed from the bucket +// in addition: +// any pending log objects should be comitted to the log bucket +// and the log bucket should be updated to remove the bucket as a source +int source_bucket_cleanup(const DoutPrefixProvider* dpp, + sal::Driver* driver, + sal::Bucket* bucket, + bool remove_attr, + optional_yield y); } // namespace rgw::bucketlogging diff --git a/src/rgw/rgw_common.h b/src/rgw/rgw_common.h index d2917838f36..99f7db4f569 100644 --- a/src/rgw/rgw_common.h +++ b/src/rgw/rgw_common.h @@ -108,6 +108,7 @@ using ceph::crypto::MD5; #define RGW_ATTR_X_ROBOTS_TAG RGW_ATTR_PREFIX "x-robots-tag" #define RGW_ATTR_STORAGE_CLASS RGW_ATTR_PREFIX "storage_class" #define RGW_ATTR_BUCKET_LOGGING RGW_ATTR_PREFIX "logging" +#define RGW_ATTR_BUCKET_LOGGING_SOURCES RGW_ATTR_PREFIX "logging-sources" /* S3 Object Lock*/ #define RGW_ATTR_OBJECT_LOCK RGW_ATTR_PREFIX "object-lock" diff --git a/src/rgw/rgw_rest_bucket_logging.cc b/src/rgw/rgw_rest_bucket_logging.cc index ed12ce855a9..afd79b0a548 100644 --- a/src/rgw/rgw_rest_bucket_logging.cc +++ b/src/rgw/rgw_rest_bucket_logging.cc @@ -58,30 +58,29 @@ public: return; } - std::unique_ptr<rgw::sal::Bucket> bucket; - op_ret = driver->load_bucket(this, rgw_bucket(s->bucket_tenant, s->bucket_name), - &bucket, y); + const rgw_bucket src_bucket_id(s->bucket_tenant, s->bucket_name); + std::unique_ptr<rgw::sal::Bucket> src_bucket; + op_ret = driver->load_bucket(this, src_bucket_id, + &src_bucket, y); if (op_ret < 0) { - ldpp_dout(this, 1) << "ERROR: failed to get bucket '" << - (s->bucket_tenant.empty() ? s->bucket_name : s->bucket_tenant + ":" + s->bucket_name) << - "' info, ret = " << op_ret << dendl; + ldpp_dout(this, 1) << "ERROR: failed to get bucket '" << src_bucket_id << "', ret = " << op_ret << dendl; return; } - if (auto iter = bucket->get_attrs().find(RGW_ATTR_BUCKET_LOGGING); iter != bucket->get_attrs().end()) { + if (auto iter = src_bucket->get_attrs().find(RGW_ATTR_BUCKET_LOGGING); iter != src_bucket->get_attrs().end()) { try { configuration.enabled = true; decode(configuration, iter->second); } catch (buffer::error& err) { - ldpp_dout(this, 1) << "ERROR: failed to decode attribute '" << RGW_ATTR_BUCKET_LOGGING - << "'. error: " << err.what() << dendl; + ldpp_dout(this, 1) << "WARNING: failed to decode logging attribute '" << RGW_ATTR_BUCKET_LOGGING + << "' for bucket '" << src_bucket_id << "', error: " << err.what() << dendl; op_ret = -EIO; return; } } else { - ldpp_dout(this, 5) << "WARNING: no logging configuration on bucket '" << bucket->get_name() << "'" << dendl; + ldpp_dout(this, 5) << "WARNING: no logging configuration on bucket '" << src_bucket_id << "'" << dendl; return; } - ldpp_dout(this, 20) << "INFO: found logging configuration on bucket '" << bucket->get_name() << "'" + ldpp_dout(this, 20) << "INFO: found logging configuration on bucket '" << src_bucket_id << "'" << "'. configuration: " << configuration.to_json_str() << dendl; } @@ -159,58 +158,125 @@ class RGWPutBucketLoggingOp : public RGWDefaultResponseOp { return; } - std::unique_ptr<rgw::sal::Bucket> bucket; - op_ret = driver->load_bucket(this, rgw_bucket(s->bucket_tenant, s->bucket_name), - &bucket, y); + const rgw_bucket src_bucket_id(s->bucket_tenant, s->bucket_name); + std::unique_ptr<rgw::sal::Bucket> src_bucket; + op_ret = driver->load_bucket(this, src_bucket_id, + &src_bucket, y); if (op_ret < 0) { - ldpp_dout(this, 1) << "ERROR: failed to get bucket '" << s->bucket_name << "', ret = " << op_ret << dendl; + ldpp_dout(this, 1) << "ERROR: failed to get bucket '" << src_bucket_id << "', ret = " << op_ret << dendl; return; } - - auto& attrs = bucket->get_attrs(); if (!configuration.enabled) { - if (auto iter = attrs.find(RGW_ATTR_BUCKET_LOGGING); iter != attrs.end()) { - attrs.erase(iter); - } - } else { - std::unique_ptr<rgw::sal::Bucket> target_bucket; - op_ret = driver->load_bucket(this, rgw_bucket(s->bucket_tenant, configuration.target_bucket), - &target_bucket, y); - if (op_ret < 0) { - ldpp_dout(this, 1) << "ERROR: failed to get target bucket '" << configuration.target_bucket << "', ret = " << op_ret << dendl; - return; - } - const auto& target_attrs = target_bucket->get_attrs(); - if (target_attrs.find(RGW_ATTR_BUCKET_LOGGING) != target_attrs.end()) { - // target bucket must not have logging set on it - ldpp_dout(this, 1) << "ERROR: logging target bucket '" << configuration.target_bucket << "', is configured with bucket logging" << dendl; - op_ret = -EINVAL; - return; - } - // TODO: verify target bucket does not have encryption - bufferlist conf_bl; - encode(configuration, conf_bl); - attrs[RGW_ATTR_BUCKET_LOGGING] = conf_bl; - // TODO: should we add attribute to target bucket indicating it is target to bucket logging? - // if we do, how do we maintain it when bucket logging changes? + op_ret = rgw::bucketlogging::source_bucket_cleanup(this, driver, src_bucket.get(), true, y); + return; + } + + // set logging configuration + rgw_bucket target_bucket_id; + if (op_ret = rgw::bucketlogging::get_bucket_id(configuration.target_bucket, s->bucket_tenant, target_bucket_id); op_ret < 0) { + ldpp_dout(this, 1) << "ERROR: failed to parse target bucket '" << configuration.target_bucket << "', ret = " << op_ret << dendl; + return; + } + + if (target_bucket_id == src_bucket_id) { + ldpp_dout(this, 1) << "ERROR: target bucket '" << target_bucket_id << "' must be different from source bucket" << dendl; + op_ret = -EINVAL; + return; + } + std::unique_ptr<rgw::sal::Bucket> target_bucket; + op_ret = driver->load_bucket(this, target_bucket_id, + &target_bucket, y); + if (op_ret < 0) { + ldpp_dout(this, 1) << "ERROR: failed to get target bucket '" << target_bucket_id << "', ret = " << op_ret << dendl; + return; + } + auto& target_attrs = target_bucket->get_attrs(); + if (target_attrs.find(RGW_ATTR_BUCKET_LOGGING) != target_attrs.end()) { + // target bucket must not have logging set on it + ldpp_dout(this, 1) << "ERROR: logging target bucket '" << target_bucket_id << "', is configured with bucket logging" << dendl; + op_ret = -EINVAL; + return; } - // TODO: use retry_raced_bucket_write from rgw_op.cc - op_ret = bucket->merge_and_store_attrs(this, attrs, y); + // verify target bucket does not have encryption + if (target_attrs.find(RGW_ATTR_BUCKET_ENCRYPTION_POLICY) != target_attrs.end()) { + ldpp_dout(this, 1) << "ERROR: logging target bucket '" << target_bucket_id << "', is configured with encryption" << dendl; + op_ret = -EINVAL; + return; + } + std::optional<rgw::bucketlogging::configuration> old_conf; + bufferlist conf_bl; + encode(configuration, conf_bl); + op_ret = retry_raced_bucket_write(this, src_bucket.get(), [this, &conf_bl, &src_bucket, &old_conf, &configuration, y] { + auto& attrs = src_bucket->get_attrs(); + auto it = attrs.find(RGW_ATTR_BUCKET_LOGGING); + if (it != attrs.end()) { + try { + rgw::bucketlogging::configuration tmp_conf; + tmp_conf.enabled = true; + decode(tmp_conf, it->second); + old_conf = std::move(tmp_conf); + } catch (buffer::error& err) { + ldpp_dout(this, 1) << "WARNING: failed to decode existing logging attribute '" << RGW_ATTR_BUCKET_LOGGING + << "' for bucket '" << src_bucket->get_info().bucket << "', error: " << err.what() << dendl; + } + if (!old_conf || (old_conf && *old_conf != configuration)) { + // conf changed (or was unknown) - update + it->second = conf_bl; + return src_bucket->merge_and_store_attrs(this, attrs, y); + } + // nothing to update + return 0; + } + // conf was added + attrs.insert(std::make_pair(RGW_ATTR_BUCKET_LOGGING, conf_bl)); + return src_bucket->merge_and_store_attrs(this, attrs, y); + }, y); if (op_ret < 0) { ldpp_dout(this, 1) << "ERROR: failed to set logging attribute '" << RGW_ATTR_BUCKET_LOGGING << "' to bucket '" << - bucket->get_name() << "', ret = " << op_ret << dendl; + src_bucket_id << "', ret = " << op_ret << dendl; return; } - - ldpp_dout(this, 20) << "INFO: " << (configuration.enabled ? "wrote" : "removed") - << " logging configuration. bucket '" << bucket->get_name() << "'. configuration: " << - configuration.to_json_str() << dendl; + if (!old_conf) { + ldpp_dout(this, 20) << "INFO: new logging configuration added to bucket '" << src_bucket_id << "'. configuration: " << + configuration.to_json_str() << dendl; + if (const auto ret = rgw::bucketlogging::update_bucket_logging_sources(this, target_bucket, src_bucket_id, true, y); ret < 0) { + ldpp_dout(this, 1) << "WARNING: failed to add source bucket '" << src_bucket_id << "' to logging sources of target bucket '" << + target_bucket_id << "', ret = " << ret << dendl; + } + } else if (*old_conf != configuration) { + // conf changed - do cleanup + if (const auto ret = commit_logging_object(*old_conf, target_bucket, this, y); ret < 0) { + ldpp_dout(this, 1) << "WARNING: could not commit pending logging object when updating logging configuration of bucket '" << + src_bucket->get_info().bucket << "', ret = " << ret << dendl; + } else { + ldpp_dout(this, 20) << "INFO: committed pending logging object when updating logging configuration of bucket '" << + src_bucket->get_info().bucket << "'" << dendl; + } + if (old_conf->target_bucket != configuration.target_bucket) { + rgw_bucket old_target_bucket_id; + if (const auto ret = rgw::bucketlogging::get_bucket_id(old_conf->target_bucket, s->bucket_tenant, old_target_bucket_id); ret < 0) { + ldpp_dout(this, 1) << "ERROR: failed to parse target bucket '" << old_conf->target_bucket << "', ret = " << ret << dendl; + return; + } + if (const auto ret = rgw::bucketlogging::update_bucket_logging_sources(this, driver, old_target_bucket_id, src_bucket_id, false, y); ret < 0) { + ldpp_dout(this, 1) << "WARNING: failed to remove source bucket '" << src_bucket_id << "' from logging sources of original target bucket '" << + old_target_bucket_id << "', ret = " << ret << dendl; + } + if (const auto ret = rgw::bucketlogging::update_bucket_logging_sources(this, target_bucket, src_bucket_id, true, y); ret < 0) { + ldpp_dout(this, 1) << "WARNING: failed to add source bucket '" << src_bucket_id << "' to logging sources of target bucket '" << + target_bucket_id << "', ret = " << ret << dendl; + } + } + ldpp_dout(this, 20) << "INFO: wrote logging configuration to bucket '" << src_bucket_id << "'. configuration: " << + configuration.to_json_str() << dendl; + } else { + ldpp_dout(this, 20) << "INFO: logging configuration of bucket '" << src_bucket_id << "' did not change" << dendl; + } } }; // Post /<bucket name>/?logging -// actual configuration is XML encoded in the body of the message class RGWPostBucketLoggingOp : public RGWDefaultResponseOp { int verify_permission(optional_yield y) override { auto [has_s3_existing_tag, has_s3_resource_tag] = rgw_check_policy_condition(this, s, false); @@ -234,17 +300,18 @@ class RGWPostBucketLoggingOp : public RGWDefaultResponseOp { return; } - std::unique_ptr<rgw::sal::Bucket> bucket; - op_ret = driver->load_bucket(this, rgw_bucket(s->bucket_tenant, s->bucket_name), - &bucket, y); + const rgw_bucket src_bucket_id(s->bucket_tenant, s->bucket_name); + std::unique_ptr<rgw::sal::Bucket> src_bucket; + op_ret = driver->load_bucket(this, src_bucket_id, + &src_bucket, y); if (op_ret < 0) { - ldpp_dout(this, 1) << "ERROR: failed to get bucket '" << s->bucket_name << "', ret = " << op_ret << dendl; + ldpp_dout(this, 1) << "ERROR: failed to get bucket '" << src_bucket_id << "', ret = " << op_ret << dendl; return; } - const auto& bucket_attrs = bucket->get_attrs(); + const auto& bucket_attrs = src_bucket->get_attrs(); auto iter = bucket_attrs.find(RGW_ATTR_BUCKET_LOGGING); if (iter == bucket_attrs.end()) { - ldpp_dout(this, 1) << "WARNING: no logging configured on bucket" << dendl; + ldpp_dout(this, 1) << "WARNING: no logging configured on bucket '" << src_bucket_id << "'" << dendl; return; } rgw::bucketlogging::configuration configuration; @@ -252,33 +319,38 @@ class RGWPostBucketLoggingOp : public RGWDefaultResponseOp { configuration.enabled = true; decode(configuration, iter->second); } catch (buffer::error& err) { - ldpp_dout(this, 1) << "ERROR: failed to decode logging attribute '" << RGW_ATTR_BUCKET_LOGGING - << "'. error: " << err.what() << dendl; + ldpp_dout(this, 1) << "WARNING: failed to decode logging attribute '" << RGW_ATTR_BUCKET_LOGGING + << "' for bucket '" << src_bucket_id << "', error: " << err.what() << dendl; op_ret = -EINVAL; return; } + rgw_bucket target_bucket_id; + if (op_ret = rgw::bucketlogging::get_bucket_id(configuration.target_bucket, s->bucket_tenant, target_bucket_id); op_ret < 0) { + ldpp_dout(this, 1) << "ERROR: failed to parse target bucket '" << configuration.target_bucket << "', ret = " << op_ret << dendl; + return; + } std::unique_ptr<rgw::sal::Bucket> target_bucket; - op_ret = driver->load_bucket(this, rgw_bucket(s->bucket_tenant, configuration.target_bucket), + op_ret = driver->load_bucket(this, target_bucket_id, &target_bucket, y); if (op_ret < 0) { - ldpp_dout(this, 1) << "ERROR: failed to get target bucket '" << configuration.target_bucket << "', ret = " << op_ret << dendl; + ldpp_dout(this, 1) << "ERROR: failed to get target bucket '" << target_bucket_id << "', ret = " << op_ret << dendl; return; } std::string obj_name; RGWObjVersionTracker objv_tracker; op_ret = target_bucket->get_logging_object_name(obj_name, configuration.target_prefix, null_yield, this, &objv_tracker); if (op_ret < 0) { - ldpp_dout(this, 1) << "ERROR: failed to get pending logging object name from target bucket '" << configuration.target_bucket << "'" << dendl; + ldpp_dout(this, 1) << "ERROR: failed to get pending logging object name from target bucket '" << target_bucket_id << "'" << dendl; return; } op_ret = rgw::bucketlogging::rollover_logging_object(configuration, target_bucket, obj_name, this, null_yield, true, &objv_tracker); if (op_ret < 0) { ldpp_dout(this, 1) << "ERROR: failed to flush pending logging object '" << obj_name - << "' to target bucket '" << configuration.target_bucket << "'" << dendl; + << "' to target bucket '" << target_bucket_id << "'" << dendl; return; } - ldpp_dout(this, 20) << "flushed pending logging object '" << obj_name + ldpp_dout(this, 20) << "INFO: flushed pending logging object '" << obj_name << "' to target bucket '" << configuration.target_bucket << "'" << dendl; } }; diff --git a/src/rgw/rgw_rest_s3.cc b/src/rgw/rgw_rest_s3.cc index 9edb79d8fd0..885991244a6 100644 --- a/src/rgw/rgw_rest_s3.cc +++ b/src/rgw/rgw_rest_s3.cc @@ -6732,7 +6732,7 @@ rgw::auth::s3::LocalEngine::authenticate( /* Ignore signature for HTTP OPTIONS */ if (s->op_type == RGW_OP_OPTIONS_CORS) { auto apl = apl_factory->create_apl_local( - cct, s, user->get_info(), std::move(account), std::move(policies), + cct, s, std::move(user), std::move(account), std::move(policies), k.subuser, std::nullopt, access_key_id); return result_t::grant(std::move(apl), completer_factory(k.key)); } @@ -6753,7 +6753,7 @@ rgw::auth::s3::LocalEngine::authenticate( } auto apl = apl_factory->create_apl_local( - cct, s, user->get_info(), std::move(account), std::move(policies), + cct, s, std::move(user), std::move(account), std::move(policies), k.subuser, std::nullopt, access_key_id); return result_t::grant(std::move(apl), completer_factory(k.key)); } @@ -6962,7 +6962,7 @@ rgw::auth::s3::STSEngine::authenticate( string subuser; auto apl = local_apl_factory->create_apl_local( - cct, s, user->get_info(), std::move(account), std::move(policies), + cct, s, std::move(user), std::move(account), std::move(policies), subuser, token.perm_mask, std::string(_access_key_id)); return result_t::grant(std::move(apl), completer_factory(token.secret_access_key)); } diff --git a/src/rgw/rgw_s3_filter.h b/src/rgw/rgw_s3_filter.h index 9bbc4ef0088..0273da9a364 100644 --- a/src/rgw/rgw_s3_filter.h +++ b/src/rgw/rgw_s3_filter.h @@ -9,6 +9,7 @@ class XMLObj; struct rgw_s3_key_filter { + bool operator==(const rgw_s3_key_filter& rhs) const = default; std::string prefix_rule; std::string suffix_rule; std::string regex_rule; diff --git a/src/rgw/rgw_sal.h b/src/rgw/rgw_sal.h index 4b94f74b851..97e25179fc9 100644 --- a/src/rgw/rgw_sal.h +++ b/src/rgw/rgw_sal.h @@ -1006,20 +1006,27 @@ class Bucket { optional_yield y, const DoutPrefixProvider *dpp) = 0; /** Read the name of the pending bucket logging object name */ - virtual int get_logging_object_name(std::string& obj_name, - const std::string& prefix, - optional_yield y, + virtual int get_logging_object_name(std::string& obj_name, + const std::string& prefix, + optional_yield y, const DoutPrefixProvider *dpp, RGWObjVersionTracker* objv_tracker) = 0; /** Update the name of the pending bucket logging object name */ - virtual int set_logging_object_name(const std::string& obj_name, - const std::string& prefix, - optional_yield y, - const DoutPrefixProvider *dpp, + virtual int set_logging_object_name(const std::string& obj_name, + const std::string& prefix, + optional_yield y, + const DoutPrefixProvider *dpp, bool new_obj, RGWObjVersionTracker* objv_tracker) = 0; + /** Remove the object holding the name of the pending bucket logging object */ + virtual int remove_logging_object_name(const std::string& prefix, + optional_yield y, + const DoutPrefixProvider *dpp, + RGWObjVersionTracker* objv_tracker) = 0; /** Move the pending bucket logging object into the bucket */ virtual int commit_logging_object(const std::string& obj_name, optional_yield y, const DoutPrefixProvider *dpp) = 0; + //** Remove the pending bucket logging object */ + virtual int remove_logging_object(const std::string& obj_name, optional_yield y, const DoutPrefixProvider *dpp) = 0; /** Write a record to the pending bucket logging object */ virtual int write_logging_object(const std::string& obj_name, const std::string& record, optional_yield y, const DoutPrefixProvider *dpp, bool async_completion) = 0; diff --git a/src/rgw/rgw_sal_filter.h b/src/rgw/rgw_sal_filter.h index 947ce9d4bf5..b6b6ed42b8f 100644 --- a/src/rgw/rgw_sal_filter.h +++ b/src/rgw/rgw_sal_filter.h @@ -666,24 +666,33 @@ public: optional_yield y, const DoutPrefixProvider *dpp) override { return next->remove_topics(objv_tracker, y, dpp); } - int get_logging_object_name(std::string& obj_name, - const std::string& prefix, - optional_yield y, + int get_logging_object_name(std::string& obj_name, + const std::string& prefix, + optional_yield y, const DoutPrefixProvider *dpp, RGWObjVersionTracker* objv_tracker) override { return next->get_logging_object_name(obj_name, prefix, y, dpp, objv_tracker); } - int set_logging_object_name(const std::string& obj_name, - const std::string& prefix, - optional_yield y, - const DoutPrefixProvider *dpp, + int set_logging_object_name(const std::string& obj_name, + const std::string& prefix, + optional_yield y, + const DoutPrefixProvider *dpp, bool new_obj, RGWObjVersionTracker* objv_track) override { - return next->set_logging_object_name(obj_name, prefix, y, dpp, new_obj, objv_track); + return next->set_logging_object_name(obj_name, prefix, y, dpp, new_obj, objv_track); + } + int remove_logging_object_name(const std::string& prefix, + optional_yield y, + const DoutPrefixProvider *dpp, + RGWObjVersionTracker* objv_tracker) override { + return next->remove_logging_object_name(prefix, y, dpp, objv_tracker); } int commit_logging_object(const std::string& obj_name, optional_yield y, const DoutPrefixProvider *dpp)override { return next->commit_logging_object(obj_name, y, dpp); } + int remove_logging_object(const std::string& obj_name, optional_yield y, const DoutPrefixProvider *dpp) override { + return next->remove_logging_object(obj_name, y, dpp); + } int write_logging_object(const std::string& obj_name, const std::string& record, optional_yield y, const DoutPrefixProvider *dpp, bool async_completion) override { return next->write_logging_object(obj_name, record, y, dpp, async_completion); } diff --git a/src/rgw/rgw_sal_store.h b/src/rgw/rgw_sal_store.h index 5cb98d23158..99b90564997 100644 --- a/src/rgw/rgw_sal_store.h +++ b/src/rgw/rgw_sal_store.h @@ -253,18 +253,23 @@ class StoreBucket : public Bucket { optional_yield y, const DoutPrefixProvider *dpp) override {return 0;} int remove_topics(RGWObjVersionTracker* objv_tracker, optional_yield y, const DoutPrefixProvider *dpp) override {return 0;} - int get_logging_object_name(std::string& obj_name, - const std::string& prefix, - optional_yield y, + int get_logging_object_name(std::string& obj_name, + const std::string& prefix, + optional_yield y, const DoutPrefixProvider *dpp, RGWObjVersionTracker* objv_tracker) override { return 0; } - int set_logging_object_name(const std::string& obj_name, - const std::string& prefix, - optional_yield y, - const DoutPrefixProvider *dpp, + int set_logging_object_name(const std::string& obj_name, + const std::string& prefix, + optional_yield y, + const DoutPrefixProvider *dpp, bool new_obj, RGWObjVersionTracker* objv_tracker) override { return 0; } + int remove_logging_object_name(const std::string& prefix, + optional_yield y, + const DoutPrefixProvider *dpp, + RGWObjVersionTracker* objv_tracker) override { return 0; } int commit_logging_object(const std::string& obj_name, optional_yield y, const DoutPrefixProvider *dpp) override { return 0; } + int remove_logging_object(const std::string& obj_name, optional_yield y, const DoutPrefixProvider *dpp) override { return 0; } int write_logging_object(const std::string& obj_name, const std::string& record, optional_yield y, const DoutPrefixProvider *dpp, bool async_completion) override { return 0; } diff --git a/src/rgw/rgw_swift_auth.cc b/src/rgw/rgw_swift_auth.cc index 032b3734bf9..937f74601b3 100644 --- a/src/rgw/rgw_swift_auth.cc +++ b/src/rgw/rgw_swift_auth.cc @@ -522,7 +522,7 @@ ExternalTokenEngine::authenticate(const DoutPrefixProvider* dpp, } auto apl = apl_factory->create_apl_local( - cct, s, user->get_info(), std::move(account), + cct, s, std::move(user), std::move(account), std::move(policies), extract_swift_subuser(swift_user), std::nullopt, LocalApplier::NO_ACCESS_KEY); return result_t::grant(std::move(apl)); @@ -685,7 +685,7 @@ SignedTokenEngine::authenticate(const DoutPrefixProvider* dpp, } auto apl = apl_factory->create_apl_local( - cct, s, user->get_info(), std::move(account), + cct, s, std::move(user), std::move(account), std::move(policies), extract_swift_subuser(swift_user), std::nullopt, LocalApplier::NO_ACCESS_KEY); return result_t::grant(std::move(apl)); diff --git a/src/rgw/rgw_swift_auth.h b/src/rgw/rgw_swift_auth.h index 9049c54f5ca..c27a24a2619 100644 --- a/src/rgw/rgw_swift_auth.h +++ b/src/rgw/rgw_swift_auth.h @@ -23,8 +23,8 @@ namespace swift { class TempURLApplier : public rgw::auth::LocalApplier { public: TempURLApplier(CephContext* const cct, - const RGWUserInfo& user_info) - : LocalApplier(cct, user_info, std::nullopt, {}, LocalApplier::NO_SUBUSER, + std::unique_ptr<rgw::sal::User> user) + : LocalApplier(cct, std::move(user), std::nullopt, {}, LocalApplier::NO_SUBUSER, std::nullopt, LocalApplier::NO_ACCESS_KEY) {} @@ -155,8 +155,8 @@ public: class SwiftAnonymousApplier : public rgw::auth::LocalApplier { public: SwiftAnonymousApplier(CephContext* const cct, - const RGWUserInfo& user_info) - : LocalApplier(cct, user_info, std::nullopt, {}, LocalApplier::NO_SUBUSER, + std::unique_ptr<rgw::sal::User> user) + : LocalApplier(cct, std::move(user), std::nullopt, {}, LocalApplier::NO_SUBUSER, std::nullopt, LocalApplier::NO_ACCESS_KEY) { } bool is_admin_of(const rgw_owner& o) const {return false;} @@ -238,7 +238,7 @@ class DefaultStrategy : public rgw::auth::Strategy, aplptr_t create_apl_local(CephContext* const cct, const req_state* const s, - const RGWUserInfo& user_info, + std::unique_ptr<rgw::sal::User> user, std::optional<RGWAccountInfo> account, std::vector<IAM::Policy> policies, const std::string& subuser, @@ -247,7 +247,7 @@ class DefaultStrategy : public rgw::auth::Strategy, auto apl = \ rgw::auth::add_3rdparty(driver, rgw_user(s->account_name), rgw::auth::add_sysreq(cct, driver, s, - LocalApplier(cct, user_info, std::move(account), std::move(policies), + LocalApplier(cct, std::move(user), std::move(account), std::move(policies), subuser, perm_mask, access_key_id))); /* TODO(rzarzynski): replace with static_ptr. */ return aplptr_t(new decltype(apl)(std::move(apl))); @@ -259,7 +259,9 @@ class DefaultStrategy : public rgw::auth::Strategy, /* TempURL doesn't need any user account override. It's a Swift-specific * mechanism that requires account name internally, so there is no * business with delegating the responsibility outside. */ - return aplptr_t(new rgw::auth::swift::TempURLApplier(cct, user_info)); + std::unique_ptr<rgw::sal::User> user = s->user->clone(); + user->get_info() = user_info; + return aplptr_t(new rgw::auth::swift::TempURLApplier(cct, std::move(user))); } public: diff --git a/src/test/ObjectMap/KeyValueDBMemory.cc b/src/test/ObjectMap/KeyValueDBMemory.cc index 234e963397e..cfe25930d6a 100644 --- a/src/test/ObjectMap/KeyValueDBMemory.cc +++ b/src/test/ObjectMap/KeyValueDBMemory.cc @@ -132,12 +132,26 @@ public: return ""; } + string_view key_as_sv() override { + if (valid()) + return (*it).first.second; + else + return ""; + } + pair<string,string> raw_key() override { if (valid()) return (*it).first; else return make_pair("", ""); } + + pair<string_view,string_view> raw_key_as_sv() override { + if (valid()) + return (*it).first; + else + return make_pair("", ""); + } bool raw_key_is_prefixed(const string &prefix) override { return prefix == (*it).first.first; @@ -150,6 +164,13 @@ public: return bufferlist(); } + std::string_view value_as_sv() override { + if (valid()) + return std::string_view{it->second.c_str(), it->second.length()}; + else + return std::string_view(); + } + int status() override { return 0; } diff --git a/src/test/cli/radosgw-admin/help.t b/src/test/cli/radosgw-admin/help.t index 8b7c6d6e3db..c1675d11a80 100644 --- a/src/test/cli/radosgw-admin/help.t +++ b/src/test/cli/radosgw-admin/help.t @@ -43,7 +43,8 @@ bucket sync disable disable bucket sync bucket sync enable enable bucket sync bucket radoslist list rados objects backing bucket's objects - bucket logging flush flush pending log records object of source bucket to the log bucket to bucket + bucket logging flush flush pending log records object of source bucket to the log bucket + bucket logging info get info on bucket logging configuration on source bucket or list of sources in log bucket bi get retrieve bucket index object entries bi put store bucket index object entries bi list list raw bucket index entries diff --git a/src/test/librbd/migration/test_mock_HttpClient.cc b/src/test/librbd/migration/test_mock_HttpClient.cc index f3888755c79..901c4231dd0 100644 --- a/src/test/librbd/migration/test_mock_HttpClient.cc +++ b/src/test/librbd/migration/test_mock_HttpClient.cc @@ -307,7 +307,7 @@ TEST_F(TestMockMigrationHttpClient, OpenCloseHttps) { boost::asio::ssl::context ssl_context{boost::asio::ssl::context::tlsv12}; load_server_certificate(ssl_context); - boost::beast::ssl_stream<boost::beast::tcp_stream> ssl_stream{ + boost::asio::ssl::stream<boost::asio::ip::tcp::socket> ssl_stream{ std::move(socket), ssl_context}; C_SaferCond on_ssl_handshake_ctx; @@ -341,7 +341,7 @@ TEST_F(TestMockMigrationHttpClient, OpenHttpsHandshakeFail) { boost::asio::ssl::context ssl_context{boost::asio::ssl::context::tlsv12}; load_server_certificate(ssl_context); - boost::beast::ssl_stream<boost::beast::tcp_stream> ssl_stream{ + boost::asio::ssl::stream<boost::asio::ip::tcp::socket> ssl_stream{ std::move(socket), ssl_context}; C_SaferCond on_ssl_handshake_ctx; diff --git a/src/test/objectstore/ObjectStoreImitator.h b/src/test/objectstore/ObjectStoreImitator.h index d71d7f2fe58..875f9041b83 100644 --- a/src/test/objectstore/ObjectStoreImitator.h +++ b/src/test/objectstore/ObjectStoreImitator.h @@ -347,6 +347,16 @@ public: ) override { return {}; } + + int omap_iterate(CollectionHandle &c, ///< [in] collection + const ghobject_t &oid, ///< [in] object + /// [in] where the iterator should point to at the beginning + omap_iter_seek_t start_from, + std::function<omap_iter_ret_t(std::string_view, std::string_view)> f + ) override { + return 0; + } + void set_fsid(uuid_d u) override {} uuid_d get_fsid() override { return {}; } uint64_t estimate_objects_overhead(uint64_t num_objects) override { diff --git a/src/test/objectstore/test_bluefs.cc b/src/test/objectstore/test_bluefs.cc index d3b0d0ac3a4..60147b5397c 100644 --- a/src/test/objectstore/test_bluefs.cc +++ b/src/test/objectstore/test_bluefs.cc @@ -1608,6 +1608,91 @@ TEST(BlueFS, test_log_runway_advance_seq) { fs.compact_log(); } +TEST(BlueFS, test_69481_truncate_corrupts_log) { + uint64_t size = 1048576 * 128; + TempBdev bdev{size}; + BlueFS fs(g_ceph_context); + ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false)); + uuid_d fsid; + ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, false, false })); + ASSERT_EQ(0, fs.mount()); + ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, false, false })); + + BlueFS::FileWriter *f = nullptr; + BlueFS::FileWriter *a = nullptr; + ASSERT_EQ(0, fs.mkdir("dir")); + ASSERT_EQ(0, fs.open_for_write("dir", "test-file", &f, false)); + ASSERT_EQ(0, fs.open_for_write("dir", "just-allocate", &a, false)); + + // create 4 distinct extents in file f + // a is here only to prevent f from merging extents together + fs.preallocate(f->file, 0, 0x10000); + fs.preallocate(a->file, 0, 0x10000); + fs.preallocate(f->file, 0, 0x20000); + fs.preallocate(a->file, 0, 0x20000); + fs.preallocate(f->file, 0, 0x30000); + fs.preallocate(a->file, 0, 0x30000); + fs.preallocate(f->file, 0, 0x40000); + fs.preallocate(a->file, 0, 0x40000); + fs.close_writer(a); + + fs.truncate(f, 0); + fs.fsync(f); + + bufferlist bl; + bl.append(std::string(" ", 0x15678)); + f->append(bl); + fs.truncate(f, 0x15678); + fs.fsync(f); + fs.close_writer(f); + + fs.umount(); + // remount to verify + ASSERT_EQ(0, fs.mount()); + fs.umount(); +} + +TEST(BlueFS, test_69481_truncate_asserts) { + uint64_t size = 1048576 * 128; + TempBdev bdev{size}; + BlueFS fs(g_ceph_context); + ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false)); + uuid_d fsid; + ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, false, false })); + ASSERT_EQ(0, fs.mount()); + ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, false, false })); + + BlueFS::FileWriter *f = nullptr; + BlueFS::FileWriter *a = nullptr; + ASSERT_EQ(0, fs.mkdir("dir")); + ASSERT_EQ(0, fs.open_for_write("dir", "test-file", &f, false)); + ASSERT_EQ(0, fs.open_for_write("dir", "just-allocate", &a, false)); + + // create 4 distinct extents in file f + // a is here only to prevent f from merging extents together + fs.preallocate(f->file, 0, 0x10000); + fs.preallocate(a->file, 0, 0x10000); + fs.preallocate(f->file, 0, 0x20000); + fs.preallocate(a->file, 0, 0x20000); + fs.preallocate(f->file, 0, 0x30000); + fs.preallocate(a->file, 0, 0x30000); + fs.preallocate(f->file, 0, 0x40000); + fs.preallocate(a->file, 0, 0x40000); + fs.close_writer(a); + + fs.truncate(f, 0); + fs.fsync(f); + + bufferlist bl; + bl.append(std::string(" ", 0x35678)); + f->append(bl); + fs.truncate(f, 0x35678); + fs.fsync(f); + fs.close_writer(f); + + fs.umount(); +} + int main(int argc, char **argv) { auto args = argv_to_vec(argc, argv); map<string,string> defaults = { diff --git a/src/test/pybind/pytest.ini b/src/test/pybind/pytest.ini index dccf2a346dc..97569e88299 100644 --- a/src/test/pybind/pytest.ini +++ b/src/test/pybind/pytest.ini @@ -7,3 +7,4 @@ markers = stats tier watch + wait diff --git a/src/test/pybind/test_ceph_argparse.py b/src/test/pybind/test_ceph_argparse.py index 3039223abdf..630e6046b24 100755 --- a/src/test/pybind/test_ceph_argparse.py +++ b/src/test/pybind/test_ceph_argparse.py @@ -217,7 +217,7 @@ class TestPG(TestArgparse): def test_pg_missing_args_output(self): ret, _, stderr = self._capture_output(['pg'], stderr=True) self.assertEqual({}, ret) - self.assertRegexpMatches(stderr, re.compile('no valid command found.* closest matches')) + self.assertRegex(stderr, re.compile('no valid command found.* closest matches')) def test_pg_wrong_arg_output(self): ret, _, stderr = self._capture_output(['pg', 'map', 'bad-pgid'], @@ -416,10 +416,10 @@ class TestMDS(TestArgparse): class TestFS(TestArgparse): - + def test_dump(self): self.check_0_or_1_natural_arg('fs', 'dump') - + def test_fs_new(self): self._assert_valid_command(['fs', 'new', 'default', 'metadata', 'data']) @@ -912,7 +912,7 @@ class TestOSD(TestArgparse): '1.2.3.4/567', '600.40']) self._assert_valid_command(['osd', 'blocklist', action, '1.2.3.4', '600.40']) - + self._assert_valid_command(['osd', 'blocklist', action, 'v1:1.2.3.4', '600.40']) self._assert_valid_command(['osd', 'blocklist', action, @@ -925,7 +925,7 @@ class TestOSD(TestArgparse): 'v2:[2607:f298:4:2243::5522]:0/0', '600.40']) self._assert_valid_command(['osd', 'blocklist', action, '[2001:0db8::85a3:0000:8a2e:0370:7334]:0/0', '600.40']) - + self.assertEqual({}, validate_command(sigdict, ['osd', 'blocklist', action, 'invalid', diff --git a/src/test/pybind/test_rados.py b/src/test/pybind/test_rados.py index cb2a4f96101..25423bd8dcb 100644 --- a/src/test/pybind/test_rados.py +++ b/src/test/pybind/test_rados.py @@ -207,7 +207,7 @@ class TestRados(object): def test_get_fsid(self): fsid = self.rados.get_fsid() - assert re.match('[0-9a-f\-]{36}', fsid, re.I) + assert re.match(r'[0-9a-f\-]{36}', fsid, re.I) def test_blocklist_add(self): self.rados.blocklist_add("1.2.3.4/123", 1) diff --git a/src/tools/cephfs/top/cephfs-top b/src/tools/cephfs/top/cephfs-top index 9ecc47fc2d5..45900f9a025 100755 --- a/src/tools/cephfs/top/cephfs-top +++ b/src/tools/cephfs/top/cephfs-top @@ -148,7 +148,7 @@ def wrap(s, sl): """return a '+' suffixed wrapped string""" if len(s) < sl: return s - return f'{s[0:sl-1]}+' + return f'{s[0:sl - 1]}+' class FSTopBase(object): |