diff options
45 files changed, 1047 insertions, 301 deletions
diff --git a/.githubmap b/.githubmap index 5265fa59bed..c8ae6e284a2 100644 --- a/.githubmap +++ b/.githubmap @@ -9,6 +9,7 @@ # a2batic Kanika Murarka <kmurarka@redhat.com> aaSharma14 Aashish Sharma <aasharma@redhat.com> +abhishek-kane Abhishek Kane <abhishek.kane@ibm.com> <abhishek.kane@gmail.com> aclamk Adam Kupczyk <akupczyk@redhat.com> adamemerson Adam C. Emerson <aemerson@redhat.com> adk3798 Adam King <adking@redhat.com> @@ -13,6 +13,7 @@ Aashish Sharma <aasharma@redhat.com> <66050535+aaSharma14@users.noreply.github.c Aashish Sharma <aasharma@redhat.com> <aasharma@li-e74156cc-2f67-11b2-a85c-e98659a63c5c.ibm.com> Aashish Sharma <aasharma@redhat.com> <aashishsharma@fedora.redhat.com> Aashish Sharma <aasharma@redhat.com> <aashishsharma@localhost.localdomain> +Abhishek Kane <abhishek.kane@ibm.com> <abhishek.kane@gmail.com> Abhishek Lekshmanan <abhishek.lekshmanan@cern.ch> <abhishek.l@cern.ch> Abhishek Lekshmanan <abhishek@suse.com> <abhishek.lekshmanan@gmail.com> Abhishek Lekshmanan <abhishek@suse.com> <alekshmanan@suse.com> diff --git a/.organizationmap b/.organizationmap index e59e6ae24e1..ac9b0ea70fe 100644 --- a/.organizationmap +++ b/.organizationmap @@ -345,6 +345,7 @@ Huawei <contact@huawei.com> Yehu <yehu5@huawei.com> Huayun <contact@huayun.com> Zheng Yin <zhengyin@huayun.com> Huazhong University of Science and Technology <contact@hust.edu.cn> Luo Runbing <runsisi@hust.edu.cn> HXT Semiconductor <contact@hxt-semitech.org> Jiang Yutang <yutang2.jiang@hxt-semitech.com> +IBM <contact@IBM.com> Abhishek Kane <abhishek.kane@ibm.com> IBM <contact@IBM.com> Adam Kupczyk <akupczyk@ibm.com> IBM <contact@IBM.com> Afreen Misbah <afreen@ibm.com> IBM <contact@IBM.com> Aliaksei Makarau <aliaksei.makarau@ibm.com> diff --git a/.peoplemap b/.peoplemap index 418e8505fb4..ed70830c092 100644 --- a/.peoplemap +++ b/.peoplemap @@ -16,6 +16,7 @@ # # git log --pretty='%aN <%aE>' $range | git -c mailmap.file=.peoplemap check-mailmap --stdin | sort | uniq | sed -e 's/\(.*\) \(<.*\)/\2 \1/' | uniq --skip-field=1 --all-repeated | sed -e 's/\(.*>\) \(.*\)/\2 \1/' # +Abhishek Kane <abhishek.kane@ibm.com> <abhishek.kane@gmail.com> Abhishek Lekshmanan <abhishek.lekshmanan@cern.ch> <abhishek@suse.com> Adam Kupczyk <akupczyk@ibm.com> <akupczyk@redhat.com> <akupczyk@mirantis.com> Alexandre Marangone <amarango@redhat.com> Alexandre Marangone <alexandre.marangone@inktank.com> diff --git a/container/Containerfile b/container/Containerfile index c954ebed1be..9a5a88e76a1 100644 --- a/container/Containerfile +++ b/container/Containerfile @@ -212,6 +212,7 @@ RUN rpm -q $(cat packages.txt) && rm -f /var/lib/rpm/__db* && rm -f *packages.tx # Set some envs in the container for quickly inspecting details about the build at runtime ENV CEPH_IS_DEVEL="${CI_CONTAINER}" \ CEPH_REF="${CEPH_REF}" \ + CEPH_VERSION="${CEPH_REF}" \ CEPH_OSD_FLAVOR="${OSD_FLAVOR}" \ FROM_IMAGE="${FROM_IMAGE}" diff --git a/doc/cephfs/disaster-recovery-experts.rst b/doc/cephfs/disaster-recovery-experts.rst index 7677b42f47e..b01a3dfde6a 100644 --- a/doc/cephfs/disaster-recovery-experts.rst +++ b/doc/cephfs/disaster-recovery-experts.rst @@ -21,43 +21,46 @@ Advanced: Metadata repair tools Journal export -------------- -Before attempting dangerous operations, make a copy of the journal like so: +Before attempting any dangerous operation, make a copy of the journal by +running the following command: -:: +.. prompt:: bash # - cephfs-journal-tool journal export backup.bin + cephfs-journal-tool journal export backup.bin -Note that this command may not always work if the journal is badly corrupted, -in which case a RADOS-level copy should be made (http://tracker.ceph.com/issues/9902). +If the journal is badly corrupted, this command might not work. If the journal +is badly corrupted, make a RADOS-level copy +(http://tracker.ceph.com/issues/9902). Dentry recovery from journal ---------------------------- If a journal is damaged or for any reason an MDS is incapable of replaying it, -attempt to recover what file metadata we can like so: +attempt to recover file metadata by running the following command: -:: +.. prompt:: bash # - cephfs-journal-tool event recover_dentries summary + cephfs-journal-tool event recover_dentries summary -This command by default acts on MDS rank 0, pass --rank=<n> to operate on other ranks. +By default, this command acts on MDS rank ``0``. Pass the option ``--rank=<n>`` +to the ``cephfs-journal-tool`` command to operate on other ranks. -This command will write any inodes/dentries recoverable from the journal -into the backing store, if these inodes/dentries are higher-versioned -than the previous contents of the backing store. If any regions of the journal -are missing/damaged, they will be skipped. +This command writes all inodes and dentries recoverable from the journal into +the backing store, but only if these inodes and dentries are higher-versioned +than the existing contents of the backing store. Any regions of the journal +that are missing or damaged will be skipped. -Note that in addition to writing out dentries and inodes, this command will update -the InoTables of each 'in' MDS rank, to indicate that any written inodes' numbers -are now in use. In simple cases, this will result in an entirely valid backing +In addition to writing out dentries and inodes, this command updates the +InoTables of each ``in`` MDS rank, to indicate that any written inodes' numbers +are now in use. In simple cases, this will result in an entirely valid backing store state. .. warning:: - The resulting state of the backing store is not guaranteed to be self-consistent, - and an online MDS scrub will be required afterwards. The journal contents - will not be modified by this command, you should truncate the journal + The resulting state of the backing store is not guaranteed to be + self-consistent, and an online MDS scrub will be required afterwards. The + journal contents will not be modified by this command. Truncate the journal separately after recovering what you can. Journal truncation diff --git a/doc/radosgw/bucket_logging.rst b/doc/radosgw/bucket_logging.rst index cb9f8465d20..f3e790f5705 100644 --- a/doc/radosgw/bucket_logging.rst +++ b/doc/radosgw/bucket_logging.rst @@ -15,6 +15,12 @@ The log bucket can accumulate logs from multiple buckets. It is recommended to c a different "prefix" for each bucket, so that the logs of different buckets will be stored in different objects in the log bucket. +.. note:: + + - The log bucket must be created before enabling logging on a bucket + - The log bucket cannot be the same as the bucket being logged + - The log bucket cannot have logging enabled on it + .. toctree:: :maxdepth: 1 @@ -29,6 +35,7 @@ Adding a log object to the log bucket is done "lazily", meaning, that if no more remain outside of the log bucket even after the configured time has passed. To counter that, you can flush all logging objects on a given source bucket to log them, regardless if enough time passed or if no more records are written to the object. +Flushing will happen automatically when logging is disabled on a bucket, its logging configuration is changed, or the bucket is deleted. Standard ```````` @@ -72,7 +79,7 @@ has the following format: :: - <prefix><bucket owner>/<source region>/<bucket name>/<year>/<month>/<day>/<year-month-day-hour-minute-second>-<16 bytes unique-id> + <prefix><bucket owner>/<source region>/[tenant:]<bucket name>/<year>/<month>/<day>/<year-month-day-hour-minute-second>-<16 bytes unique-id> For example: @@ -90,7 +97,7 @@ Journal minimum amount of data used for journaling bucket changes (this is a Ceph extension). - bucket owner (or dash if empty) - - bucket name (or dash if empty) + - bucket name (or dash if empty). in the format: ``[tenant:]<bucket name>`` - time in the following format: ``[day/month/year:hour:minute:second timezone]`` - object key (or dash if empty) - operation in the following format: ``WEBSITE/REST.<HTTP method>.<resource>`` @@ -111,7 +118,7 @@ Standard based on `AWS Logging Record Format`_. - bucket owner (or dash if empty) - - bucket name (or dash if empty) + - bucket name (or dash if empty). in the format: ``[tenant:]<bucket name>`` - time - remote IP (not supported, always a dash) - user or account (or dash if empty) diff --git a/doc/radosgw/config-ref.rst b/doc/radosgw/config-ref.rst index b4aa56fff54..405bc727208 100644 --- a/doc/radosgw/config-ref.rst +++ b/doc/radosgw/config-ref.rst @@ -75,10 +75,11 @@ aggressiveness of lifecycle processing: .. confval:: rgw_lc_max_wp_worker These values can be tuned based upon your specific workload to further increase the -aggressiveness of lifecycle processing. For a workload with a larger number of buckets (thousands) -you would look at increasing the :confval:`rgw_lc_max_worker` value from the default value of 3 whereas for a -workload with a smaller number of buckets but higher number of objects (hundreds of thousands) -per bucket you would consider increasing :confval:`rgw_lc_max_wp_worker` from the default value of 3. +aggressiveness of lifecycle processing. For a workload with a large number of buckets (thousands) +you would raise the number of workers by increasing :confval:`rgw_lc_max_worker` +from the default value of 3. Whereas for a workload with a higher number of objects per bucket +(hundreds of thousands) you would raise the number of parallel threads +by increasing :confval:`rgw_lc_max_wp_worker` from the default value of 3. .. note:: When looking to tune either of these specific values please validate the current Cluster performance and Ceph Object Gateway utilization before increasing. diff --git a/examples/rgw/boto3/head_bucket_stats.py b/examples/rgw/boto3/head_bucket_stats.py new file mode 100755 index 00000000000..1de40d63f4a --- /dev/null +++ b/examples/rgw/boto3/head_bucket_stats.py @@ -0,0 +1,27 @@ +#!/usr/bin/python + +import boto3 +import sys + +if len(sys.argv) != 2: + print('Usage: ' + sys.argv[0] + ' <bucket>') + sys.exit(1) + +# bucket name as first argument +bucketname = sys.argv[1] + +# endpoint and keys from vstart +endpoint = 'http://127.0.0.1:8000' +access_key='0555b35654ad1656d804' +secret_key='h7GhxuBLTrlhVUyxSPUKUV8r/2EI4ngqJxD7iBdBYLhwluN30JaT3Q==' + +client = boto3.client('s3', + endpoint_url=endpoint, + aws_access_key_id=access_key, + aws_secret_access_key=secret_key) + +# reading bucket stats via HeadBucket + +response = client.head_bucket(Bucket=bucketname, ReadStats=True) + +print('Objects:', response['ObjectCount'], 'Bytes:', response['BytesUsed']) diff --git a/examples/rgw/boto3/service-2.sdk-extras.json b/examples/rgw/boto3/service-2.sdk-extras.json index b81667ecd09..4618543d61b 100644 --- a/examples/rgw/boto3/service-2.sdk-extras.json +++ b/examples/rgw/boto3/service-2.sdk-extras.json @@ -379,7 +379,36 @@ }, "documentation":"<p>A filter for all log object. Filter for the object by its key (prefix, suffix and regex).</p>", "locationName":"Filter" - } + }, + "HeadBucketRequest": { + "members": { + "ReadStats":{ + "shape":"ReadStats", + "documentation":"<p>Read additional usage statistics for <code>ObjectCount</code> and <code>BytesUsed</code> in the response.</p> <note> <p>This request parameter is a Ceph RGW extension.</p> </note>", + "location":"querystring", + "locationName":"read-stats" + } + } + }, + "HeadBucketOutput":{ + "members":{ + "ObjectCount":{ + "shape":"ObjectCount", + "documentation": "<p>Total number of objects/versions in the bucket.</p>", + "location": "header", + "locationName": "x-rgw-object-count" + }, + "BytesUsed":{ + "shape":"BytesUsed", + "documentation": "<p>Total size in bytes of all objects/versions in the bucket.</p>", + "location": "header", + "locationName": "x-rgw-bytes-used" + } + } + }, + "ReadStats":{"type":"boolean"}, + "ObjectCount":{"type":"integer"}, + "BytesUsed":{"type":"integer"} }, "documentation":"<p/>" } diff --git a/qa/standalone/osd/osd-bluefs-volume-ops.sh b/qa/standalone/osd/osd-bluefs-volume-ops.sh index aedfbc9b5cb..f7424de8ce1 100755 --- a/qa/standalone/osd/osd-bluefs-volume-ops.sh +++ b/qa/standalone/osd/osd-bluefs-volume-ops.sh @@ -72,7 +72,7 @@ function TEST_bluestore() { truncate $dir/0/block -s 4294967296 # 4GB ceph-bluestore-tool --path $dir/0 bluefs-bdev-expand || return 1 - truncate $dir/1/block -s 4311744512 # 4GB + 16MB + truncate $dir/1/block -s 11811160064 # 11GB to get bdev label at 10737418240 ceph-bluestore-tool --path $dir/1 bluefs-bdev-expand || return 1 truncate $dir/2/block -s 4295099392 # 4GB + 129KB ceph-bluestore-tool --path $dir/2 bluefs-bdev-expand || return 1 diff --git a/qa/standalone/scrub/osd-recovery-scrub.sh b/qa/standalone/scrub/osd-recovery-scrub.sh index 843e9b9901b..7b77a60f35b 100755 --- a/qa/standalone/scrub/osd-recovery-scrub.sh +++ b/qa/standalone/scrub/osd-recovery-scrub.sh @@ -163,7 +163,7 @@ function wait_for_scrub_mod() { fi sleep 1 # are we still the primary? - local current_primary=`bin/ceph pg $pgid query | jq '.acting[0]' ` + local current_primary=`./bin/ceph pg $pgid query | jq '.acting[0]' ` if [ $orig_primary != $current_primary ]; then echo $orig_primary no longer primary for $pgid return 0 @@ -194,7 +194,7 @@ function pg_scrub_mod() { local last_scrub=$(get_last_scrub_stamp $pgid) # locate the primary - local my_primary=`bin/ceph pg $pgid query | jq '.acting[0]' ` + local my_primary=`./bin/ceph pg $pgid query | jq '.acting[0]' ` local recovery=false ceph pg scrub $pgid #ceph --format json pg dump pgs | jq ".pg_stats | .[] | select(.pgid == \"$pgid\") | .state" diff --git a/qa/standalone/scrub/osd-scrub-test.sh b/qa/standalone/scrub/osd-scrub-test.sh index 85a45a421c1..385479258f2 100755 --- a/qa/standalone/scrub/osd-scrub-test.sh +++ b/qa/standalone/scrub/osd-scrub-test.sh @@ -603,17 +603,16 @@ function TEST_dump_scrub_schedule() { declare -A expct_dmp_duration=( ['dmp_last_duration']="0" ['dmp_last_duration_neg']="not0" ) wait_any_cond $pgid 10 $saved_last_stamp expct_dmp_duration "WaitingAfterScrub_dmp " sched_data || return 1 - sleep 2 - # # step 2: set noscrub and request a "periodic scrub". Watch for the change in the 'is the scrub # scheduled for the future' value # - ceph tell osd.* config set osd_shallow_scrub_chunk_max "3" || return 1 - ceph tell osd.* config set osd_scrub_sleep "2.0" || return 1 ceph osd set noscrub || return 1 sleep 2 + ceph tell osd.* config set osd_shallow_scrub_chunk_max "3" || return 1 + ceph tell osd.* config set osd_scrub_sleep "2.0" || return 1 + sleep 8 saved_last_stamp=${sched_data['query_last_stamp']} ceph tell $pgid schedule-scrub @@ -692,28 +691,28 @@ function wait_initial_scrubs() { # set a long schedule for the periodic scrubs. Wait for the # initial 'no previous scrub is known' scrubs to finish for all PGs. - bin/ceph tell osd.* config set osd_scrub_min_interval 7200 - bin/ceph tell osd.* config set osd_deep_scrub_interval 14400 - bin/ceph tell osd.* config set osd_max_scrubs 32 - bin/ceph tell osd.* config set osd_scrub_sleep 0 - bin/ceph tell osd.* config set osd_shallow_scrub_chunk_max 10 - bin/ceph tell osd.* config set osd_scrub_chunk_max 10 + ceph tell osd.* config set osd_scrub_min_interval 7200 + ceph tell osd.* config set osd_deep_scrub_interval 14400 + ceph tell osd.* config set osd_max_scrubs 32 + ceph tell osd.* config set osd_scrub_sleep 0 + ceph tell osd.* config set osd_shallow_scrub_chunk_max 10 + ceph tell osd.* config set osd_scrub_chunk_max 10 for pg in "${!pg_to_prim_dict[@]}"; do (( extr_dbg >= 1 )) && echo "Scheduling initial scrub for $pg" - bin/ceph tell $pg scrub || return 1 + ceph tell $pg scrub || return 1 done sleep 1 - (( extr_dbg >= 1 )) && bin/ceph pg dump pgs --format=json-pretty | \ + (( extr_dbg >= 1 )) && ceph pg dump pgs --format=json-pretty | \ jq '.pg_stats | map(select(.last_scrub_duration == 0)) | map({pgid: .pgid, last_scrub_duration: .last_scrub_duration})' tout=20 while [ $tout -gt 0 ] ; do sleep 0.5 - (( extr_dbg >= 2 )) && bin/ceph pg dump pgs --format=json-pretty | \ + (( extr_dbg >= 2 )) && ceph pg dump pgs --format=json-pretty | \ jq '.pg_stats | map(select(.last_scrub_duration == 0)) | map({pgid: .pgid, last_scrub_duration: .last_scrub_duration})' - not_done=$(bin/ceph pg dump pgs --format=json-pretty | \ + not_done=$(ceph pg dump pgs --format=json-pretty | \ jq '.pg_stats | map(select(.last_scrub_duration == 0)) | map({pgid: .pgid, last_scrub_duration: .last_scrub_duration})' | wc -l ) # note that we should ignore a header line if [ "$not_done" -le 1 ]; then @@ -782,14 +781,14 @@ function TEST_abort_periodic_for_operator() { wait_initial_scrubs pg_pr || return 1 # limit all OSDs to one scrub at a time - bin/ceph tell osd.* config set osd_max_scrubs 1 - bin/ceph tell osd.* config set osd_stats_update_period_not_scrubbing 1 + ceph tell osd.* config set osd_max_scrubs 1 + ceph tell osd.* config set osd_stats_update_period_not_scrubbing 1 # configure for slow scrubs - bin/ceph tell osd.* config set osd_scrub_sleep 3 - bin/ceph tell osd.* config set osd_shallow_scrub_chunk_max 2 - bin/ceph tell osd.* config set osd_scrub_chunk_max 2 - (( extr_dbg >= 2 )) && bin/ceph tell osd.2 dump_scrub_reservations --format=json-pretty + ceph tell osd.* config set osd_scrub_sleep 3 + ceph tell osd.* config set osd_shallow_scrub_chunk_max 2 + ceph tell osd.* config set osd_scrub_chunk_max 2 + (( extr_dbg >= 2 )) && ceph tell osd.2 dump_scrub_reservations --format=json-pretty # the first PG to work with: local pg1="1.0" @@ -812,7 +811,7 @@ function TEST_abort_periodic_for_operator() { fi # the common primary is allowed two concurrent scrubs - bin/ceph tell osd."${pg_pr[$pg1]}" config set osd_max_scrubs 2 + ceph tell osd."${pg_pr[$pg1]}" config set osd_max_scrubs 2 echo "The two PGs to manipulate are $pg1 and $pg2" set_query_debug "$pg1" @@ -821,31 +820,31 @@ function TEST_abort_periodic_for_operator() { local is_act for i in $( seq 1 3 ) do - is_act=$(bin/ceph pg "$pg1" query | jq '.scrubber.active') + is_act=$(ceph pg "$pg1" query | jq '.scrubber.active') if [[ "$is_act" = "false" ]]; then break fi echo "Still waiting for pg $pg1 to finish scrubbing" sleep 0.7 done - bin/ceph pg dump pgs + ceph pg dump pgs if [[ "$is_act" != "false" ]]; then - bin/ceph pg "$pg1" query + ceph pg "$pg1" query echo "PG $pg1 appears to be still scrubbing" return 1 fi sleep 0.5 echo "Initiating a periodic scrub of $pg1" - (( extr_dbg >= 2 )) && bin/ceph pg "$pg1" query -f json-pretty | jq '.scrubber' - bin/ceph tell $pg1 schedule-deep-scrub || return 1 + (( extr_dbg >= 2 )) && ceph pg "$pg1" query -f json-pretty | jq '.scrubber' + ceph tell $pg1 schedule-deep-scrub || return 1 sleep 1 - (( extr_dbg >= 2 )) && bin/ceph pg "$pg1" query -f json-pretty | jq '.scrubber' + (( extr_dbg >= 2 )) && ceph pg "$pg1" query -f json-pretty | jq '.scrubber' for i in $( seq 1 14 ) do sleep 0.5 - stt=$(bin/ceph pg "$pg1" query | jq '.scrubber') + stt=$(ceph pg "$pg1" query | jq '.scrubber') is_active=$(echo $stt | jq '.active') is_reserving_replicas=$(echo $stt | jq '.is_reserving_replicas') if [[ "$is_active" = "true" && "$is_reserving_replicas" = "false" ]]; then @@ -854,49 +853,49 @@ function TEST_abort_periodic_for_operator() { echo "Still waiting for pg $pg1 to start scrubbing: $stt" done if [[ "$is_active" != "true" || "$is_reserving_replicas" != "false" ]]; then - bin/ceph pg "$pg1" query -f json-pretty | jq '.scrubber' + ceph pg "$pg1" query -f json-pretty | jq '.scrubber' echo "The scrub is not active or is reserving replicas" return 1 fi - (( extr_dbg >= 2 )) && bin/ceph pg "$pg1" query -f json-pretty | jq '.scrubber' + (( extr_dbg >= 2 )) && ceph pg "$pg1" query -f json-pretty | jq '.scrubber' # PG 1 is scrubbing, and has reserved the replicas - soem of which are shared # by PG 2. As the max-scrubs was set to 1, that should prevent PG 2 from # reserving its replicas. - (( extr_dbg >= 1 )) && bin/ceph tell osd.* dump_scrub_reservations --format=json-pretty + (( extr_dbg >= 1 )) && ceph tell osd.* dump_scrub_reservations --format=json-pretty # now - the 2'nd scrub - which should be blocked on reserving set_query_debug "$pg2" - bin/ceph tell "$pg2" schedule-deep-scrub + ceph tell "$pg2" schedule-deep-scrub sleep 0.5 (( extr_dbg >= 2 )) && echo "====================================================================================" - (( extr_dbg >= 2 )) && bin/ceph pg "$pg2" query -f json-pretty | jq '.scrubber' - (( extr_dbg >= 2 )) && bin/ceph pg "$pg1" query -f json-pretty | jq '.scrubber' + (( extr_dbg >= 2 )) && ceph pg "$pg2" query -f json-pretty | jq '.scrubber' + (( extr_dbg >= 2 )) && ceph pg "$pg1" query -f json-pretty | jq '.scrubber' sleep 1 (( extr_dbg >= 2 )) && echo "====================================================================================" - (( extr_dbg >= 2 )) && bin/ceph pg "$pg2" query -f json-pretty | jq '.scrubber' - (( extr_dbg >= 2 )) && bin/ceph pg "$pg1" query -f json-pretty | jq '.scrubber' + (( extr_dbg >= 2 )) && ceph pg "$pg2" query -f json-pretty | jq '.scrubber' + (( extr_dbg >= 2 )) && ceph pg "$pg1" query -f json-pretty | jq '.scrubber' # make sure pg2 scrub is stuck in the reserving state - local stt2=$(bin/ceph pg "$pg2" query | jq '.scrubber') + local stt2=$(ceph pg "$pg2" query | jq '.scrubber') local pg2_is_reserving pg2_is_reserving=$(echo $stt2 | jq '.is_reserving_replicas') if [[ "$pg2_is_reserving" != "true" ]]; then echo "The scheduled scrub for $pg2 should have been stuck" - bin/ceph pg dump pgs + ceph pg dump pgs return 1 fi # now - issue an operator-initiated scrub on pg2. # The periodic scrub should be aborted, and the operator-initiated scrub should start. echo "Instructing $pg2 to perform a high-priority scrub" - bin/ceph tell "$pg2" scrub + ceph tell "$pg2" scrub for i in $( seq 1 10 ) do sleep 0.5 - stt2=$(bin/ceph pg "$pg2" query | jq '.scrubber') + stt2=$(ceph pg "$pg2" query | jq '.scrubber') pg2_is_active=$(echo $stt2 | jq '.active') pg2_is_reserving=$(echo $stt2 | jq '.is_reserving_replicas') if [[ "$pg2_is_active" = "true" && "$pg2_is_reserving" != "true" ]]; then diff --git a/qa/standalone/scrub/scrub-helpers.sh b/qa/standalone/scrub/scrub-helpers.sh index 0b14d6028b6..dd37b643e08 100644 --- a/qa/standalone/scrub/scrub-helpers.sh +++ b/qa/standalone/scrub/scrub-helpers.sh @@ -320,7 +320,7 @@ function build_pg_dicts { # if the infile name is '-', fetch the dump directly from the ceph cluster if [[ $infile == "-" ]]; then - local -r ceph_cmd="bin/ceph pg dump pgs_brief -f=json-pretty" + local -r ceph_cmd="ceph pg dump pgs_brief -f=json-pretty" local -r ceph_cmd_out=$(eval $ceph_cmd) local -r ceph_cmd_rc=$? if [[ $ceph_cmd_rc -ne 0 ]]; then diff --git a/qa/suites/rados/verify/clusters/fixed-4.yaml b/qa/suites/rados/verify/clusters/fixed-4.yaml new file mode 120000 index 00000000000..aa88300715a --- /dev/null +++ b/qa/suites/rados/verify/clusters/fixed-4.yaml @@ -0,0 +1 @@ +.qa/clusters/fixed-4.yaml
\ No newline at end of file diff --git a/qa/tasks/ceph.py b/qa/tasks/ceph.py index 9b04e3dc675..8f666d2fa9b 100644 --- a/qa/tasks/ceph.py +++ b/qa/tasks/ceph.py @@ -1206,8 +1206,18 @@ def cluster(ctx, config): args.extend([ run.Raw('|'), 'head', '-n', '1', ]) - stdout = mon0_remote.sh(args) - return stdout or None + r = mon0_remote.run( + stdout=BytesIO(), + args=args, + stderr=StringIO(), + ) + stdout = r.stdout.getvalue().decode() + if stdout: + return stdout + stderr = r.stderr.getvalue() + if stderr: + return stderr + return None if first_in_ceph_log('\[ERR\]|\[WRN\]|\[SEC\]', config['log_ignorelist']) is not None: diff --git a/qa/tasks/cephadm.py b/qa/tasks/cephadm.py index dab61c2c700..0cde6050718 100644 --- a/qa/tasks/cephadm.py +++ b/qa/tasks/cephadm.py @@ -475,12 +475,16 @@ def ceph_log(ctx, config): run.Raw('|'), 'head', '-n', '1', ]) r = ctx.ceph[cluster_name].bootstrap_remote.run( - stdout=StringIO(), + stdout=BytesIO(), args=args, + stderr=StringIO(), ) - stdout = r.stdout.getvalue() - if stdout != '': + stdout = r.stdout.getvalue().decode() + if stdout: return stdout + stderr = r.stderr.getvalue() + if stderr: + return stderr return None # NOTE: technically the first and third arg to first_in_ceph_log diff --git a/qa/tasks/rook.py b/qa/tasks/rook.py index 6cb75173966..fae5ef3bf00 100644 --- a/qa/tasks/rook.py +++ b/qa/tasks/rook.py @@ -8,7 +8,7 @@ import json import logging import os import yaml -from io import BytesIO +from io import BytesIO, StringIO from tarfile import ReadError from tasks.ceph_manager import CephManager @@ -235,10 +235,14 @@ def ceph_log(ctx, config): r = ctx.rook[cluster_name].remote.run( stdout=BytesIO(), args=args, + stderr=StringIO(), ) stdout = r.stdout.getvalue().decode() if stdout: return stdout + stderr = r.stderr.getvalue() + if stderr: + return stderr return None if first_in_ceph_log('\[ERR\]|\[WRN\]|\[SEC\]', diff --git a/src/common/config_cacher.h b/src/common/config_cacher.h index 91b8152dde1..f23195955a1 100644 --- a/src/common/config_cacher.h +++ b/src/common/config_cacher.h @@ -18,21 +18,30 @@ #include "common/config_obs.h" #include "common/config.h" +/** + * A simple class to cache a single configuration value. + * Points to note: + * - as get_tracked_conf_keys() must return a pointer to a null-terminated + * array of C-strings, 'keys' - an array - is used to hold the sole key + * that this observer is interested in. + * - the const cast should be removed once we change the + * get_tracked_conf_keys() to return const char* const * (or something + * similar). + */ template <typename ValueT> class md_config_cacher_t : public md_config_obs_t { ConfigProxy& conf; - const char* const option_name; + const char* keys[2]; std::atomic<ValueT> value_cache; const char** get_tracked_conf_keys() const override { - const static char* keys[] = { option_name, nullptr }; - return keys; + return const_cast<const char**>(keys); } void handle_conf_change(const ConfigProxy& conf, const std::set<std::string>& changed) override { - if (changed.count(option_name)) { - value_cache.store(conf.get_val<ValueT>(option_name)); + if (changed.contains(keys[0])) { + value_cache.store(conf.get_val<ValueT>(keys[0])); } } @@ -40,10 +49,10 @@ public: md_config_cacher_t(ConfigProxy& conf, const char* const option_name) : conf(conf), - option_name(option_name) { + keys{option_name, nullptr} { conf.add_observer(this); std::atomic_init(&value_cache, - conf.get_val<ValueT>(option_name)); + conf.get_val<ValueT>(keys[0])); } ~md_config_cacher_t() { diff --git a/src/mds/Server.cc b/src/mds/Server.cc index 5874a3dce56..e66b5aa08c7 100644 --- a/src/mds/Server.cc +++ b/src/mds/Server.cc @@ -4167,7 +4167,7 @@ void Server::handle_client_getattr(const MDRequestRef& mdr, bool is_lookup) if (r < 0) { // fall-thru. let rdlock_path_pin_ref() check again. - } else if (is_lookup) { + } else if (is_lookup && mdr->dn[0].size()) { CDentry* dn = mdr->dn[0].back(); mdr->pin(dn); auto em = dn->batch_ops.emplace(std::piecewise_construct, std::forward_as_tuple(mask), std::forward_as_tuple()); @@ -4274,7 +4274,7 @@ void Server::handle_client_getattr(const MDRequestRef& mdr, bool is_lookup) // reply dout(10) << "reply to stat on " << *req << dendl; mdr->tracei = ref; - if (is_lookup) + if (is_lookup && mdr->dn[0].size()) mdr->tracedn = mdr->dn[0].back(); respond_to_request(mdr, 0); } diff --git a/src/mon/Monitor.cc b/src/mon/Monitor.cc index 7332ec3edb1..833bdddc71b 100644 --- a/src/mon/Monitor.cc +++ b/src/mon/Monitor.cc @@ -4024,7 +4024,7 @@ void Monitor::handle_command(MonOpRequestRef op) for (auto& p : mgrstatmon()->get_service_map().services) { auto &service = p.first; - if (ServiceMap::is_normal_ceph_entity(service)) { + if (ServiceMap::is_normal_ceph_entity(service) || service == "nvmeof") { continue; } f->open_object_section(service.c_str()); diff --git a/src/msg/async/AsyncMessenger.cc b/src/msg/async/AsyncMessenger.cc index 2e38bd434a8..6b3a8c3f6dc 100644 --- a/src/msg/async/AsyncMessenger.cc +++ b/src/msg/async/AsyncMessenger.cc @@ -207,22 +207,22 @@ void Processor::accept() } else if (r == -EAGAIN) { break; } else if (r == -EMFILE || r == -ENFILE) { - lderr(msgr->cct) << __func__ << " open file descriptions limit reached sd = " << listen_socket.fd() + lderr(msgr->cct) << __func__ << " open file descriptors limit reached fd = " << listen_socket.fd() << " errno " << r << " " << cpp_strerror(r) << dendl; if (++accept_error_num > msgr->cct->_conf->ms_max_accept_failures) { - lderr(msgr->cct) << "Proccessor accept has encountered enough error numbers, just do ceph_abort()." << dendl; + lderr(msgr->cct) << "Proccessor accept has encountered too many errors, just do ceph_abort()." << dendl; ceph_abort(); } continue; } else if (r == -ECONNABORTED) { - ldout(msgr->cct, 0) << __func__ << " it was closed because of rst arrived sd = " << listen_socket.fd() + ldout(msgr->cct, 0) << __func__ << " closed because of rst arrival fd = " << listen_socket.fd() << " errno " << r << " " << cpp_strerror(r) << dendl; continue; } else { lderr(msgr->cct) << __func__ << " no incoming connection?" << " errno " << r << " " << cpp_strerror(r) << dendl; if (++accept_error_num > msgr->cct->_conf->ms_max_accept_failures) { - lderr(msgr->cct) << "Proccessor accept has encountered enough error numbers, just do ceph_abort()." << dendl; + lderr(msgr->cct) << "Proccessor accept has encountered too many errors, just do ceph_abort()." << dendl; ceph_abort(); } continue; diff --git a/src/msg/async/EventEpoll.cc b/src/msg/async/EventEpoll.cc index 7ed5321dcda..eb04e3b8e98 100644 --- a/src/msg/async/EventEpoll.cc +++ b/src/msg/async/EventEpoll.cc @@ -17,6 +17,7 @@ #include "common/errno.h" #include <fcntl.h> #include "EventEpoll.h" +#include "Timeout.h" #define dout_subsys ceph_subsys_ms @@ -120,8 +121,7 @@ int EpollDriver::event_wait(std::vector<FiredFileEvent> &fired_events, struct ti { int retval, numevents = 0; - retval = epoll_wait(epfd, events, nevent, - tvp ? (tvp->tv_sec*1000 + tvp->tv_usec/1000) : -1); + retval = epoll_wait(epfd, events, nevent, timeout_to_milliseconds(tvp)); if (retval > 0) { numevents = retval; fired_events.resize(numevents); diff --git a/src/msg/async/EventPoll.cc b/src/msg/async/EventPoll.cc index 4c09dbb4db4..f46528715e3 100644 --- a/src/msg/async/EventPoll.cc +++ b/src/msg/async/EventPoll.cc @@ -15,6 +15,7 @@ #include "common/errno.h" #include "EventPoll.h" +#include "Timeout.h" #include <unistd.h> #define dout_subsys ceph_subsys_ms @@ -161,11 +162,9 @@ int PollDriver::event_wait(std::vector<FiredFileEvent> &fired_events, struct timeval *tvp) { int retval, numevents = 0; #ifdef _WIN32 - retval = WSAPoll(pfds, max_pfds, - tvp ? (tvp->tv_sec*1000 + tvp->tv_usec/1000) : -1); + retval = WSAPoll(pfds, max_pfds, timeout_to_milliseconds(tvp)); #else - retval = poll(pfds, max_pfds, - tvp ? (tvp->tv_sec*1000 + tvp->tv_usec/1000) : -1); + retval = poll(pfds, max_pfds, timeout_to_milliseconds(tvp)); #endif if (retval > 0) { for (int j = 0; j < max_pfds; j++) { diff --git a/src/msg/async/Timeout.h b/src/msg/async/Timeout.h new file mode 100644 index 00000000000..b8df1b40761 --- /dev/null +++ b/src/msg/async/Timeout.h @@ -0,0 +1,47 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2024 IONOS SE + * + * Author: Max Kellermann <max.kellermann@ionos.com> + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. + * + */ + +#ifndef CEPH_MSG_TIMEOUT_H +#define CEPH_MSG_TIMEOUT_H + +#include "include/intarith.h" // for div_round_up() + +#include <time.h> // for struct timeval + +/** + * Convert the given `struct timeval` to milliseconds. + * + * This is supposed to be used as timeout parameter to system calls + * such as poll() and epoll_wait(). + */ +constexpr int +timeout_to_milliseconds(const struct timeval &tv) noexcept +{ + /* round up to the next millisecond so we don't wake up too early */ + return tv.tv_sec * 1000 + div_round_up(tv.tv_usec, 1000); +} + +/** + * This overload makes the timeout optional; on nullptr, it returns + * -1. + */ +constexpr int +timeout_to_milliseconds(const struct timeval *tv) noexcept +{ + return tv != nullptr ? timeout_to_milliseconds(*tv) : -1; +} + +#endif diff --git a/src/os/bluestore/BlueFS.cc b/src/os/bluestore/BlueFS.cc index 2f88acdc93b..50f293d45fd 100644 --- a/src/os/bluestore/BlueFS.cc +++ b/src/os/bluestore/BlueFS.cc @@ -3794,7 +3794,7 @@ int BlueFS::truncate(FileWriter *h, uint64_t offset)/*_WF_L*/ if (offset > fnode.size) { ceph_abort_msg("truncate up not supported"); } - ceph_assert(offset <= fnode.size); + _flush_bdev(h); { std::lock_guard ll(log.lock); @@ -3803,44 +3803,42 @@ int BlueFS::truncate(FileWriter *h, uint64_t offset)/*_WF_L*/ vselector->sub_usage(h->file->vselector_hint, fnode); uint64_t x_off = 0; auto p = fnode.seek(offset, &x_off); - uint64_t cut_off = - (p == fnode.extents.end()) ? 0 : p2roundup(x_off, alloc_size[p->bdev]); - uint64_t new_allocated; - if (0 == cut_off) { - // whole pextent to remove - changed_extents = true; - new_allocated = offset; - } else if (cut_off < p->length) { - dirty.pending_release[p->bdev].insert(p->offset + cut_off, p->length - cut_off); - new_allocated = (offset - x_off) + cut_off; - p->length = cut_off; - changed_extents = true; - ++p; - } else { - ceph_assert(cut_off >= p->length); - new_allocated = (offset - x_off) + p->length; - // just leave it here - ++p; - } - while (p != fnode.extents.end()) { - dirty.pending_release[p->bdev].insert(p->offset, p->length); - p = fnode.extents.erase(p); - changed_extents = true; + if (p != fnode.extents.end()) { + uint64_t cut_off = p2roundup(x_off, alloc_size[p->bdev]); + if (0 == cut_off) { + // whole pextent to remove + fnode.allocated = offset; + changed_extents = true; + } else if (cut_off < p->length) { + dirty.pending_release[p->bdev].insert(p->offset + cut_off, + p->length - cut_off); + fnode.allocated = (offset - x_off) + cut_off; + p->length = cut_off; + changed_extents = true; + ++p; + } else { + // cut_off > p->length means that we misaligned the extent + ceph_assert(cut_off == p->length); + fnode.allocated = (offset - x_off) + p->length; + ++p; // leave extent untouched + } + while (p != fnode.extents.end()) { + dirty.pending_release[p->bdev].insert(p->offset, p->length); + p = fnode.extents.erase(p); + changed_extents = true; + } } if (changed_extents) { fnode.size = offset; - fnode.allocated = new_allocated; fnode.reset_delta(); fnode.recalc_allocated(); log.t.op_file_update(fnode); // sad, but is_dirty must be set to signal flushing of the log h->file->is_dirty = true; - } else { - if (offset != fnode.size) { - fnode.size = offset; - //skipping log.t.op_file_update_inc, it will be done by flush() - h->file->is_dirty = true; - } + } else if (offset != fnode.size) { + fnode.size = offset; + // skipping log.t.op_file_update_inc, it will be done by flush() + h->file->is_dirty = true; } vselector->add_usage(h->file->vselector_hint, fnode); } diff --git a/src/os/bluestore/BlueStore.cc b/src/os/bluestore/BlueStore.cc index 25e6c4fe596..8f1d995fa8d 100644 --- a/src/os/bluestore/BlueStore.cc +++ b/src/os/bluestore/BlueStore.cc @@ -6930,8 +6930,19 @@ int BlueStore::_check_main_bdev_label() return -EIO; } if (bluestore_bdev_label_require_all && r != 0) { - derr << __func__ << " not all labels read properly" << dendl; - return -EIO; + // We are about to complain that some labels failed. + // But in case if we expanded block device some labels will not be good. + uint64_t lsize = std::max(BDEV_LABEL_BLOCK_SIZE, min_alloc_size); + uint32_t valid_locations = 0; + for (uint64_t loc : bdev_label_positions) { + if (loc + lsize <= bdev_label.size) { + ++valid_locations; + } + } + if (valid_locations != bdev_label_valid_locations.size()) { + derr << __func__ << " not all labels read properly" << dendl; + return -EIO; + } } return 0; } @@ -8967,11 +8978,25 @@ int BlueStore::expand_devices(ostream& out) _close_db_and_around(); // mount in read/write to sync expansion changes + if (bdev_label_multi) { + // We need not do fsck, because we can be broken - size is increased, + // but we might not have labels set. + cct->_conf.set_val_or_die("bluestore_fsck_on_mount", "false"); + } r = _mount(); ceph_assert(r == 0); if (fm && fm->is_null_manager()) { // we grow the allocation range, must reflect it in the allocation file alloc->init_add_free(size0, size - size0); + if (bdev_label_multi) { + uint64_t lsize = std::max(BDEV_LABEL_BLOCK_SIZE, min_alloc_size); + for (uint64_t loc : bdev_label_positions) { + if ((loc >= size0) && (loc + lsize <= size)) { + bdev_label_valid_locations.push_back(loc); + } + } + _write_bdev_label(cct, bdev, path + "/block", bdev_label, bdev_label_valid_locations); + } need_to_destage_allocation_file = true; } umount(); diff --git a/src/pybind/mgr/cephadm/services/nvmeof.py b/src/pybind/mgr/cephadm/services/nvmeof.py index b3fd526815e..8acec94f382 100644 --- a/src/pybind/mgr/cephadm/services/nvmeof.py +++ b/src/pybind/mgr/cephadm/services/nvmeof.py @@ -47,6 +47,7 @@ class NvmeofService(CephService): # TODO: check if we can force jinja2 to generate dicts with double quotes instead of using json.dumps transport_tcp_options = json.dumps(spec.transport_tcp_options) if spec.transport_tcp_options else None + iobuf_options = json.dumps(spec.iobuf_options) if spec.iobuf_options else None name = '{}.{}'.format(utils.name_to_config_section('nvmeof'), nvmeof_gw_id) rados_id = name[len('client.'):] if name.startswith('client.') else name @@ -67,6 +68,7 @@ class NvmeofService(CephService): 'rpc_socket_dir': '/var/tmp/', 'rpc_socket_name': 'spdk.sock', 'transport_tcp_options': transport_tcp_options, + 'iobuf_options': iobuf_options, 'rados_id': rados_id } gw_conf = self.mgr.template.render('services/nvmeof/ceph-nvmeof.conf.j2', context) diff --git a/src/pybind/mgr/cephadm/templates/services/nvmeof/ceph-nvmeof.conf.j2 b/src/pybind/mgr/cephadm/templates/services/nvmeof/ceph-nvmeof.conf.j2 index 37f2db52732..2a9ab309568 100644 --- a/src/pybind/mgr/cephadm/templates/services/nvmeof/ceph-nvmeof.conf.j2 +++ b/src/pybind/mgr/cephadm/templates/services/nvmeof/ceph-nvmeof.conf.j2 @@ -86,6 +86,9 @@ transport_tcp_options = {{ transport_tcp_options }} {% if spec.tgt_cmd_extra_args %} tgt_cmd_extra_args = {{ spec.tgt_cmd_extra_args }} {% endif %} +{% if iobuf_options %} +iobuf_options = {{ iobuf_options }} +{% endif %} [monitor] timeout = {{ spec.monitor_timeout }} diff --git a/src/python-common/ceph/deployment/service_spec.py b/src/python-common/ceph/deployment/service_spec.py index 1ac9fa49e32..6869d5b2188 100644 --- a/src/python-common/ceph/deployment/service_spec.py +++ b/src/python-common/ceph/deployment/service_spec.py @@ -1384,6 +1384,7 @@ class NvmeofServiceSpec(ServiceSpec): transport_tcp_options: Optional[Dict[str, int]] = {"in_capsule_data_size": 8192, "max_io_qpairs_per_ctrlr": 7}, tgt_cmd_extra_args: Optional[str] = None, + iobuf_options: Optional[Dict[str, int]] = None, discovery_addr: Optional[str] = None, discovery_addr_map: Optional[Dict[str, str]] = None, discovery_port: Optional[int] = None, @@ -1520,6 +1521,8 @@ class NvmeofServiceSpec(ServiceSpec): self.transport_tcp_options: Optional[Dict[str, int]] = transport_tcp_options #: ``tgt_cmd_extra_args`` extra arguments for the nvmf_tgt process self.tgt_cmd_extra_args = tgt_cmd_extra_args + #: List of extra arguments for SPDK iobuf in the form opt=value + self.iobuf_options: Optional[Dict[str, int]] = iobuf_options #: ``discovery_addr`` address of the discovery service self.discovery_addr = discovery_addr #: ``discovery_addr_map`` per node address map of the discovery service diff --git a/src/rgw/driver/rados/rgw_rados.cc b/src/rgw/driver/rados/rgw_rados.cc index 69075c506f1..a183feabe2a 100644 --- a/src/rgw/driver/rados/rgw_rados.cc +++ b/src/rgw/driver/rados/rgw_rados.cc @@ -8951,7 +8951,7 @@ int RGWRados::get_olh(const DoutPrefixProvider *dpp, RGWBucketInfo& bucket_info, return r; } - auto iter = attrset.find(RGW_ATTR_OLH_VER); + auto iter = attrset.find(RGW_ATTR_OLH_INFO); if (iter == attrset.end()) { /* not an olh */ return -EINVAL; } diff --git a/src/rgw/driver/rados/rgw_sal_rados.cc b/src/rgw/driver/rados/rgw_sal_rados.cc index 4c67d0ee71a..4c05421653b 100644 --- a/src/rgw/driver/rados/rgw_sal_rados.cc +++ b/src/rgw/driver/rados/rgw_sal_rados.cc @@ -429,6 +429,10 @@ int RadosBucket::remove(const DoutPrefixProvider* dpp, ldpp_dout(dpp, -1) << "ERROR: unable to remove notifications from bucket. ret=" << ps_ret << dendl; } + if (ret = rgw::bucketlogging::bucket_deletion_cleanup(dpp, store, this, y); ret < 0) { + ldpp_dout(dpp, 1) << "WARNING: could not cleanup bucket logging configuration and pending objects, ret = " << ret << dendl; + } + ret = store->ctl()->bucket->unlink_bucket(rados, info.owner, info.bucket, y, dpp, false); if (ret < 0) { @@ -1024,15 +1028,15 @@ int RadosBucket::remove_topics(RGWObjVersionTracker* objv_tracker, objv_tracker, y); } -int RadosBucket::get_logging_object_name(std::string& obj_name, - const std::string& prefix, - optional_yield y, +int RadosBucket::get_logging_object_name(std::string& obj_name, + const std::string& prefix, + optional_yield y, const DoutPrefixProvider *dpp, RGWObjVersionTracker* objv_tracker) { rgw_pool data_pool; const auto obj_name_oid = bucketlogging::object_name_oid(this, prefix); if (!store->getRados()->get_obj_data_pool(get_placement_rule(), rgw_obj{get_key(), obj_name_oid}, &data_pool)) { - ldpp_dout(dpp, 1) << "failed to get data pool for bucket '" << get_name() << + ldpp_dout(dpp, 1) << "ERROR: failed to get data pool for bucket '" << get_name() << "' when getting logging object name" << dendl; return -EIO; } @@ -1048,23 +1052,23 @@ int RadosBucket::get_logging_object_name(std::string& obj_name, nullptr, nullptr); if (ret < 0) { - ldpp_dout(dpp, 1) << "failed to get logging object name from '" << obj_name_oid << "'. ret = " << ret << dendl; + ldpp_dout(dpp, 1) << "ERROR: failed to get logging object name from '" << obj_name_oid << "'. ret = " << ret << dendl; return ret; } obj_name = bl.to_str(); return 0; } -int RadosBucket::set_logging_object_name(const std::string& obj_name, - const std::string& prefix, - optional_yield y, - const DoutPrefixProvider *dpp, +int RadosBucket::set_logging_object_name(const std::string& obj_name, + const std::string& prefix, + optional_yield y, + const DoutPrefixProvider *dpp, bool new_obj, RGWObjVersionTracker* objv_tracker) { rgw_pool data_pool; const auto obj_name_oid = bucketlogging::object_name_oid(this, prefix); if (!store->getRados()->get_obj_data_pool(get_placement_rule(), rgw_obj{get_key(), obj_name_oid}, &data_pool)) { - ldpp_dout(dpp, 1) << "failed to get data pool for bucket '" << get_name() << + ldpp_dout(dpp, 1) << "ERROR: failed to get data pool for bucket '" << get_name() << "' when setting logging object name" << dendl; return -EIO; } @@ -1080,28 +1084,65 @@ int RadosBucket::set_logging_object_name(const std::string& obj_name, y, nullptr); if (ret == -EEXIST) { - ldpp_dout(dpp, 20) << "race detected in initializing '" << obj_name_oid << "' with logging object name:'" << obj_name << "'. ret = " << ret << dendl; + ldpp_dout(dpp, 20) << "INFO: race detected in initializing '" << obj_name_oid << "' with logging object name:'" << obj_name << "'. ret = " << ret << dendl; } else if (ret == -ECANCELED) { - ldpp_dout(dpp, 20) << "race detected in updating logging object name '" << obj_name << "' at '" << obj_name_oid << "'. ret = " << ret << dendl; + ldpp_dout(dpp, 20) << "INFO: race detected in updating logging object name '" << obj_name << "' at '" << obj_name_oid << "'. ret = " << ret << dendl; } else if (ret < 0) { - ldpp_dout(dpp, 1) << "failed to set logging object name '" << obj_name << "' at '" << obj_name_oid << "'. ret = " << ret << dendl; + ldpp_dout(dpp, 1) << "ERROR: failed to set logging object name '" << obj_name << "' at '" << obj_name_oid << "'. ret = " << ret << dendl; } return ret; } +int RadosBucket::remove_logging_object_name(const std::string& prefix, + optional_yield y, + const DoutPrefixProvider *dpp, + RGWObjVersionTracker* objv_tracker) { + rgw_pool data_pool; + const auto obj_name_oid = bucketlogging::object_name_oid(this, prefix); + if (!store->getRados()->get_obj_data_pool(get_placement_rule(), rgw_obj{get_key(), obj_name_oid}, &data_pool)) { + ldpp_dout(dpp, 1) << "ERROR: failed to get data pool for bucket '" << get_name() << + "' when setting logging object name" << dendl; + return -EIO; + } + return rgw_delete_system_obj(dpp, store->svc()->sysobj, + data_pool, + obj_name_oid, + objv_tracker, + y); +} + std::string to_temp_object_name(const rgw::sal::Bucket* bucket, const std::string& obj_name) { return fmt::format("{}__shadow_{}0", bucket->get_bucket_id(), obj_name); } +int RadosBucket::remove_logging_object(const std::string& obj_name, optional_yield y, const DoutPrefixProvider *dpp) { + rgw_pool data_pool; + const rgw_obj head_obj{get_key(), obj_name}; + const auto placement_rule = get_placement_rule(); + + if (!store->getRados()->get_obj_data_pool(placement_rule, head_obj, &data_pool)) { + ldpp_dout(dpp, 1) << "ERROR: failed to get data pool for bucket '" << get_name() << + "' when deleting logging object" << dendl; + return -EIO; + } + + const auto temp_obj_name = to_temp_object_name(this, obj_name); + return rgw_delete_system_obj(dpp, store->svc()->sysobj, + data_pool, + temp_obj_name, + nullptr, + y); +} + int RadosBucket::commit_logging_object(const std::string& obj_name, optional_yield y, const DoutPrefixProvider *dpp) { rgw_pool data_pool; const rgw_obj head_obj{get_key(), obj_name}; const auto placement_rule = get_placement_rule(); if (!store->getRados()->get_obj_data_pool(placement_rule, head_obj, &data_pool)) { - ldpp_dout(dpp, 1) << "failed to get data pool for bucket '" << get_name() << + ldpp_dout(dpp, 1) << "ERROR: failed to get data pool for bucket '" << get_name() << "' when comitting logging object" << dendl; return -EIO; } @@ -1110,7 +1151,6 @@ int RadosBucket::commit_logging_object(const std::string& obj_name, optional_yie std::map<string, bufferlist> obj_attrs; ceph::real_time mtime; bufferlist bl_data; - // TODO: this is needed only for etag calculation if (const auto ret = rgw_get_system_obj(store->svc()->sysobj, data_pool, temp_obj_name, @@ -1120,10 +1160,13 @@ int RadosBucket::commit_logging_object(const std::string& obj_name, optional_yie y, dpp, &obj_attrs, - nullptr); ret < 0) { - ldpp_dout(dpp, 1) << "faild to read logging data when comitting to object '" << temp_obj_name + nullptr); ret < 0 && ret != -ENOENT) { + ldpp_dout(dpp, 1) << "ERROR: failed to read logging data when comitting object '" << temp_obj_name << ". error: " << ret << dendl; return ret; + } else if (ret == -ENOENT) { + ldpp_dout(dpp, 1) << "WARNING: temporary logging object '" << temp_obj_name << "' does not exists" << dendl; + return 0; } uint64_t size = bl_data.length(); @@ -1137,13 +1180,13 @@ int RadosBucket::commit_logging_object(const std::string& obj_name, optional_yie nullptr, // no special placment for tail get_key(), head_obj); ret < 0) { - ldpp_dout(dpp, 1) << "failed to create manifest when comitting logging object. error: " << + ldpp_dout(dpp, 1) << "ERROR: failed to create manifest when comitting logging object. error: " << ret << dendl; return ret; } if (const auto ret = manifest_gen.create_next(size); ret < 0) { - ldpp_dout(dpp, 1) << "failed to add object to manifest when comitting logging object. error: " << + ldpp_dout(dpp, 1) << "ERROR: failed to add object to manifest when comitting logging object. error: " << ret << dendl; return ret; } @@ -1151,7 +1194,7 @@ int RadosBucket::commit_logging_object(const std::string& obj_name, optional_yie if (const auto expected_temp_obj = manifest_gen.get_cur_obj(store->getRados()); temp_obj_name != expected_temp_obj.oid) { // TODO: cleanup temporary object, commit would never succeed - ldpp_dout(dpp, 1) << "temporary logging object name mismatch: '" << + ldpp_dout(dpp, 1) << "ERROR: temporary logging object name mismatch: '" << temp_obj_name << "' != '" << expected_temp_obj.oid << "'" << dendl; return -EINVAL; } @@ -1182,11 +1225,11 @@ int RadosBucket::commit_logging_object(const std::string& obj_name, optional_yie const req_context rctx{dpp, y, nullptr}; jspan_context trace{false, false}; if (const auto ret = head_obj_wop.write_meta(0, size, obj_attrs, rctx, trace); ret < 0) { - ldpp_dout(dpp, 1) << "failed to commit logging object '" << temp_obj_name << - "' to bucket id '" << get_bucket_id() <<"'. error: " << ret << dendl; + ldpp_dout(dpp, 1) << "ERROR: failed to commit logging object '" << temp_obj_name << + "' to bucket id '" << get_info().bucket <<"'. error: " << ret << dendl; return ret; } - ldpp_dout(dpp, 20) << "committed logging object '" << temp_obj_name << + ldpp_dout(dpp, 20) << "INFO: committed logging object '" << temp_obj_name << "' with size of " << size << " bytes, to bucket '" << get_key() << "' as '" << obj_name << "'" << dendl; return 0; @@ -1204,30 +1247,30 @@ void bucket_logging_completion(rados_completion_t completion, void* args) { auto* aio_comp = reinterpret_cast<librados::AioCompletionImpl*>(completion); std::unique_ptr<BucketLoggingCompleteArg> logging_args(reinterpret_cast<BucketLoggingCompleteArg*>(args)); if (aio_comp->get_return_value() < 0) { - ldout(logging_args->cct, 1) << "failed to complete append to logging object '" << logging_args->obj_name << + ldout(logging_args->cct, 1) << "ERROR: failed to complete append to logging object '" << logging_args->obj_name << "'. ret = " << aio_comp->get_return_value() << dendl; } else { - ldout(logging_args->cct, 20) << "wrote " << logging_args->size << " bytes to logging object '" << + ldout(logging_args->cct, 20) << "INFO: wrote " << logging_args->size << " bytes to logging object '" << logging_args->obj_name << "'" << dendl; } } -int RadosBucket::write_logging_object(const std::string& obj_name, - const std::string& record, - optional_yield y, +int RadosBucket::write_logging_object(const std::string& obj_name, + const std::string& record, + optional_yield y, const DoutPrefixProvider *dpp, bool async_completion) { const auto temp_obj_name = to_temp_object_name(this, obj_name); rgw_pool data_pool; rgw_obj obj{get_key(), obj_name}; if (!store->getRados()->get_obj_data_pool(get_placement_rule(), obj, &data_pool)) { - ldpp_dout(dpp, 1) << "failed to get data pool for bucket '" << get_name() << + ldpp_dout(dpp, 1) << "ERROR: failed to get data pool for bucket '" << get_name() << "' when writing logging object" << dendl; return -EIO; } librados::IoCtx io_ctx; if (const auto ret = rgw_init_ioctx(dpp, store->getRados()->get_rados_handle(), data_pool, io_ctx); ret < 0) { - ldpp_dout(dpp, 1) << "failed to get IO context for logging object from data pool:" << data_pool.to_str() << dendl; + ldpp_dout(dpp, 1) << "ERROR: failed to get IO context for logging object from data pool:" << data_pool.to_str() << dendl; return -EIO; } bufferlist bl; @@ -1242,7 +1285,7 @@ int RadosBucket::write_logging_object(const std::string& obj_name, auto arg = std::make_unique<BucketLoggingCompleteArg>(temp_obj_name, record.length(), store->ctx()); completion->set_complete_callback(arg.get(), bucket_logging_completion); if (const auto ret = io_ctx.aio_operate(temp_obj_name, completion.get(), &op); ret < 0) { - ldpp_dout(dpp, 1) << "failed to append to logging object '" << temp_obj_name << + ldpp_dout(dpp, 1) << "ERROR: failed to append to logging object '" << temp_obj_name << "'. ret = " << ret << dendl; return ret; } @@ -1251,11 +1294,11 @@ int RadosBucket::write_logging_object(const std::string& obj_name, return 0; } if (const auto ret = rgw_rados_operate(dpp, io_ctx, temp_obj_name, &op, y); ret < 0) { - ldpp_dout(dpp, 1) << "failed to append to logging object '" << temp_obj_name << + ldpp_dout(dpp, 1) << "ERROR: failed to append to logging object '" << temp_obj_name << "'. ret = " << ret << dendl; return ret; } - ldpp_dout(dpp, 20) << "wrote " << record.length() << " bytes to logging object '" << + ldpp_dout(dpp, 20) << "INFO: wrote " << record.length() << " bytes to logging object '" << temp_obj_name << "'" << dendl; return 0; } diff --git a/src/rgw/driver/rados/rgw_sal_rados.h b/src/rgw/driver/rados/rgw_sal_rados.h index 85ea247e345..e65c3c0050e 100644 --- a/src/rgw/driver/rados/rgw_sal_rados.h +++ b/src/rgw/driver/rados/rgw_sal_rados.h @@ -780,18 +780,23 @@ class RadosBucket : public StoreBucket { optional_yield y, const DoutPrefixProvider *dpp) override; int remove_topics(RGWObjVersionTracker* objv_tracker, optional_yield y, const DoutPrefixProvider *dpp) override; - int get_logging_object_name(std::string& obj_name, - const std::string& prefix, - optional_yield y, - const DoutPrefixProvider *dpp, + int get_logging_object_name(std::string& obj_name, + const std::string& prefix, + optional_yield y, + const DoutPrefixProvider *dpp, + RGWObjVersionTracker* objv_tracker) override; + int set_logging_object_name(const std::string& obj_name, + const std::string& prefix, + optional_yield y, + const DoutPrefixProvider *dpp, + bool new_obj, RGWObjVersionTracker* objv_tracker) override; - int set_logging_object_name(const std::string& obj_name, - const std::string& prefix, - optional_yield y, - const DoutPrefixProvider *dpp, - bool new_obj, + int remove_logging_object_name(const std::string& prefix, + optional_yield y, + const DoutPrefixProvider *dpp, RGWObjVersionTracker* objv_tracker) override; int commit_logging_object(const std::string& obj_name, optional_yield y, const DoutPrefixProvider *dpp) override; + int remove_logging_object(const std::string& obj_name, optional_yield y, const DoutPrefixProvider *dpp) override; int write_logging_object(const std::string& obj_name, const std::string& record, optional_yield y, const DoutPrefixProvider *dpp, bool async_completion) override; private: diff --git a/src/rgw/driver/rados/rgw_user.cc b/src/rgw/driver/rados/rgw_user.cc index 894d8e40950..cce593c6bd5 100644 --- a/src/rgw/driver/rados/rgw_user.cc +++ b/src/rgw/driver/rados/rgw_user.cc @@ -189,6 +189,11 @@ static void dump_user_info(Formatter *f, RGWUserInfo &info, } encode_json("type", user_source_type, f); encode_json("mfa_ids", info.mfa_ids, f); + encode_json("account_id", info.account_id, f); + encode_json("path", info.path, f); + encode_json("create_date", info.create_date, f); + encode_json("tags", info.tags, f); + encode_json("group_ids", info.group_ids, f); if (stats) { encode_json("stats", *stats, f); } diff --git a/src/rgw/radosgw-admin/radosgw-admin.cc b/src/rgw/radosgw-admin/radosgw-admin.cc index 182e42b8e31..13936c87952 100644 --- a/src/rgw/radosgw-admin/radosgw-admin.cc +++ b/src/rgw/radosgw-admin/radosgw-admin.cc @@ -171,7 +171,8 @@ void usage() cout << " bucket sync disable disable bucket sync\n"; cout << " bucket sync enable enable bucket sync\n"; cout << " bucket radoslist list rados objects backing bucket's objects\n"; - cout << " bucket logging flush flush pending log records object of source bucket to the log bucket to bucket\n"; + cout << " bucket logging flush flush pending log records object of source bucket to the log bucket\n"; + cout << " bucket logging info get info on bucket logging configuration on source bucket or list of sources in log bucket\n"; cout << " bi get retrieve bucket index object entries\n"; cout << " bi put store bucket index object entries\n"; cout << " bi list list raw bucket index entries\n"; @@ -701,6 +702,7 @@ enum class OPT { BUCKET_OBJECT_SHARD, BUCKET_RESYNC_ENCRYPTED_MULTIPART, BUCKET_LOGGING_FLUSH, + BUCKET_LOGGING_INFO, POLICY, LOG_LIST, LOG_SHOW, @@ -940,6 +942,7 @@ static SimpleCmd::Commands all_cmds = { { "bucket object shard", OPT::BUCKET_OBJECT_SHARD }, { "bucket resync encrypted multipart", OPT::BUCKET_RESYNC_ENCRYPTED_MULTIPART }, { "bucket logging flush", OPT::BUCKET_LOGGING_FLUSH }, + { "bucket logging info", OPT::BUCKET_LOGGING_INFO }, { "policy", OPT::POLICY }, { "log list", OPT::LOG_LIST }, { "log show", OPT::LOG_SHOW }, @@ -2540,8 +2543,8 @@ static void sync_status(Formatter *formatter) struct indented { int w; // indent width - std::string_view header; - indented(int w, std::string_view header = "") : w(w), header(header) {} + std::string header; + indented(int w, std::string header = "") : w(w), header(header) {} }; std::ostream& operator<<(std::ostream& out, const indented& h) { return out << std::setw(h.w) << h.header << std::setw(1) << ' '; @@ -2549,10 +2552,10 @@ std::ostream& operator<<(std::ostream& out, const indented& h) { struct bucket_source_sync_info { const RGWZone& _source; - std::string_view error; + std::string error; std::map<int,std::string> shards_behind; int total_shards; - std::string_view status; + std::string status; rgw_bucket bucket_source; bucket_source_sync_info(const RGWZone& source): _source(source) {} @@ -3072,14 +3075,12 @@ static int bucket_sync_status(rgw::sal::Driver* driver, const RGWBucketInfo& inf } if (pipe.source.zone.value_or(rgw_zone_id()) == z->second.id) { bucket_source_sync_info source_sync_info(z->second); - auto ret = bucket_source_sync_status(dpp(), static_cast<rgw::sal::RadosStore*>(driver), static_cast<rgw::sal::RadosStore*>(driver)->svc()->zone->get_zone(), z->second, + bucket_source_sync_status(dpp(), static_cast<rgw::sal::RadosStore*>(driver), static_cast<rgw::sal::RadosStore*>(driver)->svc()->zone->get_zone(), z->second, c->second, info, pipe, source_sync_info); - if (ret == 0) { - bucket_sync_info.source_status_info.emplace_back(std::move(source_sync_info)); - } + bucket_sync_info.source_status_info.emplace_back(std::move(source_sync_info)); } } } @@ -7750,6 +7751,47 @@ int main(int argc, const char **argv) return 0; } + if (opt_cmd == OPT::BUCKET_LOGGING_INFO) { + if (bucket_name.empty()) { + cerr << "ERROR: bucket not specified" << std::endl; + return EINVAL; + } + int ret = init_bucket(tenant, bucket_name, bucket_id, &bucket); + if (ret < 0) { + return -ret; + } + const auto& bucket_attrs = bucket->get_attrs(); + auto iter = bucket_attrs.find(RGW_ATTR_BUCKET_LOGGING); + if (iter != bucket_attrs.end()) { + rgw::bucketlogging::configuration configuration; + try { + configuration.enabled = true; + decode(configuration, iter->second); + } catch (buffer::error& err) { + cerr << "ERROR: failed to decode logging attribute '" << RGW_ATTR_BUCKET_LOGGING + << "'. error: " << err.what() << std::endl; + return EINVAL; + } + encode_json("logging", configuration, formatter.get()); + formatter->flush(cout); + } + iter = bucket_attrs.find(RGW_ATTR_BUCKET_LOGGING_SOURCES); + if (iter != bucket_attrs.end()) { + rgw::bucketlogging::source_buckets sources; + try { + decode(sources, iter->second); + } catch (buffer::error& err) { + cerr << "ERROR: failed to decode logging sources attribute '" << RGW_ATTR_BUCKET_LOGGING_SOURCES + << "'. error: " << err.what() << std::endl; + return EINVAL; + } + encode_json("logging_sources", sources, formatter.get()); + formatter->flush(cout); + } + + return 0; + } + if (opt_cmd == OPT::LOG_LIST) { // filter by date? if (date.size() && date.size() != 10) { diff --git a/src/rgw/rgw_bucket_logging.cc b/src/rgw/rgw_bucket_logging.cc index d24a53024f1..dd407f26e8c 100644 --- a/src/rgw/rgw_bucket_logging.cc +++ b/src/rgw/rgw_bucket_logging.cc @@ -192,7 +192,7 @@ ceph::coarse_real_time time_from_name(const std::string& obj_name, const DoutPre ldpp_dout(dpp, 1) << "ERROR: logging object name too short: " << obj_name << dendl; return extracted_time; } - const auto time_start_pos = obj_name_length - (time_format_length + UniqueStringLength + 1); + const auto time_start_pos = obj_name_length - (time_format_length + UniqueStringLength + 1); // note: +1 is for the dash between the timestamp and the unique string std::string time_str = obj_name.substr(time_start_pos, time_format_length); @@ -206,6 +206,13 @@ ceph::coarse_real_time time_from_name(const std::string& obj_name, const DoutPre return extracted_time; } +std::string full_bucket_name(const std::unique_ptr<rgw::sal::Bucket>& bucket) { + if (bucket->get_tenant().empty()) { + return bucket->get_name(); + } + return fmt::format("{}:{}", bucket->get_tenant(), bucket->get_name()); +} + int new_logging_object(const configuration& conf, const std::unique_ptr<rgw::sal::Bucket>& bucket, std::string& obj_name, @@ -235,23 +242,22 @@ int new_logging_object(const configuration& conf, conf.target_prefix, to_string(bucket->get_owner()), source_region, - bucket->get_name(), + full_bucket_name(bucket), t, t, unique); } break; } - int ret = bucket->set_logging_object_name(obj_name, conf.target_prefix, y, dpp, init_obj, objv_tracker); if (ret == -EEXIST || ret == -ECANCELED) { if (ret = bucket->get_logging_object_name(obj_name, conf.target_prefix, y, dpp, nullptr); ret < 0) { ldpp_dout(dpp, 1) << "ERROR: failed to get name of logging object of bucket '" << - conf.target_bucket << "'. ret = " << ret << dendl; + conf.target_bucket << "' and prefix '" << conf.target_prefix << "', ret = " << ret << dendl; return ret; } ldpp_dout(dpp, 20) << "INFO: name already set. got name of logging object '" << obj_name << "' of bucket '" << - conf.target_bucket << "'" << dendl; + conf.target_bucket << "' and prefix '" << conf.target_prefix << "'" << dendl; return -ECANCELED; } else if (ret < 0) { ldpp_dout(dpp, 1) << "ERROR: failed to write name of logging object '" << obj_name << "' of bucket '" << @@ -263,6 +269,44 @@ int new_logging_object(const configuration& conf, return 0; } +int commit_logging_object(const configuration& conf, + const DoutPrefixProvider *dpp, + rgw::sal::Driver* driver, + const std::string& tenant_name, + optional_yield y) { + std::string target_bucket_name; + std::string target_tenant_name; + auto ret = rgw_parse_url_bucket(conf.target_bucket, tenant_name, target_tenant_name, target_bucket_name); + if (ret < 0) { + ldpp_dout(dpp, 1) << "ERROR: failed to parse target bucket '" << conf.target_bucket << "' when commiting logging object, ret = " + << ret << dendl; + return ret; + } + const rgw_bucket target_bucket_id(target_tenant_name, target_bucket_name); + std::unique_ptr<rgw::sal::Bucket> target_bucket; + ret = driver->load_bucket(dpp, target_bucket_id, + &target_bucket, y); + if (ret < 0) { + ldpp_dout(dpp, 1) << "ERROR: failed to get target logging bucket '" << target_bucket_id << "' when commiting logging object, ret = " + << ret << dendl; + return ret; + } + return commit_logging_object(conf, target_bucket, dpp, y); +} + +int commit_logging_object(const configuration& conf, + const std::unique_ptr<rgw::sal::Bucket>& target_bucket, + const DoutPrefixProvider *dpp, + optional_yield y) { + std::string obj_name; + if (const auto ret = target_bucket->get_logging_object_name(obj_name, conf.target_prefix, y, dpp, nullptr); ret < 0) { + ldpp_dout(dpp, 1) << "ERROR: failed to get name of logging object of bucket '" << + target_bucket->get_info().bucket << "'. ret = " << ret << dendl; + return ret; + } + return target_bucket->commit_logging_object(obj_name, y, dpp); +} + int rollover_logging_object(const configuration& conf, const std::unique_ptr<rgw::sal::Bucket>& bucket, std::string& obj_name, @@ -270,12 +314,16 @@ int rollover_logging_object(const configuration& conf, optional_yield y, bool must_commit, RGWObjVersionTracker* objv_tracker) { - if (conf.target_bucket != bucket->get_name()) { - ldpp_dout(dpp, 1) << "ERROR: bucket name mismatch: '" << conf.target_bucket << "' != '" << bucket->get_name() << "'" << dendl; + std::string target_bucket_name; + std::string target_tenant_name; + std::ignore = rgw_parse_url_bucket(conf.target_bucket, bucket->get_tenant(), target_tenant_name, target_bucket_name); + if (target_bucket_name != bucket->get_name() || target_tenant_name != bucket->get_tenant()) { + ldpp_dout(dpp, 1) << "ERROR: bucket name mismatch. conf= '" << conf.target_bucket << + "', bucket= '" << bucket->get_info().bucket << "'" << dendl; return -EINVAL; } const auto old_obj = obj_name; - const auto ret = new_logging_object(conf, bucket, obj_name, dpp, y, false, objv_tracker); + const auto ret = new_logging_object(conf, bucket, obj_name, dpp, y, false, objv_tracker); if (ret == -ECANCELED) { ldpp_dout(dpp, 20) << "INFO: rollover already performed for '" << old_obj << "' to bucket '" << conf.target_bucket << "'. ret = " << ret << dendl; @@ -342,14 +390,14 @@ S3 bucket short (ceph) log record - eTag };*/ -int log_record(rgw::sal::Driver* driver, +int log_record(rgw::sal::Driver* driver, const sal::Object* obj, - const req_state* s, - const std::string& op_name, - const std::string& etag, + const req_state* s, + const std::string& op_name, + const std::string& etag, size_t size, const configuration& conf, - const DoutPrefixProvider *dpp, + const DoutPrefixProvider *dpp, optional_yield y, bool async_completion, bool log_source_bucket) { @@ -357,11 +405,19 @@ int log_record(rgw::sal::Driver* driver, ldpp_dout(dpp, 1) << "ERROR: only bucket operations are logged" << dendl; return -EINVAL; } + std::string target_bucket_name; + std::string target_tenant_name; + auto ret = rgw_parse_url_bucket(conf.target_bucket, s->bucket_tenant, target_tenant_name, target_bucket_name); + if (ret < 0) { + ldpp_dout(dpp, 1) << "ERROR: failed to parse target bucket '" << conf.target_bucket << "', ret = " << ret << dendl; + return ret; + } + const rgw_bucket target_bucket_id(target_tenant_name, target_bucket_name); std::unique_ptr<rgw::sal::Bucket> target_bucket; - auto ret = driver->load_bucket(dpp, rgw_bucket(s->bucket_tenant, conf.target_bucket), + ret = driver->load_bucket(dpp, target_bucket_id, &target_bucket, y); if (ret < 0) { - ldpp_dout(dpp, 1) << "ERROR: failed to get target logging bucket '" << conf.target_bucket << "'. ret = " << ret << dendl; + ldpp_dout(dpp, 1) << "ERROR: failed to get target logging bucket '" << target_bucket_id << "'. ret = " << ret << dendl; return ret; } std::string obj_name; @@ -382,12 +438,14 @@ int log_record(rgw::sal::Driver* driver, // try to create the temporary log object for the first time ret = new_logging_object(conf, target_bucket, obj_name, dpp, y, true, nullptr); if (ret == 0) { - ldpp_dout(dpp, 20) << "INFO: first time logging for bucket '" << conf.target_bucket << "'" << dendl; + ldpp_dout(dpp, 20) << "INFO: first time logging for bucket '" << conf.target_bucket << "' and prefix '" << + conf.target_prefix << "'" << dendl; } else if (ret == -ECANCELED) { - ldpp_dout(dpp, 20) << "INFO: logging object '" << obj_name << "' already exists for bucket '" << conf.target_bucket << "', will be used" << dendl; + ldpp_dout(dpp, 20) << "INFO: logging object '" << obj_name << "' already exists for bucket '" << conf.target_bucket << "' and prefix" << + conf.target_prefix << "'" << dendl; } else { ldpp_dout(dpp, 1) << "ERROR: failed to create logging object of bucket '" << - conf.target_bucket << "' for the first time. ret = " << ret << dendl; + conf.target_bucket << "' and prefix '" << conf.target_prefix << "' for the first time. ret = " << ret << dendl; return ret; } } else { @@ -420,7 +478,7 @@ int log_record(rgw::sal::Driver* driver, bucket_name = s->src_bucket_name; } else { bucket_owner = to_string( s->bucket->get_owner()); - bucket_name = s->bucket->get_name(); + bucket_name = full_bucket_name(s->bucket); } switch (conf.logging_type) { @@ -459,7 +517,7 @@ int log_record(rgw::sal::Driver* driver, case LoggingType::Journal: record = fmt::format("{} {} [{:%d/%b/%Y:%H:%M:%S %z}] {} {} {} {} {}", dash_if_empty(to_string(s->bucket->get_owner())), - dash_if_empty(s->bucket->get_name()), + dash_if_empty(full_bucket_name(s->bucket)), t, op_name, dash_if_empty_or_null(obj, obj->get_name()), @@ -512,12 +570,12 @@ std::string object_name_oid(const rgw::sal::Bucket* bucket, const std::string& p int log_record(rgw::sal::Driver* driver, LoggingType type, const sal::Object* obj, - const req_state* s, - const std::string& op_name, - const std::string& etag, - size_t size, - const DoutPrefixProvider *dpp, - optional_yield y, + const req_state* s, + const std::string& op_name, + const std::string& etag, + size_t size, + const DoutPrefixProvider *dpp, + optional_yield y, bool async_completion, bool log_source_bucket) { if (!s->bucket) { @@ -534,7 +592,7 @@ int log_record(rgw::sal::Driver* driver, try { configuration.enabled = true; auto bl_iter = iter->second.cbegin(); - decode(configuration, bl_iter); + decode(configuration, bl_iter); if (type != LoggingType::Any && configuration.logging_type != type) { return 0; } @@ -543,20 +601,199 @@ int log_record(rgw::sal::Driver* driver, return 0; } } - ldpp_dout(dpp, 20) << "INFO: found matching logging configuration of bucket '" << s->bucket->get_name() << + ldpp_dout(dpp, 20) << "INFO: found matching logging configuration of bucket '" << s->bucket->get_info().bucket << "' configuration: " << configuration.to_json_str() << dendl; - if (auto ret = log_record(driver, obj, s, op_name, etag, size, configuration, dpp, y, async_completion, log_source_bucket); ret < 0) { - ldpp_dout(dpp, 1) << "ERROR: failed to perform logging for bucket '" << s->bucket->get_name() << + if (auto ret = log_record(driver, obj, s, op_name, etag, size, configuration, dpp, y, async_completion, log_source_bucket); ret < 0) { + ldpp_dout(dpp, 1) << "ERROR: failed to perform logging for bucket '" << s->bucket->get_info().bucket << "'. ret=" << ret << dendl; return ret; } } catch (buffer::error& err) { - ldpp_dout(dpp, 1) << "ERROR: failed to decode logging attribute '" << RGW_ATTR_BUCKET_LOGGING + ldpp_dout(dpp, 1) << "ERROR: failed to decode logging attribute '" << RGW_ATTR_BUCKET_LOGGING << "'. error: " << err.what() << dendl; return -EINVAL; } return 0; } +int get_bucket_id(const std::string& bucket_name, const std::string& tenant_name, rgw_bucket& bucket_id) { + std::string parsed_bucket_name; + std::string parsed_tenant_name; + if (const auto ret = rgw_parse_url_bucket(bucket_name, tenant_name, parsed_tenant_name, parsed_bucket_name); ret < 0) { + return ret; + } + bucket_id = rgw_bucket{parsed_tenant_name, parsed_bucket_name}; + return 0; +} + +int update_bucket_logging_sources(const DoutPrefixProvider* dpp, rgw::sal::Driver* driver, const rgw_bucket& target_bucket_id, const rgw_bucket& src_bucket_id, bool add, optional_yield y) { + std::unique_ptr<rgw::sal::Bucket> target_bucket; + const auto ret = driver->load_bucket(dpp, target_bucket_id, &target_bucket, y); + if (ret < 0) { + ldpp_dout(dpp, 1) << "WARNING: failed to get target bucket '" << target_bucket_id << "', ret = " << ret << dendl; + return ret; + } + return update_bucket_logging_sources(dpp, target_bucket, src_bucket_id, add, y); +} + +int update_bucket_logging_sources(const DoutPrefixProvider* dpp, std::unique_ptr<rgw::sal::Bucket>& bucket, const rgw_bucket& src_bucket_id, bool add, optional_yield y) { + return retry_raced_bucket_write(dpp, bucket.get(), [dpp, &bucket, &src_bucket_id, add, y] { + auto& attrs = bucket->get_attrs(); + auto iter = attrs.find(RGW_ATTR_BUCKET_LOGGING_SOURCES); + if (iter == attrs.end()) { + if (!add) { + ldpp_dout(dpp, 20) << "INFO: no logging sources attribute '" << RGW_ATTR_BUCKET_LOGGING_SOURCES + << "' for bucket '" << bucket->get_info().bucket << "', nothing to remove" << dendl; + return 0; + } + source_buckets sources{src_bucket_id}; + bufferlist bl; + ceph::encode(sources, bl); + attrs.insert(std::make_pair(RGW_ATTR_BUCKET_LOGGING_SOURCES, std::move(bl))); + return bucket->merge_and_store_attrs(dpp, attrs, y); + } + try { + source_buckets sources; + ceph::decode(sources, iter->second); + if ((add && sources.insert(src_bucket_id).second) || + (!add && sources.erase(src_bucket_id) > 0)) { + bufferlist bl; + ceph::encode(sources, bl); + iter->second = std::move(bl); + return bucket->merge_and_store_attrs(dpp, attrs, y); + } + } catch (buffer::error& err) { + ldpp_dout(dpp, 1) << "WARNING: failed to decode logging sources attribute '" << RGW_ATTR_BUCKET_LOGGING_SOURCES + << "' for bucket '" << bucket->get_info().bucket << "', error: " << err.what() << dendl; + } + ldpp_dout(dpp, 20) << "INFO: logging source '" << src_bucket_id << "' already " << + (add ? "added to" : "removed from") << " bucket '" << bucket->get_info().bucket << "'" << dendl; + return 0; + }, y); +} + + +int bucket_deletion_cleanup(const DoutPrefixProvider* dpp, + sal::Driver* driver, + sal::Bucket* bucket, + optional_yield y) { + // if the bucket is used a log bucket, we should delete all pending log objects + // and also delete the object holding the pending object name + auto& attrs = bucket->get_attrs(); + if (const auto iter = attrs.find(RGW_ATTR_BUCKET_LOGGING_SOURCES); iter != attrs.end()) { + try { + source_buckets sources; + ceph::decode(sources, iter->second); + for (const auto& source : sources) { + std::unique_ptr<rgw::sal::Bucket> src_bucket; + if (const auto ret = driver->load_bucket(dpp, source, &src_bucket, y); ret < 0) { + ldpp_dout(dpp, 1) << "WARNING: failed to get logging source bucket '" << source << "' for log bucket '" << + bucket->get_info().bucket << "', ret = " << ret << dendl; + continue; + } + auto& src_attrs = src_bucket->get_attrs(); + if (const auto iter = src_attrs.find(RGW_ATTR_BUCKET_LOGGING); iter != src_attrs.end()) { + configuration conf; + try { + auto bl_iter = iter->second.cbegin(); + decode(conf, bl_iter); + std::string obj_name; + RGWObjVersionTracker objv; + if (const auto ret = bucket->get_logging_object_name(obj_name, conf.target_prefix, y, dpp, &objv); ret < 0) { + ldpp_dout(dpp, 1) << "WARNING: failed to get logging object name for log bucket '" << bucket->get_info().bucket << + "', ret = " << ret << dendl; + continue; + } + if (const auto ret = bucket->remove_logging_object(obj_name, y, dpp); ret < 0) { + ldpp_dout(dpp, 1) << "WARNING: failed to delete pending logging object '" << obj_name << "' for log bucket '" << + bucket->get_info().bucket << "', ret = " << ret << dendl; + continue; + } + ldpp_dout(dpp, 20) << "INFO: successfully deleted pending logging object '" << obj_name << "' from deleted log bucket '" << + bucket->get_info().bucket << "'" << dendl; + if (const auto ret = bucket->remove_logging_object_name(conf.target_prefix, y, dpp, &objv); ret < 0) { + ldpp_dout(dpp, 1) << "WARNING: failed to delete object holding bucket logging object name for log bucket '" << + bucket->get_info().bucket << "', ret = " << ret << dendl; + continue; + } + ldpp_dout(dpp, 20) << "INFO: successfully deleted object holding bucket logging object name from deleted log bucket '" << + bucket->get_info().bucket << "'" << dendl; + } catch (buffer::error& err) { + ldpp_dout(dpp, 1) << "WARNING: failed to decode logging attribute '" << RGW_ATTR_BUCKET_LOGGING + << "' of bucket '" << src_bucket->get_info().bucket << "', error: " << err.what() << dendl; + } + } + } + } catch (buffer::error& err) { + ldpp_dout(dpp, 1) << "WARNING: failed to decode logging sources attribute '" << RGW_ATTR_BUCKET_LOGGING_SOURCES + << "' for bucket '" << bucket->get_info().bucket << "', error: " << err.what() << dendl; + return -EIO; + } + } + + return source_bucket_cleanup(dpp, driver, bucket, false, y); +} + +int source_bucket_cleanup(const DoutPrefixProvider* dpp, + sal::Driver* driver, + sal::Bucket* bucket, + bool remove_attr, + optional_yield y) { + std::optional<configuration> conf; + const auto& info = bucket->get_info(); + if (const auto ret = retry_raced_bucket_write(dpp, bucket, [dpp, bucket, &conf, &info, remove_attr, y] { + auto& attrs = bucket->get_attrs(); + if (auto iter = attrs.find(RGW_ATTR_BUCKET_LOGGING); iter != attrs.end()) { + try { + auto bl_iter = iter->second.cbegin(); + configuration tmp_conf; + tmp_conf.enabled = true; + decode(tmp_conf, bl_iter); + conf = std::move(tmp_conf); + } catch (buffer::error& err) { + ldpp_dout(dpp, 1) << "WARNING: failed to decode existing logging attribute '" << RGW_ATTR_BUCKET_LOGGING + << "' of bucket '" << info.bucket << "', error: " << err.what() << dendl; + return -EIO; + } + if (remove_attr) { + attrs.erase(iter); + return bucket->merge_and_store_attrs(dpp, attrs, y); + } + } + // nothing to remove or no need to remove + return 0; + }, y); ret < 0) { + if (remove_attr) { + ldpp_dout(dpp, 1) << "ERROR: failed to remove logging attribute '" << RGW_ATTR_BUCKET_LOGGING << "' from bucket '" << + info.bucket << "', ret = " << ret << dendl; + } + return ret; + } + if (!conf) { + // no logging attribute found + return 0; + } + if (const auto ret = commit_logging_object(*conf, dpp, driver, info.bucket.tenant, y); ret < 0) { + ldpp_dout(dpp, 1) << "WARNING: could not commit pending logging object of bucket '" << + info.bucket << "', ret = " << ret << dendl; + } else { + ldpp_dout(dpp, 20) << "INFO: successfully committed pending logging object of bucket '" << info.bucket << "'" << dendl; + } + rgw_bucket target_bucket_id; + rgw_bucket src_bucket_id{info.bucket.tenant, info.bucket.name}; + if (const auto ret = get_bucket_id(conf->target_bucket, info.bucket.tenant, target_bucket_id); ret < 0) { + ldpp_dout(dpp, 1) << "WARNING: failed to parse target bucket '" << conf->target_bucket << "', ret = " << ret << dendl; + return 0; + } + if (const auto ret = update_bucket_logging_sources(dpp, driver, target_bucket_id, src_bucket_id, false, y); ret < 0) { + ldpp_dout(dpp, 1) << "WARNING: could not update bucket logging source '" << + info.bucket << "', ret = " << ret << dendl; + return 0; + } + ldpp_dout(dpp, 20) << "INFO: successfully updated bucket logging source '" << + info.bucket << "'"<< dendl; + return 0; +} + } // namespace rgw::bucketlogging diff --git a/src/rgw/rgw_bucket_logging.h b/src/rgw/rgw_bucket_logging.h index d4877bafb0f..cbdb8b55f88 100644 --- a/src/rgw/rgw_bucket_logging.h +++ b/src/rgw/rgw_bucket_logging.h @@ -4,7 +4,6 @@ #pragma once #include <string> -#include <optional> #include <cstdint> #include "rgw_sal_fwd.h" #include "include/buffer.h" @@ -16,7 +15,7 @@ class XMLObj; namespace ceph { class Formatter; } class DoutPrefixProvider; struct req_state; -class RGWObjVersionTracker; +struct RGWObjVersionTracker; class RGWOp; namespace rgw::bucketlogging { @@ -66,6 +65,17 @@ enum class LoggingType {Standard, Journal, Any}; enum class PartitionDateSource {DeliveryTime, EventTime}; struct configuration { + bool operator==(const configuration& rhs) const { + return enabled == rhs.enabled && + target_bucket == rhs.target_bucket && + obj_key_format == rhs.obj_key_format && + target_prefix == rhs.target_prefix && + obj_roll_time == rhs.obj_roll_time && + logging_type == rhs.logging_type && + records_batch_size == rhs.records_batch_size && + date_source == rhs.date_source && + key_filter == rhs.key_filter; + } uint32_t default_obj_roll_time = 300; bool enabled = false; std::string target_bucket; @@ -129,6 +139,8 @@ struct configuration { }; WRITE_CLASS_ENCODER(configuration) +using source_buckets = std::set<rgw_bucket>; + constexpr unsigned MAX_BUCKET_LOGGING_BUFFER = 1000; using bucket_logging_records = std::array<std::string, MAX_BUCKET_LOGGING_BUFFER>; @@ -155,7 +167,7 @@ int log_record(rgw::sal::Driver* driver, bool async_completion, bool log_source_bucket); -// commit the pending log objec tto the log bucket +// commit the pending log objec to the log bucket // and create a new pending log object // if "must_commit" is "false" the function will return success even if the pending log object was not committed int rollover_logging_object(const configuration& conf, @@ -166,6 +178,23 @@ int rollover_logging_object(const configuration& conf, bool must_commit, RGWObjVersionTracker* objv_tracker); +// commit the pending log object to the log bucket +// use this for cleanup, when new pending object is not needed +// and target bucket is known +int commit_logging_object(const configuration& conf, + const std::unique_ptr<rgw::sal::Bucket>& target_bucket, + const DoutPrefixProvider *dpp, + optional_yield y); + +// commit the pending log object to the log bucket +// use this for cleanup, when new pending object is not needed +// and target bucket shoud be loaded based on the configuration +int commit_logging_object(const configuration& conf, + const DoutPrefixProvider *dpp, + rgw::sal::Driver* driver, + const std::string& tenant_name, + optional_yield y); + // return the oid of the object holding the name of the temporary logging object // bucket - log bucket // prefix - logging prefix from configuration. should be used when multiple buckets log into the same log bucket @@ -185,5 +214,37 @@ int log_record(rgw::sal::Driver* driver, optional_yield y, bool async_completion, bool log_source_bucket); + +// return (by ref) an rgw_bucket object with the bucket name and tenant name +// fails if the bucket name is not in the format: [tenant name:]<bucket name> +int get_bucket_id(const std::string& bucket_name, const std::string& tenant_name, rgw_bucket& bucket_id); + +// update (add or remove) a source bucket from the list of source buckets in the target bucket +// use this function when the target bucket is already loaded +int update_bucket_logging_sources(const DoutPrefixProvider* dpp, std::unique_ptr<rgw::sal::Bucket>& bucket, + const rgw_bucket& src_bucket, bool add, optional_yield y); + +// update (add or remove) a source bucket from the list of source buckets in the target bucket +// use this function when the target bucket is not known and needs to be loaded +int update_bucket_logging_sources(const DoutPrefixProvider* dpp, rgw::sal::Driver* driver, const rgw_bucket& target_bucket_id, + const rgw_bucket& src_bucket_id, bool add, optional_yield y); + +// when source bucket is deleted, all pending log objects should be comitted to the log bucket +// when the target bucket is deleted, all pending log objects should be deleted, as well as the object holding the pending log object name +int bucket_deletion_cleanup(const DoutPrefixProvider* dpp, + sal::Driver* driver, + sal::Bucket* bucket, + optional_yield y); + +// if bucket has bucket logging configuration associated with it then: +// if "remove_attr" is true, the bucket logging configuration should be removed from the bucket +// in addition: +// any pending log objects should be comitted to the log bucket +// and the log bucket should be updated to remove the bucket as a source +int source_bucket_cleanup(const DoutPrefixProvider* dpp, + sal::Driver* driver, + sal::Bucket* bucket, + bool remove_attr, + optional_yield y); } // namespace rgw::bucketlogging diff --git a/src/rgw/rgw_common.h b/src/rgw/rgw_common.h index d2917838f36..99f7db4f569 100644 --- a/src/rgw/rgw_common.h +++ b/src/rgw/rgw_common.h @@ -108,6 +108,7 @@ using ceph::crypto::MD5; #define RGW_ATTR_X_ROBOTS_TAG RGW_ATTR_PREFIX "x-robots-tag" #define RGW_ATTR_STORAGE_CLASS RGW_ATTR_PREFIX "storage_class" #define RGW_ATTR_BUCKET_LOGGING RGW_ATTR_PREFIX "logging" +#define RGW_ATTR_BUCKET_LOGGING_SOURCES RGW_ATTR_PREFIX "logging-sources" /* S3 Object Lock*/ #define RGW_ATTR_OBJECT_LOCK RGW_ATTR_PREFIX "object-lock" diff --git a/src/rgw/rgw_rest_bucket_logging.cc b/src/rgw/rgw_rest_bucket_logging.cc index ed12ce855a9..afd79b0a548 100644 --- a/src/rgw/rgw_rest_bucket_logging.cc +++ b/src/rgw/rgw_rest_bucket_logging.cc @@ -58,30 +58,29 @@ public: return; } - std::unique_ptr<rgw::sal::Bucket> bucket; - op_ret = driver->load_bucket(this, rgw_bucket(s->bucket_tenant, s->bucket_name), - &bucket, y); + const rgw_bucket src_bucket_id(s->bucket_tenant, s->bucket_name); + std::unique_ptr<rgw::sal::Bucket> src_bucket; + op_ret = driver->load_bucket(this, src_bucket_id, + &src_bucket, y); if (op_ret < 0) { - ldpp_dout(this, 1) << "ERROR: failed to get bucket '" << - (s->bucket_tenant.empty() ? s->bucket_name : s->bucket_tenant + ":" + s->bucket_name) << - "' info, ret = " << op_ret << dendl; + ldpp_dout(this, 1) << "ERROR: failed to get bucket '" << src_bucket_id << "', ret = " << op_ret << dendl; return; } - if (auto iter = bucket->get_attrs().find(RGW_ATTR_BUCKET_LOGGING); iter != bucket->get_attrs().end()) { + if (auto iter = src_bucket->get_attrs().find(RGW_ATTR_BUCKET_LOGGING); iter != src_bucket->get_attrs().end()) { try { configuration.enabled = true; decode(configuration, iter->second); } catch (buffer::error& err) { - ldpp_dout(this, 1) << "ERROR: failed to decode attribute '" << RGW_ATTR_BUCKET_LOGGING - << "'. error: " << err.what() << dendl; + ldpp_dout(this, 1) << "WARNING: failed to decode logging attribute '" << RGW_ATTR_BUCKET_LOGGING + << "' for bucket '" << src_bucket_id << "', error: " << err.what() << dendl; op_ret = -EIO; return; } } else { - ldpp_dout(this, 5) << "WARNING: no logging configuration on bucket '" << bucket->get_name() << "'" << dendl; + ldpp_dout(this, 5) << "WARNING: no logging configuration on bucket '" << src_bucket_id << "'" << dendl; return; } - ldpp_dout(this, 20) << "INFO: found logging configuration on bucket '" << bucket->get_name() << "'" + ldpp_dout(this, 20) << "INFO: found logging configuration on bucket '" << src_bucket_id << "'" << "'. configuration: " << configuration.to_json_str() << dendl; } @@ -159,58 +158,125 @@ class RGWPutBucketLoggingOp : public RGWDefaultResponseOp { return; } - std::unique_ptr<rgw::sal::Bucket> bucket; - op_ret = driver->load_bucket(this, rgw_bucket(s->bucket_tenant, s->bucket_name), - &bucket, y); + const rgw_bucket src_bucket_id(s->bucket_tenant, s->bucket_name); + std::unique_ptr<rgw::sal::Bucket> src_bucket; + op_ret = driver->load_bucket(this, src_bucket_id, + &src_bucket, y); if (op_ret < 0) { - ldpp_dout(this, 1) << "ERROR: failed to get bucket '" << s->bucket_name << "', ret = " << op_ret << dendl; + ldpp_dout(this, 1) << "ERROR: failed to get bucket '" << src_bucket_id << "', ret = " << op_ret << dendl; return; } - - auto& attrs = bucket->get_attrs(); if (!configuration.enabled) { - if (auto iter = attrs.find(RGW_ATTR_BUCKET_LOGGING); iter != attrs.end()) { - attrs.erase(iter); - } - } else { - std::unique_ptr<rgw::sal::Bucket> target_bucket; - op_ret = driver->load_bucket(this, rgw_bucket(s->bucket_tenant, configuration.target_bucket), - &target_bucket, y); - if (op_ret < 0) { - ldpp_dout(this, 1) << "ERROR: failed to get target bucket '" << configuration.target_bucket << "', ret = " << op_ret << dendl; - return; - } - const auto& target_attrs = target_bucket->get_attrs(); - if (target_attrs.find(RGW_ATTR_BUCKET_LOGGING) != target_attrs.end()) { - // target bucket must not have logging set on it - ldpp_dout(this, 1) << "ERROR: logging target bucket '" << configuration.target_bucket << "', is configured with bucket logging" << dendl; - op_ret = -EINVAL; - return; - } - // TODO: verify target bucket does not have encryption - bufferlist conf_bl; - encode(configuration, conf_bl); - attrs[RGW_ATTR_BUCKET_LOGGING] = conf_bl; - // TODO: should we add attribute to target bucket indicating it is target to bucket logging? - // if we do, how do we maintain it when bucket logging changes? + op_ret = rgw::bucketlogging::source_bucket_cleanup(this, driver, src_bucket.get(), true, y); + return; + } + + // set logging configuration + rgw_bucket target_bucket_id; + if (op_ret = rgw::bucketlogging::get_bucket_id(configuration.target_bucket, s->bucket_tenant, target_bucket_id); op_ret < 0) { + ldpp_dout(this, 1) << "ERROR: failed to parse target bucket '" << configuration.target_bucket << "', ret = " << op_ret << dendl; + return; + } + + if (target_bucket_id == src_bucket_id) { + ldpp_dout(this, 1) << "ERROR: target bucket '" << target_bucket_id << "' must be different from source bucket" << dendl; + op_ret = -EINVAL; + return; + } + std::unique_ptr<rgw::sal::Bucket> target_bucket; + op_ret = driver->load_bucket(this, target_bucket_id, + &target_bucket, y); + if (op_ret < 0) { + ldpp_dout(this, 1) << "ERROR: failed to get target bucket '" << target_bucket_id << "', ret = " << op_ret << dendl; + return; + } + auto& target_attrs = target_bucket->get_attrs(); + if (target_attrs.find(RGW_ATTR_BUCKET_LOGGING) != target_attrs.end()) { + // target bucket must not have logging set on it + ldpp_dout(this, 1) << "ERROR: logging target bucket '" << target_bucket_id << "', is configured with bucket logging" << dendl; + op_ret = -EINVAL; + return; } - // TODO: use retry_raced_bucket_write from rgw_op.cc - op_ret = bucket->merge_and_store_attrs(this, attrs, y); + // verify target bucket does not have encryption + if (target_attrs.find(RGW_ATTR_BUCKET_ENCRYPTION_POLICY) != target_attrs.end()) { + ldpp_dout(this, 1) << "ERROR: logging target bucket '" << target_bucket_id << "', is configured with encryption" << dendl; + op_ret = -EINVAL; + return; + } + std::optional<rgw::bucketlogging::configuration> old_conf; + bufferlist conf_bl; + encode(configuration, conf_bl); + op_ret = retry_raced_bucket_write(this, src_bucket.get(), [this, &conf_bl, &src_bucket, &old_conf, &configuration, y] { + auto& attrs = src_bucket->get_attrs(); + auto it = attrs.find(RGW_ATTR_BUCKET_LOGGING); + if (it != attrs.end()) { + try { + rgw::bucketlogging::configuration tmp_conf; + tmp_conf.enabled = true; + decode(tmp_conf, it->second); + old_conf = std::move(tmp_conf); + } catch (buffer::error& err) { + ldpp_dout(this, 1) << "WARNING: failed to decode existing logging attribute '" << RGW_ATTR_BUCKET_LOGGING + << "' for bucket '" << src_bucket->get_info().bucket << "', error: " << err.what() << dendl; + } + if (!old_conf || (old_conf && *old_conf != configuration)) { + // conf changed (or was unknown) - update + it->second = conf_bl; + return src_bucket->merge_and_store_attrs(this, attrs, y); + } + // nothing to update + return 0; + } + // conf was added + attrs.insert(std::make_pair(RGW_ATTR_BUCKET_LOGGING, conf_bl)); + return src_bucket->merge_and_store_attrs(this, attrs, y); + }, y); if (op_ret < 0) { ldpp_dout(this, 1) << "ERROR: failed to set logging attribute '" << RGW_ATTR_BUCKET_LOGGING << "' to bucket '" << - bucket->get_name() << "', ret = " << op_ret << dendl; + src_bucket_id << "', ret = " << op_ret << dendl; return; } - - ldpp_dout(this, 20) << "INFO: " << (configuration.enabled ? "wrote" : "removed") - << " logging configuration. bucket '" << bucket->get_name() << "'. configuration: " << - configuration.to_json_str() << dendl; + if (!old_conf) { + ldpp_dout(this, 20) << "INFO: new logging configuration added to bucket '" << src_bucket_id << "'. configuration: " << + configuration.to_json_str() << dendl; + if (const auto ret = rgw::bucketlogging::update_bucket_logging_sources(this, target_bucket, src_bucket_id, true, y); ret < 0) { + ldpp_dout(this, 1) << "WARNING: failed to add source bucket '" << src_bucket_id << "' to logging sources of target bucket '" << + target_bucket_id << "', ret = " << ret << dendl; + } + } else if (*old_conf != configuration) { + // conf changed - do cleanup + if (const auto ret = commit_logging_object(*old_conf, target_bucket, this, y); ret < 0) { + ldpp_dout(this, 1) << "WARNING: could not commit pending logging object when updating logging configuration of bucket '" << + src_bucket->get_info().bucket << "', ret = " << ret << dendl; + } else { + ldpp_dout(this, 20) << "INFO: committed pending logging object when updating logging configuration of bucket '" << + src_bucket->get_info().bucket << "'" << dendl; + } + if (old_conf->target_bucket != configuration.target_bucket) { + rgw_bucket old_target_bucket_id; + if (const auto ret = rgw::bucketlogging::get_bucket_id(old_conf->target_bucket, s->bucket_tenant, old_target_bucket_id); ret < 0) { + ldpp_dout(this, 1) << "ERROR: failed to parse target bucket '" << old_conf->target_bucket << "', ret = " << ret << dendl; + return; + } + if (const auto ret = rgw::bucketlogging::update_bucket_logging_sources(this, driver, old_target_bucket_id, src_bucket_id, false, y); ret < 0) { + ldpp_dout(this, 1) << "WARNING: failed to remove source bucket '" << src_bucket_id << "' from logging sources of original target bucket '" << + old_target_bucket_id << "', ret = " << ret << dendl; + } + if (const auto ret = rgw::bucketlogging::update_bucket_logging_sources(this, target_bucket, src_bucket_id, true, y); ret < 0) { + ldpp_dout(this, 1) << "WARNING: failed to add source bucket '" << src_bucket_id << "' to logging sources of target bucket '" << + target_bucket_id << "', ret = " << ret << dendl; + } + } + ldpp_dout(this, 20) << "INFO: wrote logging configuration to bucket '" << src_bucket_id << "'. configuration: " << + configuration.to_json_str() << dendl; + } else { + ldpp_dout(this, 20) << "INFO: logging configuration of bucket '" << src_bucket_id << "' did not change" << dendl; + } } }; // Post /<bucket name>/?logging -// actual configuration is XML encoded in the body of the message class RGWPostBucketLoggingOp : public RGWDefaultResponseOp { int verify_permission(optional_yield y) override { auto [has_s3_existing_tag, has_s3_resource_tag] = rgw_check_policy_condition(this, s, false); @@ -234,17 +300,18 @@ class RGWPostBucketLoggingOp : public RGWDefaultResponseOp { return; } - std::unique_ptr<rgw::sal::Bucket> bucket; - op_ret = driver->load_bucket(this, rgw_bucket(s->bucket_tenant, s->bucket_name), - &bucket, y); + const rgw_bucket src_bucket_id(s->bucket_tenant, s->bucket_name); + std::unique_ptr<rgw::sal::Bucket> src_bucket; + op_ret = driver->load_bucket(this, src_bucket_id, + &src_bucket, y); if (op_ret < 0) { - ldpp_dout(this, 1) << "ERROR: failed to get bucket '" << s->bucket_name << "', ret = " << op_ret << dendl; + ldpp_dout(this, 1) << "ERROR: failed to get bucket '" << src_bucket_id << "', ret = " << op_ret << dendl; return; } - const auto& bucket_attrs = bucket->get_attrs(); + const auto& bucket_attrs = src_bucket->get_attrs(); auto iter = bucket_attrs.find(RGW_ATTR_BUCKET_LOGGING); if (iter == bucket_attrs.end()) { - ldpp_dout(this, 1) << "WARNING: no logging configured on bucket" << dendl; + ldpp_dout(this, 1) << "WARNING: no logging configured on bucket '" << src_bucket_id << "'" << dendl; return; } rgw::bucketlogging::configuration configuration; @@ -252,33 +319,38 @@ class RGWPostBucketLoggingOp : public RGWDefaultResponseOp { configuration.enabled = true; decode(configuration, iter->second); } catch (buffer::error& err) { - ldpp_dout(this, 1) << "ERROR: failed to decode logging attribute '" << RGW_ATTR_BUCKET_LOGGING - << "'. error: " << err.what() << dendl; + ldpp_dout(this, 1) << "WARNING: failed to decode logging attribute '" << RGW_ATTR_BUCKET_LOGGING + << "' for bucket '" << src_bucket_id << "', error: " << err.what() << dendl; op_ret = -EINVAL; return; } + rgw_bucket target_bucket_id; + if (op_ret = rgw::bucketlogging::get_bucket_id(configuration.target_bucket, s->bucket_tenant, target_bucket_id); op_ret < 0) { + ldpp_dout(this, 1) << "ERROR: failed to parse target bucket '" << configuration.target_bucket << "', ret = " << op_ret << dendl; + return; + } std::unique_ptr<rgw::sal::Bucket> target_bucket; - op_ret = driver->load_bucket(this, rgw_bucket(s->bucket_tenant, configuration.target_bucket), + op_ret = driver->load_bucket(this, target_bucket_id, &target_bucket, y); if (op_ret < 0) { - ldpp_dout(this, 1) << "ERROR: failed to get target bucket '" << configuration.target_bucket << "', ret = " << op_ret << dendl; + ldpp_dout(this, 1) << "ERROR: failed to get target bucket '" << target_bucket_id << "', ret = " << op_ret << dendl; return; } std::string obj_name; RGWObjVersionTracker objv_tracker; op_ret = target_bucket->get_logging_object_name(obj_name, configuration.target_prefix, null_yield, this, &objv_tracker); if (op_ret < 0) { - ldpp_dout(this, 1) << "ERROR: failed to get pending logging object name from target bucket '" << configuration.target_bucket << "'" << dendl; + ldpp_dout(this, 1) << "ERROR: failed to get pending logging object name from target bucket '" << target_bucket_id << "'" << dendl; return; } op_ret = rgw::bucketlogging::rollover_logging_object(configuration, target_bucket, obj_name, this, null_yield, true, &objv_tracker); if (op_ret < 0) { ldpp_dout(this, 1) << "ERROR: failed to flush pending logging object '" << obj_name - << "' to target bucket '" << configuration.target_bucket << "'" << dendl; + << "' to target bucket '" << target_bucket_id << "'" << dendl; return; } - ldpp_dout(this, 20) << "flushed pending logging object '" << obj_name + ldpp_dout(this, 20) << "INFO: flushed pending logging object '" << obj_name << "' to target bucket '" << configuration.target_bucket << "'" << dendl; } }; diff --git a/src/rgw/rgw_s3_filter.h b/src/rgw/rgw_s3_filter.h index 9bbc4ef0088..0273da9a364 100644 --- a/src/rgw/rgw_s3_filter.h +++ b/src/rgw/rgw_s3_filter.h @@ -9,6 +9,7 @@ class XMLObj; struct rgw_s3_key_filter { + bool operator==(const rgw_s3_key_filter& rhs) const = default; std::string prefix_rule; std::string suffix_rule; std::string regex_rule; diff --git a/src/rgw/rgw_sal.h b/src/rgw/rgw_sal.h index 4b94f74b851..97e25179fc9 100644 --- a/src/rgw/rgw_sal.h +++ b/src/rgw/rgw_sal.h @@ -1006,20 +1006,27 @@ class Bucket { optional_yield y, const DoutPrefixProvider *dpp) = 0; /** Read the name of the pending bucket logging object name */ - virtual int get_logging_object_name(std::string& obj_name, - const std::string& prefix, - optional_yield y, + virtual int get_logging_object_name(std::string& obj_name, + const std::string& prefix, + optional_yield y, const DoutPrefixProvider *dpp, RGWObjVersionTracker* objv_tracker) = 0; /** Update the name of the pending bucket logging object name */ - virtual int set_logging_object_name(const std::string& obj_name, - const std::string& prefix, - optional_yield y, - const DoutPrefixProvider *dpp, + virtual int set_logging_object_name(const std::string& obj_name, + const std::string& prefix, + optional_yield y, + const DoutPrefixProvider *dpp, bool new_obj, RGWObjVersionTracker* objv_tracker) = 0; + /** Remove the object holding the name of the pending bucket logging object */ + virtual int remove_logging_object_name(const std::string& prefix, + optional_yield y, + const DoutPrefixProvider *dpp, + RGWObjVersionTracker* objv_tracker) = 0; /** Move the pending bucket logging object into the bucket */ virtual int commit_logging_object(const std::string& obj_name, optional_yield y, const DoutPrefixProvider *dpp) = 0; + //** Remove the pending bucket logging object */ + virtual int remove_logging_object(const std::string& obj_name, optional_yield y, const DoutPrefixProvider *dpp) = 0; /** Write a record to the pending bucket logging object */ virtual int write_logging_object(const std::string& obj_name, const std::string& record, optional_yield y, const DoutPrefixProvider *dpp, bool async_completion) = 0; diff --git a/src/rgw/rgw_sal_filter.h b/src/rgw/rgw_sal_filter.h index 947ce9d4bf5..b6b6ed42b8f 100644 --- a/src/rgw/rgw_sal_filter.h +++ b/src/rgw/rgw_sal_filter.h @@ -666,24 +666,33 @@ public: optional_yield y, const DoutPrefixProvider *dpp) override { return next->remove_topics(objv_tracker, y, dpp); } - int get_logging_object_name(std::string& obj_name, - const std::string& prefix, - optional_yield y, + int get_logging_object_name(std::string& obj_name, + const std::string& prefix, + optional_yield y, const DoutPrefixProvider *dpp, RGWObjVersionTracker* objv_tracker) override { return next->get_logging_object_name(obj_name, prefix, y, dpp, objv_tracker); } - int set_logging_object_name(const std::string& obj_name, - const std::string& prefix, - optional_yield y, - const DoutPrefixProvider *dpp, + int set_logging_object_name(const std::string& obj_name, + const std::string& prefix, + optional_yield y, + const DoutPrefixProvider *dpp, bool new_obj, RGWObjVersionTracker* objv_track) override { - return next->set_logging_object_name(obj_name, prefix, y, dpp, new_obj, objv_track); + return next->set_logging_object_name(obj_name, prefix, y, dpp, new_obj, objv_track); + } + int remove_logging_object_name(const std::string& prefix, + optional_yield y, + const DoutPrefixProvider *dpp, + RGWObjVersionTracker* objv_tracker) override { + return next->remove_logging_object_name(prefix, y, dpp, objv_tracker); } int commit_logging_object(const std::string& obj_name, optional_yield y, const DoutPrefixProvider *dpp)override { return next->commit_logging_object(obj_name, y, dpp); } + int remove_logging_object(const std::string& obj_name, optional_yield y, const DoutPrefixProvider *dpp) override { + return next->remove_logging_object(obj_name, y, dpp); + } int write_logging_object(const std::string& obj_name, const std::string& record, optional_yield y, const DoutPrefixProvider *dpp, bool async_completion) override { return next->write_logging_object(obj_name, record, y, dpp, async_completion); } diff --git a/src/rgw/rgw_sal_store.h b/src/rgw/rgw_sal_store.h index 5cb98d23158..99b90564997 100644 --- a/src/rgw/rgw_sal_store.h +++ b/src/rgw/rgw_sal_store.h @@ -253,18 +253,23 @@ class StoreBucket : public Bucket { optional_yield y, const DoutPrefixProvider *dpp) override {return 0;} int remove_topics(RGWObjVersionTracker* objv_tracker, optional_yield y, const DoutPrefixProvider *dpp) override {return 0;} - int get_logging_object_name(std::string& obj_name, - const std::string& prefix, - optional_yield y, + int get_logging_object_name(std::string& obj_name, + const std::string& prefix, + optional_yield y, const DoutPrefixProvider *dpp, RGWObjVersionTracker* objv_tracker) override { return 0; } - int set_logging_object_name(const std::string& obj_name, - const std::string& prefix, - optional_yield y, - const DoutPrefixProvider *dpp, + int set_logging_object_name(const std::string& obj_name, + const std::string& prefix, + optional_yield y, + const DoutPrefixProvider *dpp, bool new_obj, RGWObjVersionTracker* objv_tracker) override { return 0; } + int remove_logging_object_name(const std::string& prefix, + optional_yield y, + const DoutPrefixProvider *dpp, + RGWObjVersionTracker* objv_tracker) override { return 0; } int commit_logging_object(const std::string& obj_name, optional_yield y, const DoutPrefixProvider *dpp) override { return 0; } + int remove_logging_object(const std::string& obj_name, optional_yield y, const DoutPrefixProvider *dpp) override { return 0; } int write_logging_object(const std::string& obj_name, const std::string& record, optional_yield y, const DoutPrefixProvider *dpp, bool async_completion) override { return 0; } diff --git a/src/test/cli/radosgw-admin/help.t b/src/test/cli/radosgw-admin/help.t index 8b7c6d6e3db..c1675d11a80 100644 --- a/src/test/cli/radosgw-admin/help.t +++ b/src/test/cli/radosgw-admin/help.t @@ -43,7 +43,8 @@ bucket sync disable disable bucket sync bucket sync enable enable bucket sync bucket radoslist list rados objects backing bucket's objects - bucket logging flush flush pending log records object of source bucket to the log bucket to bucket + bucket logging flush flush pending log records object of source bucket to the log bucket + bucket logging info get info on bucket logging configuration on source bucket or list of sources in log bucket bi get retrieve bucket index object entries bi put store bucket index object entries bi list list raw bucket index entries diff --git a/src/test/objectstore/test_bluefs.cc b/src/test/objectstore/test_bluefs.cc index 60147b5397c..32173d61afe 100644 --- a/src/test/objectstore/test_bluefs.cc +++ b/src/test/objectstore/test_bluefs.cc @@ -1426,6 +1426,87 @@ TEST(BlueFS, test_concurrent_dir_link_and_compact_log_56210) { } } +TEST(BlueFS, truncate_drops_allocations) { + constexpr uint64_t K = 1024; + constexpr uint64_t M = 1024 * K; + uuid_d fsid; + const char* DIR_NAME="dir"; + const char* FILE_NAME="file1"; + struct { + uint64_t preallocated_size; + uint64_t write_size; + uint64_t truncate_to; + uint64_t allocated_after_truncate; + uint64_t slow_size = 0; + uint64_t slow_alloc_size = 64*K; + uint64_t db_size = 128*M; + uint64_t db_alloc_size = 1*M; + } scenarios [] = { + // on DB(which is SLOW) : 1 => 1, 64K remains + { 1*M, 1, 1, 64*K }, + // on DB(which is SLOW), alloc 4K : 1 => 1, 4K remains + { 1*M, 1, 1, 4*K, 0, 4*K }, + // on DB(which is SLOW), truncation on AU boundary : 128K => 128K, 128K remains + { 1*M, 128*K, 128*K, 128*K }, + // on DB(which is SLOW), no prealloc, truncation to 0 : 1666K => 0, 0 remains + { 0, 1666*K, 0, 0 }, + // on DB, truncate to 123K, expect 1M occupied + { 1234*K, 123*K, 123*K, 1*M, 128*M, 64*K, 10*M, 1*M }, + // on DB, truncate to 0, expect 0 occupied + { 1234*K, 345*K, 0, 0, 128*M, 64*K, 10*M, 1*M }, + // on DB, truncate to AU boundary, expect exactly 1M occupied + { 1234*K, 1123*K, 1*M, 1*M, 128*M, 64*K, 10*M, 1*M }, + // on DB and SLOW, truncate only data on SLOW + { 0, 10*M+1, 10*M+1, 10*M+64*K, 128*M, 64*K, 10*M, 1*M }, + // on DB and SLOW, preallocate and truncate only data on SLOW + { 6*M, 12*M, 10*M+1, 10*M+64*K, 128*M, 64*K, 10*M, 1*M }, + // on DB and SLOW, preallocate and truncate all in SLOW and some on DB + // note! prealloc 6M is important, one allocation for 12M will fallback to SLOW + // in 6M + 6M we can be sure that 6M is on DB and 6M is on SLOW + { 6*M, 12*M, 3*M+1, 4*M, 128*M, 64*K, 11*M, 1*M }, + }; + for (auto& s : scenarios) { + ConfSaver conf(g_ceph_context->_conf); + conf.SetVal("bluefs_shared_alloc_size", stringify(s.slow_alloc_size).c_str()); + conf.SetVal("bluefs_alloc_size", stringify(s.db_alloc_size).c_str()); + + g_ceph_context->_conf.set_val("bluefs_shared_alloc_size", stringify(s.slow_alloc_size)); + g_ceph_context->_conf.set_val("bluefs_alloc_size", stringify(s.db_alloc_size)); + TempBdev bdev_db{s.db_size}; + TempBdev bdev_slow{s.slow_size}; + + BlueFS fs(g_ceph_context); + if (s.db_size != 0) { + ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev_db.path, false, 0)); + } + if (s.slow_size != 0) { + ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_SLOW, bdev_slow.path, false, 0)); + } + + ASSERT_EQ(0, fs.mkfs(fsid, {BlueFS::BDEV_DB, false, false})); + ASSERT_EQ(0, fs.mount()); + ASSERT_EQ(0, fs.maybe_verify_layout({BlueFS::BDEV_DB, false, false})); + BlueFS::FileWriter *h; + ASSERT_EQ(0, fs.mkdir("dir")); + ASSERT_EQ(0, fs.open_for_write(DIR_NAME, FILE_NAME, &h, false)); + uint64_t pre = fs.get_used(); + ASSERT_EQ(0, fs.preallocate(h->file, 0, s.preallocated_size)); + const std::string content(s.write_size, 'x'); + h->append(content.c_str(), content.length()); + fs.fsync(h); + ASSERT_EQ(0, fs.truncate(h, s.truncate_to)); + fs.fsync(h); + uint64_t post = fs.get_used(); + fs.close_writer(h); + EXPECT_EQ(pre, post - s.allocated_after_truncate); + + fs.umount(); + } +} + + + + TEST(BlueFS, test_log_runway) { uint64_t max_log_runway = 65536; ConfSaver conf(g_ceph_context->_conf); |