38 files changed, 302 insertions, 89 deletions
diff --git a/.githubmap b/.githubmap
index 5265fa59bed..c8ae6e284a2 100644
--- a/.githubmap
+++ b/.githubmap
@@ -9,6 +9,7 @@
 #
 a2batic Kanika Murarka <kmurarka@redhat.com>
 aaSharma14 Aashish Sharma <aasharma@redhat.com>
+abhishek-kane Abhishek Kane <abhishek.kane@ibm.com> <abhishek.kane@gmail.com>
 aclamk Adam Kupczyk <akupczyk@redhat.com>
 adamemerson Adam C. Emerson <aemerson@redhat.com>
 adk3798 Adam King <adking@redhat.com>
diff --git a/.mailmap b/.mailmap
index 6322c4ba523..e111f70a3d0 100644
--- a/.mailmap
+++ b/.mailmap
@@ -13,6 +13,7 @@ Aashish Sharma <aasharma@redhat.com> <66050535+aaSharma14@users.noreply.github.c
 Aashish Sharma <aasharma@redhat.com> <aasharma@li-e74156cc-2f67-11b2-a85c-e98659a63c5c.ibm.com>
 Aashish Sharma <aasharma@redhat.com> <aashishsharma@fedora.redhat.com>
 Aashish Sharma <aasharma@redhat.com> <aashishsharma@localhost.localdomain>
+Abhishek Kane <abhishek.kane@ibm.com> <abhishek.kane@gmail.com>
 Abhishek Lekshmanan <abhishek.lekshmanan@cern.ch> <abhishek.l@cern.ch>
 Abhishek Lekshmanan <abhishek@suse.com> <abhishek.lekshmanan@gmail.com>
 Abhishek Lekshmanan <abhishek@suse.com> <alekshmanan@suse.com>
diff --git a/.organizationmap b/.organizationmap
index e59e6ae24e1..ac9b0ea70fe 100644
--- a/.organizationmap
+++ b/.organizationmap
@@ -345,6 +345,7 @@ Huawei <contact@huawei.com> Yehu <yehu5@huawei.com>
 Huayun <contact@huayun.com> Zheng Yin <zhengyin@huayun.com>
 Huazhong University of Science and Technology <contact@hust.edu.cn> Luo Runbing <runsisi@hust.edu.cn>
 HXT Semiconductor <contact@hxt-semitech.org> Jiang Yutang <yutang2.jiang@hxt-semitech.com>
+IBM <contact@IBM.com> Abhishek Kane <abhishek.kane@ibm.com>
 IBM <contact@IBM.com> Adam Kupczyk <akupczyk@ibm.com>
 IBM <contact@IBM.com> Afreen Misbah <afreen@ibm.com>
 IBM <contact@IBM.com> Aliaksei Makarau <aliaksei.makarau@ibm.com>
diff --git a/.peoplemap b/.peoplemap
index 418e8505fb4..ed70830c092 100644
--- a/.peoplemap
+++ b/.peoplemap
@@ -16,6 +16,7 @@
 #
 # git log --pretty='%aN <%aE>' $range | git -c mailmap.file=.peoplemap check-mailmap --stdin | sort | uniq | sed -e 's/\(.*\) \(<.*\)/\2 \1/' | uniq --skip-field=1 --all-repeated | sed -e 's/\(.*>\) \(.*\)/\2 \1/'
 #
+Abhishek Kane <abhishek.kane@ibm.com> <abhishek.kane@gmail.com>
 Abhishek Lekshmanan <abhishek.lekshmanan@cern.ch> <abhishek@suse.com>
 Adam Kupczyk <akupczyk@ibm.com> <akupczyk@redhat.com> <akupczyk@mirantis.com>
 Alexandre Marangone <amarango@redhat.com> Alexandre Marangone <alexandre.marangone@inktank.com>
diff --git a/PendingReleaseNotes b/PendingReleaseNotes
index b4824a65584..d25acfa9c6d 100644
--- a/PendingReleaseNotes
+++ b/PendingReleaseNotes
@@ -60,6 +60,10 @@
   fuse client for `fallocate` for the default case (i.e. mode == 0) since
   CephFS does not support disk space reservation. The only flags supported are
   `FALLOC_FL_KEEP_SIZE` and `FALLOC_FL_PUNCH_HOLE`.
+* pybind/rados: Fixes the reversed order of the `offset` and `length` arguments
+  in WriteOp.zero(). Previously, WriteOp.zero() passed its arguments to
+  rados_write_op_zero in the wrong order, swapping offset and length, which
+  produced unexpected results.
 
 * The HeadBucket API now reports the `X-RGW-Bytes-Used` and `X-RGW-Object-Count`
   headers only when the `read-stats` querystring is explicitly included in the
diff --git a/container/Containerfile b/container/Containerfile
index c954ebed1be..9a5a88e76a1 100644
--- a/container/Containerfile
+++ b/container/Containerfile
@@ -212,6 +212,7 @@ RUN rpm -q $(cat packages.txt) && rm -f /var/lib/rpm/__db* && rm -f *packages.tx
 # Set some envs in the container for quickly inspecting details about the build at runtime
 ENV CEPH_IS_DEVEL="${CI_CONTAINER}" \
     CEPH_REF="${CEPH_REF}" \
+    CEPH_VERSION="${CEPH_REF}" \
     CEPH_OSD_FLAVOR="${OSD_FLAVOR}" \
     FROM_IMAGE="${FROM_IMAGE}"
 
diff --git a/debian/control b/debian/control
index a7d2dbb4c3a..a8c79f7a731 100644
--- a/debian/control
+++ b/debian/control
@@ -999,7 +999,8 @@ Depends: librados2 (= ${binary:Version}),
          liblua5.3-0,
          ${misc:Depends},
          ${shlibs:Depends},
-Suggests: luarocks,
+Suggests: liblua5.3-dev,
+          luarocks,
 Description: RADOS Gateway client library
  RADOS is a distributed object store used by the Ceph distributed
  storage system.  This package provides a REST gateway to the
diff --git a/doc/cephfs/disaster-recovery-experts.rst b/doc/cephfs/disaster-recovery-experts.rst
index 7677b42f47e..b01a3dfde6a 100644
--- a/doc/cephfs/disaster-recovery-experts.rst
+++ b/doc/cephfs/disaster-recovery-experts.rst
@@ -21,43 +21,46 @@ Advanced: Metadata repair tools
 Journal export
 --------------
 
-Before attempting dangerous operations, make a copy of the journal like so:
+Before attempting any dangerous operation, make a copy of the journal by
+running the following command:
 
-::
+.. prompt:: bash #
 
-    cephfs-journal-tool journal export backup.bin
+   cephfs-journal-tool journal export backup.bin
 
-Note that this command may not always work if the journal is badly corrupted,
-in which case a RADOS-level copy should be made (http://tracker.ceph.com/issues/9902).
+If the journal is badly corrupted, this command might not work. In that case,
+make a RADOS-level copy instead
+(http://tracker.ceph.com/issues/9902).
 
 
 Dentry recovery from journal
 ----------------------------
 
 If a journal is damaged or for any reason an MDS is incapable of replaying it,
-attempt to recover what file metadata we can like so:
+attempt to recover file metadata by running the following command:
 
-::
+.. prompt:: bash #
 
-    cephfs-journal-tool event recover_dentries summary
+   cephfs-journal-tool event recover_dentries summary
 
-This command by default acts on MDS rank 0, pass --rank=<n> to operate on other ranks.
+By default, this command acts on MDS rank ``0``. Pass the option ``--rank=<n>``
+to the ``cephfs-journal-tool`` command to operate on other ranks.
 
-This command will write any inodes/dentries recoverable from the journal
-into the backing store, if these inodes/dentries are higher-versioned
-than the previous contents of the backing store.  If any regions of the journal
-are missing/damaged, they will be skipped.
+This command writes all inodes and dentries recoverable from the journal into
+the backing store, but only if these inodes and dentries are higher-versioned
+than the existing contents of the backing store. Any regions of the journal
+that are missing or damaged will be skipped.
 
-Note that in addition to writing out dentries and inodes, this command will update
-the InoTables of each 'in' MDS rank, to indicate that any written inodes' numbers
-are now in use.  In simple cases, this will result in an entirely valid backing
+In addition to writing out dentries and inodes, this command updates the
+InoTables of each ``in`` MDS rank, to indicate that any written inodes' numbers
+are now in use. In simple cases, this will result in an entirely valid backing
 store state.
 
 .. warning::
 
-    The resulting state of the backing store is not guaranteed to be self-consistent,
-    and an online MDS scrub will be required afterwards.  The journal contents
-    will not be modified by this command, you should truncate the journal
+    The resulting state of the backing store is not guaranteed to be
+    self-consistent, and an online MDS scrub will be required afterwards. The
+    journal contents will not be modified by this command. Truncate the journal
     separately after recovering what you can.
 
 Journal truncation
diff --git a/doc/radosgw/config-ref.rst b/doc/radosgw/config-ref.rst
index b4aa56fff54..405bc727208 100644
--- a/doc/radosgw/config-ref.rst
+++ b/doc/radosgw/config-ref.rst
@@ -75,10 +75,11 @@ aggressiveness of lifecycle processing:
 .. confval:: rgw_lc_max_wp_worker
 
 These values can be tuned based upon your specific workload to further increase the
-aggressiveness of lifecycle processing. For a workload with a larger number of buckets (thousands)
-you would look at increasing the :confval:`rgw_lc_max_worker` value from the default value of 3 whereas for a
-workload with a smaller number of buckets but higher number of objects (hundreds of thousands)
-per bucket you would consider increasing :confval:`rgw_lc_max_wp_worker` from the default value of 3.
+aggressiveness of lifecycle processing. For a workload with a large number of buckets (thousands)
+you would raise the number of workers by increasing :confval:`rgw_lc_max_worker`
+from the default value of 3. Conversely, for a workload with a higher number of objects per bucket
+(hundreds of thousands) you would raise the number of parallel threads
+by increasing :confval:`rgw_lc_max_wp_worker` from the default value of 3.
 
 .. note:: When looking to tune either of these specific values please validate the
    current Cluster performance and Ceph Object Gateway utilization before increasing.
diff --git a/qa/standalone/osd/osd-bluefs-volume-ops.sh b/qa/standalone/osd/osd-bluefs-volume-ops.sh
index aedfbc9b5cb..f7424de8ce1 100755
--- a/qa/standalone/osd/osd-bluefs-volume-ops.sh
+++ b/qa/standalone/osd/osd-bluefs-volume-ops.sh
@@ -72,7 +72,7 @@ function TEST_bluestore() {
 
     truncate $dir/0/block -s 4294967296 # 4GB
     ceph-bluestore-tool --path $dir/0 bluefs-bdev-expand || return 1
-    truncate $dir/1/block -s 4311744512 # 4GB + 16MB
+    truncate $dir/1/block -s 11811160064 # 11GB to get bdev label at 10737418240
     ceph-bluestore-tool --path $dir/1 bluefs-bdev-expand || return 1
     truncate $dir/2/block -s 4295099392 # 4GB + 129KB
     ceph-bluestore-tool --path $dir/2 bluefs-bdev-expand || return 1
diff --git a/qa/suites/rados/verify/clusters/fixed-4.yaml b/qa/suites/rados/verify/clusters/fixed-4.yaml
new file mode 120000
index 00000000000..aa88300715a
--- /dev/null
+++ b/qa/suites/rados/verify/clusters/fixed-4.yaml
@@ -0,0 +1 @@
+.qa/clusters/fixed-4.yaml
+\ No newline at end of file
diff --git a/qa/suites/rgw/lua/tasks/0-install.yaml b/qa/suites/rgw/lua/tasks/0-install.yaml
index fa6e279145c..d85ebcc5998 100644
--- a/qa/suites/rgw/lua/tasks/0-install.yaml
+++ b/qa/suites/rgw/lua/tasks/0-install.yaml
@@ -3,7 +3,7 @@ tasks:
 - ceph:
 - openssl_keys:
 - rgw: [client.0]
-- tox: [client.0]    
+- tox: [client.0]
 
 overrides:
   ceph:
@@ -11,3 +11,11 @@ overrides:
       global:
         osd_min_pg_log_entries: 10
         osd_max_pg_log_entries: 10
+  install:
+    ceph:
+      extra_system_packages:
+        rpm:
+        - luarocks
+        deb:
+        - liblua5.3-dev
+        - luarocks
diff --git a/qa/tasks/ceph.py b/qa/tasks/ceph.py
index 9b04e3dc675..8f666d2fa9b 100644
--- a/qa/tasks/ceph.py
+++ b/qa/tasks/ceph.py
@@ -1206,8 +1206,18 @@ def cluster(ctx, config):
             args.extend([
                 run.Raw('|'), 'head', '-n', '1',
             ])
-            stdout = mon0_remote.sh(args)
-            return stdout or None
+            r = mon0_remote.run(
+                stdout=BytesIO(),
+                args=args,
+                stderr=StringIO(),
+            )
+            stdout = r.stdout.getvalue().decode()
+            if stdout:
+                return stdout
+            stderr = r.stderr.getvalue()
+            if stderr:
+                return stderr
+            return None
 
         if first_in_ceph_log('\[ERR\]|\[WRN\]|\[SEC\]',
                              config['log_ignorelist']) is not None:
diff --git a/qa/tasks/cephadm.py b/qa/tasks/cephadm.py
index dab61c2c700..0cde6050718 100644
--- a/qa/tasks/cephadm.py
+++ b/qa/tasks/cephadm.py
@@ -475,12 +475,16 @@ def ceph_log(ctx, config):
                 run.Raw('|'), 'head', '-n', '1',
             ])
             r = ctx.ceph[cluster_name].bootstrap_remote.run(
-                stdout=StringIO(),
+                stdout=BytesIO(),
                 args=args,
+                stderr=StringIO(),
             )
-            stdout = r.stdout.getvalue()
-            if stdout != '':
+            stdout = r.stdout.getvalue().decode()
+            if stdout:
                 return stdout
+            stderr = r.stderr.getvalue()
+            if stderr:
+                return stderr
             return None
 
         # NOTE: technically the first and third arg to first_in_ceph_log
diff --git a/qa/tasks/rook.py b/qa/tasks/rook.py
index 6cb75173966..fae5ef3bf00 100644
--- a/qa/tasks/rook.py
+++ b/qa/tasks/rook.py
@@ -8,7 +8,7 @@ import json
 import logging
 import os
 import yaml
-from io import BytesIO
+from io import BytesIO, StringIO
 
 from tarfile import ReadError
 from tasks.ceph_manager import CephManager
@@ -235,10 +235,14 @@ def ceph_log(ctx, config):
             r = ctx.rook[cluster_name].remote.run(
                 stdout=BytesIO(),
                 args=args,
+                stderr=StringIO(),
             )
             stdout = r.stdout.getvalue().decode()
             if stdout:
                 return stdout
+            stderr = r.stderr.getvalue()
+            if stderr:
+                return stderr
             return None
 
         if first_in_ceph_log('\[ERR\]|\[WRN\]|\[SEC\]',
diff --git a/src/mds/Server.cc b/src/mds/Server.cc
index 5874a3dce56..e66b5aa08c7 100644
--- a/src/mds/Server.cc
+++ b/src/mds/Server.cc
@@ -4167,7 +4167,7 @@ void Server::handle_client_getattr(const MDRequestRef& mdr, bool is_lookup)
 
     if (r < 0) {
       // fall-thru. let rdlock_path_pin_ref() check again.
-    } else if (is_lookup) {
+    } else if (is_lookup && mdr->dn[0].size()) {
       CDentry* dn = mdr->dn[0].back();
       mdr->pin(dn);
       auto em = dn->batch_ops.emplace(std::piecewise_construct, std::forward_as_tuple(mask), std::forward_as_tuple());
@@ -4274,7 +4274,7 @@ void Server::handle_client_getattr(const MDRequestRef& mdr, bool is_lookup)
   // reply
   dout(10) << "reply to stat on " << *req << dendl;
   mdr->tracei = ref;
-  if (is_lookup)
+  if (is_lookup && mdr->dn[0].size())
     mdr->tracedn = mdr->dn[0].back();
   respond_to_request(mdr, 0);
 }
diff --git a/src/mon/Monitor.cc b/src/mon/Monitor.cc
index 7332ec3edb1..833bdddc71b 100644
--- a/src/mon/Monitor.cc
+++ b/src/mon/Monitor.cc
@@ -4024,7 +4024,7 @@ void Monitor::handle_command(MonOpRequestRef op)
 
     for (auto& p : mgrstatmon()->get_service_map().services) {
       auto &service = p.first;
-      if (ServiceMap::is_normal_ceph_entity(service)) {
+      if (ServiceMap::is_normal_ceph_entity(service) || service == "nvmeof") {
         continue;
       }
       f->open_object_section(service.c_str());
diff --git a/src/msg/async/EventEpoll.cc b/src/msg/async/EventEpoll.cc
index 7ed5321dcda..eb04e3b8e98 100644
--- a/src/msg/async/EventEpoll.cc
+++ b/src/msg/async/EventEpoll.cc
@@ -17,6 +17,7 @@
 #include "common/errno.h"
 #include <fcntl.h>
 #include "EventEpoll.h"
+#include "Timeout.h"
 
 #define dout_subsys ceph_subsys_ms
 
@@ -120,8 +121,7 @@ int EpollDriver::event_wait(std::vector<FiredFileEvent> &fired_events, struct ti
 {
   int retval, numevents = 0;
 
-  retval = epoll_wait(epfd, events, nevent,
-                      tvp ? (tvp->tv_sec*1000 + tvp->tv_usec/1000) : -1);
+  retval = epoll_wait(epfd, events, nevent, timeout_to_milliseconds(tvp));
   if (retval > 0) {
     numevents = retval;
     fired_events.resize(numevents);
diff --git a/src/msg/async/EventPoll.cc b/src/msg/async/EventPoll.cc
index 4c09dbb4db4..f46528715e3 100644
--- a/src/msg/async/EventPoll.cc
+++ b/src/msg/async/EventPoll.cc
@@ -15,6 +15,7 @@
 
 #include "common/errno.h"
 #include "EventPoll.h"
+#include "Timeout.h"
 
 #include <unistd.h>
 #define dout_subsys ceph_subsys_ms
@@ -161,11 +162,9 @@ int PollDriver::event_wait(std::vector<FiredFileEvent> &fired_events,
 			  struct timeval *tvp) {
   int retval, numevents = 0;
 #ifdef _WIN32
-  retval = WSAPoll(pfds, max_pfds,
-		      tvp ? (tvp->tv_sec*1000 + tvp->tv_usec/1000) : -1);
+  retval = WSAPoll(pfds, max_pfds, timeout_to_milliseconds(tvp));
 #else
-  retval = poll(pfds, max_pfds,
-		      tvp ? (tvp->tv_sec*1000 + tvp->tv_usec/1000) : -1);
+  retval = poll(pfds, max_pfds, timeout_to_milliseconds(tvp));
 #endif
   if (retval > 0) {
     for (int j = 0; j < max_pfds; j++) {
diff --git a/src/msg/async/Timeout.h b/src/msg/async/Timeout.h
new file mode 100644
index 00000000000..b8df1b40761
--- /dev/null
+++ b/src/msg/async/Timeout.h
@@ -0,0 +1,47 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2024 IONOS SE
+ *
+ * Author: Max Kellermann <max.kellermann@ionos.com>
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation.  See file COPYING.
+ *
+ */
+
+#ifndef CEPH_MSG_TIMEOUT_H
+#define CEPH_MSG_TIMEOUT_H
+
+#include "include/intarith.h" // for div_round_up()
+
+#include <time.h> // for struct timeval
+
+/**
+ * Convert the given `struct timeval` to milliseconds, rounding up.
+ *
+ * Meant as the timeout argument of poll()/epoll_wait(). The result is
+ * narrowed to `int`; NOTE(review): confirm callers bound tv_sec.
+ */
+constexpr int
+timeout_to_milliseconds(const struct timeval &tv) noexcept
+{
+  /* round up to the next millisecond so we don't wake up too early */
+  return tv.tv_sec * 1000 + div_round_up(tv.tv_usec, 1000);
+}
+
+/**
+ * Pointer overload: a null `tv` yields -1 (which poll() and
+ * epoll_wait() treat as "block indefinitely"); otherwise converts *tv.
+ */
+constexpr int
+timeout_to_milliseconds(const struct timeval *tv) noexcept
+{
+  return tv != nullptr ? timeout_to_milliseconds(*tv) : -1;
+}
+
+#endif
diff --git a/src/os/bluestore/BlueFS.cc b/src/os/bluestore/BlueFS.cc
index 2f88acdc93b..50f293d45fd 100644
--- a/src/os/bluestore/BlueFS.cc
+++ b/src/os/bluestore/BlueFS.cc
@@ -3794,7 +3794,7 @@ int BlueFS::truncate(FileWriter *h, uint64_t offset)/*_WF_L*/
   if (offset > fnode.size) {
     ceph_abort_msg("truncate up not supported");
   }
-  ceph_assert(offset <= fnode.size);
+
   _flush_bdev(h);
   {
     std::lock_guard ll(log.lock);
@@ -3803,44 +3803,42 @@ int BlueFS::truncate(FileWriter *h, uint64_t offset)/*_WF_L*/
     vselector->sub_usage(h->file->vselector_hint, fnode);
     uint64_t x_off = 0;
     auto p = fnode.seek(offset, &x_off);
-    uint64_t cut_off =
-      (p == fnode.extents.end()) ? 0 : p2roundup(x_off, alloc_size[p->bdev]);
-    uint64_t new_allocated;
-    if (0 == cut_off) {
-      // whole pextent to remove
-      changed_extents = true;
-      new_allocated = offset;
-    } else if (cut_off < p->length) {
-      dirty.pending_release[p->bdev].insert(p->offset + cut_off, p->length - cut_off);
-      new_allocated = (offset - x_off) + cut_off;
-      p->length = cut_off;
-      changed_extents = true;
-      ++p;
-    } else {
-      ceph_assert(cut_off >= p->length);
-      new_allocated  = (offset - x_off) + p->length;
-      // just leave it here
-      ++p;
-    }
-    while (p != fnode.extents.end()) {
-      dirty.pending_release[p->bdev].insert(p->offset, p->length);
-      p = fnode.extents.erase(p);
-      changed_extents = true;
+    if (p != fnode.extents.end()) {
+      uint64_t cut_off = p2roundup(x_off, alloc_size[p->bdev]);
+      if (0 == cut_off) {
+        // whole pextent to remove
+        fnode.allocated = offset;
+        changed_extents = true;
+      } else if (cut_off < p->length) {
+        dirty.pending_release[p->bdev].insert(p->offset + cut_off,
+                                              p->length - cut_off);
+        fnode.allocated = (offset - x_off) + cut_off;
+        p->length = cut_off;
+        changed_extents = true;
+        ++p;
+      } else {
+        // cut_off > p->length means that we misaligned the extent
+        ceph_assert(cut_off == p->length);
+        fnode.allocated = (offset - x_off) + p->length;
+        ++p; // leave extent untouched
+      }
+      while (p != fnode.extents.end()) {
+        dirty.pending_release[p->bdev].insert(p->offset, p->length);
+        p = fnode.extents.erase(p);
+        changed_extents = true;
+      }
     }
     if (changed_extents) {
       fnode.size = offset;
-      fnode.allocated = new_allocated;
       fnode.reset_delta();
       fnode.recalc_allocated();
       log.t.op_file_update(fnode);
       // sad, but is_dirty must be set to signal flushing of the log
       h->file->is_dirty = true;
-    } else {
-      if (offset != fnode.size) {
-        fnode.size = offset;
-        //skipping log.t.op_file_update_inc, it will be done by flush()
-        h->file->is_dirty = true;
-      }
+    } else if (offset != fnode.size) {
+      fnode.size = offset;
+      // skipping log.t.op_file_update_inc, it will be done by flush()
+      h->file->is_dirty = true;
     }
     vselector->add_usage(h->file->vselector_hint, fnode);
   }
diff --git a/src/os/bluestore/BlueStore.cc b/src/os/bluestore/BlueStore.cc
index 25e6c4fe596..8f1d995fa8d 100644
--- a/src/os/bluestore/BlueStore.cc
+++ b/src/os/bluestore/BlueStore.cc
@@ -6930,8 +6930,19 @@ int BlueStore::_check_main_bdev_label()
     return -EIO;
   }
   if (bluestore_bdev_label_require_all && r != 0) {
-    derr << __func__ << " not all labels read properly" << dendl;
-    return -EIO;
+    // We are about to complain that some labels failed to read.
+    // However, if the block device was expanded, some labels are expected to be bad.
+    uint64_t lsize = std::max(BDEV_LABEL_BLOCK_SIZE, min_alloc_size);
+    uint32_t valid_locations = 0;
+    for (uint64_t loc : bdev_label_positions) {
+      if (loc + lsize <= bdev_label.size) {
+        ++valid_locations;
+      }
+    }
+    if (valid_locations != bdev_label_valid_locations.size()) {
+      derr << __func__ << " not all labels read properly" << dendl;
+      return -EIO;
+    }
   }
   return 0;
 }
@@ -8967,11 +8978,25 @@ int BlueStore::expand_devices(ostream& out)
     _close_db_and_around();
 
     // mount in read/write to sync expansion changes
+    if (bdev_label_multi) {
+      // Skip fsck on this mount: the store may look inconsistent because the
+      // size has been increased but the new labels might not be written yet.
+      cct->_conf.set_val_or_die("bluestore_fsck_on_mount", "false");
+    }
     r = _mount();
     ceph_assert(r == 0);
     if (fm && fm->is_null_manager()) {
       // we grow the allocation range, must reflect it in the allocation file
       alloc->init_add_free(size0, size - size0);
+      if (bdev_label_multi) {
+        uint64_t lsize = std::max(BDEV_LABEL_BLOCK_SIZE, min_alloc_size);
+        for (uint64_t loc : bdev_label_positions) {
+          if ((loc >= size0) && (loc + lsize <= size)) {
+            bdev_label_valid_locations.push_back(loc);
+          }
+        }
+        _write_bdev_label(cct, bdev, path + "/block", bdev_label, bdev_label_valid_locations);
+      }
       need_to_destage_allocation_file = true;
     }
     umount();
diff --git a/src/pybind/mgr/cephadm/services/nvmeof.py b/src/pybind/mgr/cephadm/services/nvmeof.py
index b3fd526815e..8acec94f382 100644
--- a/src/pybind/mgr/cephadm/services/nvmeof.py
+++ b/src/pybind/mgr/cephadm/services/nvmeof.py
@@ -47,6 +47,7 @@ class NvmeofService(CephService):
 
         # TODO: check if we can force jinja2 to generate dicts with double quotes instead of using json.dumps
         transport_tcp_options = json.dumps(spec.transport_tcp_options) if spec.transport_tcp_options else None
+        iobuf_options = json.dumps(spec.iobuf_options) if spec.iobuf_options else None
         name = '{}.{}'.format(utils.name_to_config_section('nvmeof'), nvmeof_gw_id)
         rados_id = name[len('client.'):] if name.startswith('client.') else name
 
@@ -67,6 +68,7 @@ class NvmeofService(CephService):
             'rpc_socket_dir': '/var/tmp/',
             'rpc_socket_name': 'spdk.sock',
             'transport_tcp_options': transport_tcp_options,
+            'iobuf_options': iobuf_options,
             'rados_id': rados_id
         }
         gw_conf = self.mgr.template.render('services/nvmeof/ceph-nvmeof.conf.j2', context)
diff --git a/src/pybind/mgr/cephadm/templates/services/nvmeof/ceph-nvmeof.conf.j2 b/src/pybind/mgr/cephadm/templates/services/nvmeof/ceph-nvmeof.conf.j2
index 37f2db52732..2a9ab309568 100644
--- a/src/pybind/mgr/cephadm/templates/services/nvmeof/ceph-nvmeof.conf.j2
+++ b/src/pybind/mgr/cephadm/templates/services/nvmeof/ceph-nvmeof.conf.j2
@@ -86,6 +86,9 @@ transport_tcp_options = {{ transport_tcp_options }}
 {% if spec.tgt_cmd_extra_args %}
 tgt_cmd_extra_args = {{ spec.tgt_cmd_extra_args }}
 {% endif %}
+{% if iobuf_options %}
+iobuf_options = {{ iobuf_options }}
+{% endif %}
 
 [monitor]
 timeout = {{ spec.monitor_timeout }}
diff --git a/src/pybind/rados/rados.pyx b/src/pybind/rados/rados.pyx
index b54ebb483c6..bcfa6777f3d 100644
--- a/src/pybind/rados/rados.pyx
+++ b/src/pybind/rados/rados.pyx
@@ -1870,7 +1870,7 @@ cdef class WriteOp(object):
             uint64_t _offset = offset
 
         with nogil:
-            rados_write_op_zero(self.write_op, _length, _offset)
+            rados_write_op_zero(self.write_op, _offset, _length)
 
     def truncate(self, offset: int):
         """
diff --git a/src/python-common/ceph/deployment/service_spec.py b/src/python-common/ceph/deployment/service_spec.py
index 1ac9fa49e32..6869d5b2188 100644
--- a/src/python-common/ceph/deployment/service_spec.py
+++ b/src/python-common/ceph/deployment/service_spec.py
@@ -1384,6 +1384,7 @@ class NvmeofServiceSpec(ServiceSpec):
                  transport_tcp_options: Optional[Dict[str, int]] =
                  {"in_capsule_data_size": 8192, "max_io_qpairs_per_ctrlr": 7},
                  tgt_cmd_extra_args: Optional[str] = None,
+                 iobuf_options: Optional[Dict[str, int]] = None,
                  discovery_addr: Optional[str] = None,
                  discovery_addr_map: Optional[Dict[str, str]] = None,
                  discovery_port: Optional[int] = None,
@@ -1520,6 +1521,8 @@ class NvmeofServiceSpec(ServiceSpec):
         self.transport_tcp_options: Optional[Dict[str, int]] = transport_tcp_options
         #: ``tgt_cmd_extra_args`` extra arguments for the nvmf_tgt process
         self.tgt_cmd_extra_args = tgt_cmd_extra_args
+        #: ``iobuf_options`` extra SPDK iobuf options, as a dict of option name to int value
+        self.iobuf_options: Optional[Dict[str, int]] = iobuf_options
         #: ``discovery_addr`` address of the discovery service
         self.discovery_addr = discovery_addr
         #: ``discovery_addr_map`` per node address map of the discovery service
diff --git a/src/rgw/driver/rados/rgw_data_sync.cc b/src/rgw/driver/rados/rgw_data_sync.cc
index c0a9059a251..1302f278f59 100644
--- a/src/rgw/driver/rados/rgw_data_sync.cc
+++ b/src/rgw/driver/rados/rgw_data_sync.cc
@@ -3021,7 +3021,7 @@ public:
 
           if (!dest_bucket_perms.verify_bucket_permission(dest_key.value_or(key), rgw::IAM::s3PutObject)) {
             ldout(cct, 0) << "ERROR: " << __func__ << ": permission check failed: user not allowed to write into bucket (bucket=" << sync_pipe.info.dest_bucket.get_key() << ")" << dendl;
-            return -EPERM;
+            return set_cr_error(-EPERM);
           }
         }
 
@@ -4520,7 +4520,7 @@ public:
           }
           tn->set_resource_name(SSTR(bucket_str_noinstance(bs.bucket) << "/" << key));
         }
-        if (retcode == -ERR_PRECONDITION_FAILED || retcode == -EPERM) {
+        if (retcode == -ERR_PRECONDITION_FAILED || retcode == -EPERM || retcode == -EACCES) {
 	  pretty_print(sc->env, "Skipping object s3://{}/{} in sync from zone {}\n",
 		       bs.bucket.name, key, zone_name);
           set_status("Skipping object sync: precondition failed (object contains newer change or policy doesn't allow sync)");
diff --git a/src/rgw/driver/rados/rgw_rados.cc b/src/rgw/driver/rados/rgw_rados.cc
index 69075c506f1..a183feabe2a 100644
--- a/src/rgw/driver/rados/rgw_rados.cc
+++ b/src/rgw/driver/rados/rgw_rados.cc
@@ -8951,7 +8951,7 @@ int RGWRados::get_olh(const DoutPrefixProvider *dpp, RGWBucketInfo& bucket_info,
     return r;
   }
 
-  auto iter = attrset.find(RGW_ATTR_OLH_VER);
+  auto iter = attrset.find(RGW_ATTR_OLH_INFO);
   if (iter == attrset.end()) { /* not an olh */
     return -EINVAL;
   }
diff --git a/src/rgw/driver/rados/rgw_rest_log.cc b/src/rgw/driver/rados/rgw_rest_log.cc
index 9315dfc0afd..72216a471b3 100644
--- a/src/rgw/driver/rados/rgw_rest_log.cc
+++ b/src/rgw/driver/rados/rgw_rest_log.cc
@@ -1061,7 +1061,7 @@ void RGWOp_BILog_Status::execute(optional_yield y)
 
     if (!pipe.dest.bucket) {
       /* Uh oh, something went wrong */
-      ldpp_dout(this, 20) << "ERROR: RGWOp_BILog_Status::execute(optional_yield y): BUG: pipe.dest.bucket was not initialized" << pipe << dendl;
+      ldpp_dout(this, 0) << "ERROR: RGWOp_BILog_Status::execute(optional_yield y): BUG: pipe.dest.bucket was not initialized" << pipe << dendl;
       op_ret = -EIO;
       return;
     }
diff --git a/src/rgw/driver/rados/rgw_user.cc b/src/rgw/driver/rados/rgw_user.cc
index 894d8e40950..cce593c6bd5 100644
--- a/src/rgw/driver/rados/rgw_user.cc
+++ b/src/rgw/driver/rados/rgw_user.cc
@@ -189,6 +189,11 @@ static void dump_user_info(Formatter *f, RGWUserInfo &info,
   }
   encode_json("type", user_source_type, f);
   encode_json("mfa_ids", info.mfa_ids, f);
+  encode_json("account_id", info.account_id, f);
+  encode_json("path", info.path, f);
+  encode_json("create_date", info.create_date, f);
+  encode_json("tags", info.tags, f);
+  encode_json("group_ids", info.group_ids, f);
   if (stats) {
     encode_json("stats", *stats, f);
   }
diff --git a/src/rgw/radosgw-admin/radosgw-admin.cc b/src/rgw/radosgw-admin/radosgw-admin.cc
index 47b68d3f902..13936c87952 100644
--- a/src/rgw/radosgw-admin/radosgw-admin.cc
+++ b/src/rgw/radosgw-admin/radosgw-admin.cc
@@ -2543,8 +2543,8 @@ static void sync_status(Formatter *formatter)
 
 struct indented {
   int w; // indent width
-  std::string_view header;
-  indented(int w, std::string_view header = "") : w(w), header(header) {}
+  std::string header;
+  indented(int w, std::string header = "") : w(w), header(header) {}
 };
 std::ostream& operator<<(std::ostream& out, const indented& h) {
   return out << std::setw(h.w) << h.header << std::setw(1) << ' ';
@@ -2552,10 +2552,10 @@ std::ostream& operator<<(std::ostream& out, const indented& h) {
 
 struct bucket_source_sync_info {
   const RGWZone& _source;
-  std::string_view error;
+  std::string error;
   std::map<int,std::string> shards_behind;
   int total_shards;
-  std::string_view status;
+  std::string status;
   rgw_bucket bucket_source;
 
   bucket_source_sync_info(const RGWZone& source): _source(source) {}
@@ -3075,14 +3075,12 @@ static int bucket_sync_status(rgw::sal::Driver* driver, const RGWBucketInfo& inf
       }
       if (pipe.source.zone.value_or(rgw_zone_id()) == z->second.id) {
         bucket_source_sync_info source_sync_info(z->second);
-	auto ret = bucket_source_sync_status(dpp(), static_cast<rgw::sal::RadosStore*>(driver), static_cast<rgw::sal::RadosStore*>(driver)->svc()->zone->get_zone(), z->second,
+        bucket_source_sync_status(dpp(), static_cast<rgw::sal::RadosStore*>(driver), static_cast<rgw::sal::RadosStore*>(driver)->svc()->zone->get_zone(), z->second,
 				  c->second,
 				  info, pipe,
 				  source_sync_info);
 
-        if (ret == 0) {
-          bucket_sync_info.source_status_info.emplace_back(std::move(source_sync_info));
-        }
+        bucket_sync_info.source_status_info.emplace_back(std::move(source_sync_info));
       }
     }
   }
diff --git a/src/rgw/radosgw-admin/sync_checkpoint.cc b/src/rgw/radosgw-admin/sync_checkpoint.cc
index 0303ed6c747..97da99bdc27 100644
--- a/src/rgw/radosgw-admin/sync_checkpoint.cc
+++ b/src/rgw/radosgw-admin/sync_checkpoint.cc
@@ -228,6 +228,7 @@ int rgw_bucket_sync_checkpoint(const DoutPrefixProvider* dpp,
     }
     auto& entry = sources.emplace_back();
     entry.pipe = pipe;
+    entry.pipe.dest.bucket = info.bucket; // so it contains the bucket key (+bucket id)
 
     // fetch remote markers
     boost::asio::spawn(ioctx, [&] (boost::asio::yield_context yield) {
diff --git a/src/rgw/rgw_common.h b/src/rgw/rgw_common.h
index 99f7db4f569..88f5f7a9c52 100644
--- a/src/rgw/rgw_common.h
+++ b/src/rgw/rgw_common.h
@@ -1428,6 +1428,7 @@ struct RGWBucketEnt {
   size_t size;
   size_t size_rounded;
   ceph::real_time creation_time;
+  ceph::real_time modification_time;
   uint64_t count;
 
   /* The placement_rule is necessary to calculate per-storage-policy statics
diff --git a/src/rgw/rgw_op.cc b/src/rgw/rgw_op.cc
index 9f25d275852..d6f846b0d2f 100644
--- a/src/rgw/rgw_op.cc
+++ b/src/rgw/rgw_op.cc
@@ -8587,6 +8587,10 @@ void RGWGetBucketPolicy::execute(optional_yield y)
 
 void RGWDeleteBucketPolicy::send_response()
 {
+  if (!op_ret) {
+    /* A successful Delete Bucket Policy returns 204 (No Content) */
+    op_ret = STATUS_NO_CONTENT;
+  }
   if (op_ret) {
     set_req_state_err(s, op_ret);
   }
@@ -9262,4 +9266,3 @@ void rgw_slo_entry::decode_json(JSONObj *obj)
   JSONDecoder::decode_json("etag", etag, obj);
   JSONDecoder::decode_json("size_bytes", size_bytes, obj);
 };
-
diff --git a/src/rgw/rgw_rest_swift.cc b/src/rgw/rgw_rest_swift.cc
index b8ff3ca2fe8..88af0fc9c27 100644
--- a/src/rgw/rgw_rest_swift.cc
+++ b/src/rgw/rgw_rest_swift.cc
@@ -361,6 +361,7 @@ void RGWListBuckets_ObjStore_SWIFT::dump_bucket_entry(const RGWBucketEnt& ent)
   if (need_stats) {
     s->formatter->dump_int("count", ent.count);
     s->formatter->dump_int("bytes", ent.size);
+    dump_time(s, "last_modified", ent.modification_time);
   }
 
   s->formatter->close_section();
diff --git a/src/rgw/services/svc_bucket_sobj.cc b/src/rgw/services/svc_bucket_sobj.cc
index ca705c5a44d..0f4cd4e847b 100644
--- a/src/rgw/services/svc_bucket_sobj.cc
+++ b/src/rgw/services/svc_bucket_sobj.cc
@@ -556,7 +556,7 @@ int RGWSI_Bucket_SObj::read_bucket_stats(const rgw_bucket& bucket,
                                          const DoutPrefixProvider *dpp)
 {
   RGWBucketInfo bucket_info;
-  int ret = read_bucket_info(bucket, &bucket_info, nullptr, nullptr, boost::none, y, dpp);
+  int ret = read_bucket_info(bucket, &bucket_info, &ent->modification_time, nullptr, boost::none, y, dpp);
   if (ret < 0) {
     return ret;
   }
diff --git a/src/test/objectstore/test_bluefs.cc b/src/test/objectstore/test_bluefs.cc
index 60147b5397c..32173d61afe 100644
--- a/src/test/objectstore/test_bluefs.cc
+++ b/src/test/objectstore/test_bluefs.cc
@@ -1426,6 +1426,87 @@ TEST(BlueFS, test_concurrent_dir_link_and_compact_log_56210) {
   }
 }
 
+// Exercises fs.truncate() on a preallocated, partly written file and checks,
+// per scenario, that fs.get_used() grew by exactly allocated_after_truncate
+// between opening the file and the fsync that follows the truncate.
+TEST(BlueFS, truncate_drops_allocations) {
+  constexpr uint64_t K = 1024;
+  constexpr uint64_t M = 1024 * K;
+  uuid_d fsid;
+  const char* DIR_NAME="dir";
+  const char* FILE_NAME="file1";
+  struct {
+    uint64_t preallocated_size;        // passed to fs.preallocate()
+    uint64_t write_size;               // number of bytes appended to the file
+    uint64_t truncate_to;              // new size passed to fs.truncate()
+    uint64_t allocated_after_truncate; // expected growth of fs.get_used()
+    uint64_t slow_size = 0;            // BDEV_SLOW size; 0 = device is not added
+    uint64_t slow_alloc_size = 64*K;   // value for bluefs_shared_alloc_size
+    uint64_t db_size = 128*M;          // BDEV_DB size
+    uint64_t db_alloc_size = 1*M;      // value for bluefs_alloc_size
+  } scenarios [] = {
+    // on DB(which is SLOW) : 1 => 1, 64K remains
+    { 1*M, 1, 1, 64*K },
+    // on DB(which is SLOW), alloc 4K : 1 => 1, 4K remains
+    { 1*M, 1, 1, 4*K, 0, 4*K },
+    // on DB(which is SLOW), truncation on AU boundary : 128K => 128K, 128K remains
+    { 1*M, 128*K, 128*K, 128*K },
+    // on DB(which is SLOW), no prealloc, truncation to 0 : 1666K => 0, 0 remains
+    { 0, 1666*K, 0, 0 },
+    // on DB, truncate to 123K, expect 1M occupied
+    { 1234*K, 123*K, 123*K, 1*M, 128*M, 64*K, 10*M, 1*M },
+    // on DB, truncate to 0, expect 0 occupied
+    { 1234*K, 345*K, 0, 0, 128*M, 64*K, 10*M, 1*M },
+    // on DB, truncate to AU boundary, expect exactly 1M occupied
+    { 1234*K, 1123*K, 1*M, 1*M, 128*M, 64*K, 10*M, 1*M },
+    // on DB and SLOW, truncate only data on SLOW
+    { 0, 10*M+1, 10*M+1, 10*M+64*K, 128*M, 64*K, 10*M, 1*M },
+    // on DB and SLOW, preallocate and truncate only data on SLOW
+    { 6*M, 12*M, 10*M+1, 10*M+64*K, 128*M, 64*K, 10*M, 1*M },
+    // on DB and SLOW, preallocate and truncate all in SLOW and some on DB
+    // note! prealloc 6M is important, one allocation for 12M will fall back to SLOW
+    // in 6M + 6M we can be sure that 6M is on DB and 6M is on SLOW
+    { 6*M, 12*M, 3*M+1, 4*M, 128*M, 64*K, 11*M, 1*M },
+  };
+  for (const auto& s : scenarios) {
+    ConfSaver conf(g_ceph_context->_conf);
+    conf.SetVal("bluefs_shared_alloc_size", stringify(s.slow_alloc_size).c_str());
+    conf.SetVal("bluefs_alloc_size", stringify(s.db_alloc_size).c_str());
+    // NOTE(review): the same two options are set again directly below - confirm both are needed
+    g_ceph_context->_conf.set_val("bluefs_shared_alloc_size", stringify(s.slow_alloc_size));
+    g_ceph_context->_conf.set_val("bluefs_alloc_size", stringify(s.db_alloc_size));
+    TempBdev bdev_db{s.db_size};
+    TempBdev bdev_slow{s.slow_size};
+
+    BlueFS fs(g_ceph_context);
+    if (s.db_size != 0) {
+      ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev_db.path, false, 0));
+    }
+    if (s.slow_size != 0) {
+      ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_SLOW, bdev_slow.path, false, 0));
+    }
+
+    ASSERT_EQ(0, fs.mkfs(fsid, {BlueFS::BDEV_DB, false, false}));
+    ASSERT_EQ(0, fs.mount());
+    ASSERT_EQ(0, fs.maybe_verify_layout({BlueFS::BDEV_DB, false, false}));
+    BlueFS::FileWriter *h = nullptr;
+    ASSERT_EQ(0, fs.mkdir(DIR_NAME));
+    ASSERT_EQ(0, fs.open_for_write(DIR_NAME, FILE_NAME, &h, false));
+    uint64_t pre = fs.get_used(); // baseline, taken before preallocate and append
+    ASSERT_EQ(0, fs.preallocate(h->file, 0, s.preallocated_size));
+    const std::string content(s.write_size, 'x');
+    h->append(content.c_str(), content.length());
+    fs.fsync(h);
+    ASSERT_EQ(0, fs.truncate(h, s.truncate_to));
+    fs.fsync(h);
+    uint64_t post = fs.get_used();
+    fs.close_writer(h);
+    EXPECT_EQ(pre + s.allocated_after_truncate, post); // sum form: no unsigned wrap in the failure message
+
+    fs.umount();
+  }
+}
+
 TEST(BlueFS, test_log_runway) {
   uint64_t max_log_runway = 65536;
   ConfSaver conf(g_ceph_context->_conf);
diff --git a/src/test/pybind/test_rados.py b/src/test/pybind/test_rados.py
index 25423bd8dcb..881b29c9152 100644
--- a/src/test/pybind/test_rados.py
+++ b/src/test/pybind/test_rados.py
@@ -516,6 +516,11 @@ class TestIoctx(object):
             eq(self.ioctx.read('write_ops'), b'12\x00\x005')
 
             write_op.write_full(b'12345')
+            write_op.zero(0, 2)
+            self.ioctx.operate_write_op(write_op, "write_ops")
+            eq(self.ioctx.read('write_ops'), b'\x00\x00345')
+
+            write_op.write_full(b'12345')
             write_op.truncate(2)
             self.ioctx.operate_write_op(write_op, "write_ops")
             eq(self.ioctx.read('write_ops'), b'12')