Diffstat (limited to 'qa')
217 files changed, 3348 insertions, 629 deletions
diff --git a/qa/Makefile b/qa/Makefile index ad655b7e743..05dc834adbd 100644 --- a/qa/Makefile +++ b/qa/Makefile @@ -1,4 +1,4 @@ -DIRS= workunits btrfs +DIRS= workunits all: for d in $(DIRS) ; do ( cd $$d ; $(MAKE) all ) ; done diff --git a/qa/btrfs/.gitignore b/qa/btrfs/.gitignore deleted file mode 100644 index 530c1b5b4ed..00000000000 --- a/qa/btrfs/.gitignore +++ /dev/null @@ -1,3 +0,0 @@ -/clone_range -/test_async_snap -/create_async_snap diff --git a/qa/btrfs/Makefile b/qa/btrfs/Makefile deleted file mode 100644 index be95ecfd3cd..00000000000 --- a/qa/btrfs/Makefile +++ /dev/null @@ -1,11 +0,0 @@ -CFLAGS = -Wall -Wextra -D_GNU_SOURCE - -TARGETS = clone_range test_async_snap create_async_snap - -.c: - $(CC) $(CFLAGS) $@.c -o $@ - -all: $(TARGETS) - -clean: - rm $(TARGETS) diff --git a/qa/btrfs/clone_range.c b/qa/btrfs/clone_range.c deleted file mode 100644 index 0a88e160131..00000000000 --- a/qa/btrfs/clone_range.c +++ /dev/null @@ -1,35 +0,0 @@ -#include <fcntl.h> -#include <stdlib.h> -#include <sys/ioctl.h> -#include <string.h> - -#include <linux/types.h> -#include "../../src/os/btrfs_ioctl.h" -#include <stdio.h> -#include <errno.h> - -int main(int argc, char **argv) -{ - struct btrfs_ioctl_clone_range_args ca; - int dfd; - int r; - - if (argc < 6) { - printf("usage: %s <srcfn> <srcoffset> <srclen> <destfn> <destoffset>\n", argv[0]); - exit(1); - } - - ca.src_fd = open(argv[1], O_RDONLY); - ca.src_offset = atoi(argv[2]); - ca.src_length = atoi(argv[3]); - dfd = open(argv[4], O_WRONLY|O_CREAT); - ca.dest_offset = atoi(argv[5]); - - r = ioctl(dfd, BTRFS_IOC_CLONE_RANGE, &ca); - printf("clone_range %s %lld %lld~%lld to %s %d %lld = %d %s\n", - argv[1], ca.src_fd, - ca.src_offset, ca.src_length, - argv[4], dfd, - ca.dest_offset, r, strerror(errno)); - return r; -} diff --git a/qa/btrfs/create_async_snap.c b/qa/btrfs/create_async_snap.c deleted file mode 100644 index 2ef22af7b45..00000000000 --- a/qa/btrfs/create_async_snap.c +++ /dev/null @@ -1,34 +0,0 @@ -#include <stdlib.h> -#include <unistd.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <fcntl.h> -#include <errno.h> -#include <stdio.h> -#include <sys/ioctl.h> -#include <string.h> - -#include <linux/ioctl.h> -#include <linux/types.h> -#include "../../src/os/btrfs_ioctl.h" - -struct btrfs_ioctl_vol_args_v2 va; - -int main(int argc, char **argv) -{ - int fd; - int r; - - if (argc != 3) { - printf("usage: %s <source subvol> <name>\n", argv[0]); - return 1; - } - printf("creating snap ./%s from %s\n", argv[2], argv[1]); - fd = open(".", O_RDONLY); - va.fd = open(argv[1], O_RDONLY); - va.flags = BTRFS_SUBVOL_CREATE_ASYNC; - strcpy(va.name, argv[2]); - r = ioctl(fd, BTRFS_IOC_SNAP_CREATE_V2, (unsigned long long)&va); - printf("result %d\n", r ? 
-errno:0); - return r; -} diff --git a/qa/btrfs/test_async_snap.c b/qa/btrfs/test_async_snap.c deleted file mode 100644 index 211be95a61c..00000000000 --- a/qa/btrfs/test_async_snap.c +++ /dev/null @@ -1,83 +0,0 @@ -#include <stdlib.h> -#include <unistd.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <fcntl.h> -#include <errno.h> -#include <stdio.h> -#include <sys/ioctl.h> -#include <string.h> - -#include <linux/ioctl.h> -#include <linux/types.h> -#include "../../src/os/btrfs_ioctl.h" - -struct btrfs_ioctl_vol_args_v2 va; -struct btrfs_ioctl_vol_args vold; -int max = 4; - -void check_return(int r) -{ - if (r < 0) { - printf("********* failed with %d %s ********\n", errno, strerror(errno)); - exit(1); - } -} - -int main(int argc, char **argv) -{ - int num = 1000; - - if (argc > 1) - num = atoi(argv[1]); - printf("will do %d iterations\n", num); - - int cwd = open(".", O_RDONLY); - printf("cwd = %d\n", cwd); - while (num-- > 0) { - if (rand() % 10 == 0) { - __u64 transid; - int r; - printf("sync starting\n"); - r = ioctl(cwd, BTRFS_IOC_START_SYNC, &transid); - check_return(r); - printf("sync started, transid %lld, waiting\n", transid); - r = ioctl(cwd, BTRFS_IOC_WAIT_SYNC, &transid); - check_return(r); - printf("sync finished\n"); - } - - int i = rand() % max; - struct stat st; - va.fd = cwd; - sprintf(va.name, "test.%d", i); - va.transid = 0; - int r = stat(va.name, &st); - if (r < 0) { - if (rand() % 3 == 0) { - printf("snap create (sync) %s\n", va.name); - va.flags = 0; - r = ioctl(cwd, BTRFS_IOC_SNAP_CREATE_V2, &va); - check_return(r); - } else { - printf("snap create (async) %s\n", va.name); - va.flags = BTRFS_SUBVOL_CREATE_ASYNC; - r = ioctl(cwd, BTRFS_IOC_SNAP_CREATE_V2, &va); - check_return(r); - printf("snap created, transid %lld\n", va.transid); - if (rand() % 2 == 0) { - printf("waiting for async snap create\n"); - r = ioctl(cwd, BTRFS_IOC_WAIT_SYNC, &va.transid); - check_return(r); - } - } - } else { - printf("snap remove %s\n", va.name); - vold.fd = va.fd; - strcpy(vold.name, va.name); - r = ioctl(cwd, BTRFS_IOC_SNAP_DESTROY, &vold); - check_return(r); - } - } - return 0; -} diff --git a/qa/btrfs/test_rmdir_async_snap.c b/qa/btrfs/test_rmdir_async_snap.c deleted file mode 100644 index 5dafaacaaeb..00000000000 --- a/qa/btrfs/test_rmdir_async_snap.c +++ /dev/null @@ -1,62 +0,0 @@ -#include <stdlib.h> -#include <unistd.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <fcntl.h> -#include <errno.h> -#include <stdio.h> -#include <sys/ioctl.h> -#include <string.h> - -#include <linux/ioctl.h> -#include <linux/types.h> -#include "../../src/os/btrfs_ioctl.h" - -struct btrfs_ioctl_vol_args_v2 va; -struct btrfs_ioctl_vol_args vold; - -int main(int argc, char **argv) -{ - int num = 1000; - int i, r, fd; - char buf[30]; - - if (argc > 1) - num = atoi(argv[1]); - printf("will do %d iterations\n", num); - - fd = open(".", O_RDONLY); - vold.fd = 0; - strcpy(vold.name, "current"); - r = ioctl(fd, BTRFS_IOC_SUBVOL_CREATE, (unsigned long int)&vold); - printf("create current ioctl got %d\n", r ? errno:0); - if (r) - return 1; - - for (i=0; i<num; i++) { - sprintf(buf, "current/dir.%d", i); - r = mkdir(buf, 0755); - printf("mkdir got %d\n", r ? errno:0); - if (r) - return 1; - } - - va.fd = open("current", O_RDONLY); - va.flags = BTRFS_SUBVOL_CREATE_ASYNC; - for (i=0; i<num; i++) { - system("/bin/cp /boot/vmlinuz-3.2.0-ceph-00142-g9e98323 current/foo"); - sprintf(buf, "current/dir.%d", i); - r = rmdir(buf); - printf("rmdir got %d\n", r ? 
errno:0); - if (r) - return 1; - - if (i % 10) continue; - sprintf(va.name, "snap.%d", i); - r = ioctl(fd, BTRFS_IOC_SNAP_CREATE_V2, (unsigned long long)&va); - printf("ioctl got %d\n", r ? errno:0); - if (r) - return 1; - } - return 0; -} diff --git a/qa/cephfs/begin/3-kernel.yaml b/qa/cephfs/begin/3-kernel.yaml new file mode 100644 index 00000000000..e94a0d87dc8 --- /dev/null +++ b/qa/cephfs/begin/3-kernel.yaml @@ -0,0 +1,23 @@ +# When the --kernel option is given to teuthology-suite, the kernel is set for +# all nodes (also, the kernel is "distro" when the --kernel option is not set). +# We don't generally want to use a custom kernel for all tests, so unset it. +# The k-testing.yaml will set it, if given, for only the client nodes. +# +# Allow overriding this by using a branch ending in "-all". + +teuthology: + postmerge: + - | + local branch = yaml.kernel.branch + if branch and not yaml.kernel.branch:find "-all$" then + log.debug("removing default kernel specification: %s", yaml.kernel) + py_attrgetter(yaml.kernel).pop('branch', nil) + py_attrgetter(yaml.kernel).pop('deb', nil) + py_attrgetter(yaml.kernel).pop('flavor', nil) + py_attrgetter(yaml.kernel).pop('kdb', nil) + py_attrgetter(yaml.kernel).pop('koji', nil) + py_attrgetter(yaml.kernel).pop('koji_task', nil) + py_attrgetter(yaml.kernel).pop('rpm', nil) + py_attrgetter(yaml.kernel).pop('sha1', nil) + py_attrgetter(yaml.kernel).pop('tag', nil) + end diff --git a/qa/cephfs/mount/kclient/overrides/distro/testing/k-testing.yaml b/qa/cephfs/mount/kclient/overrides/distro/testing/k-testing.yaml index 2ee219125e7..048cd5ce8b9 100644 --- a/qa/cephfs/mount/kclient/overrides/distro/testing/k-testing.yaml +++ b/qa/cephfs/mount/kclient/overrides/distro/testing/k-testing.yaml @@ -1,3 +1,12 @@ +teuthology: + premerge: | + log.debug("base kernel %s", base_config.kernel) + local kernel = base_config.kernel + if kernel.branch ~= "distro" then + log.debug("overriding testing kernel with %s", kernel) + yaml_fragment.kernel.client = kernel + end + kernel: client: branch: testing diff --git a/qa/cephfs/overrides/ignorelist_health.yaml b/qa/cephfs/overrides/ignorelist_health.yaml index 94b42579777..5ac25a8f790 100644 --- a/qa/cephfs/overrides/ignorelist_health.yaml +++ b/qa/cephfs/overrides/ignorelist_health.yaml @@ -24,3 +24,4 @@ overrides: - BLUESTORE_SLOW_OP_ALERT - slow operation indications in BlueStore - experiencing slow operations in BlueStore + - MGR_MODULE_ERROR diff --git a/qa/config/crimson_bluestore.yaml b/qa/config/crimson_bluestore.yaml new file mode 100644 index 00000000000..d5ba487b9bf --- /dev/null +++ b/qa/config/crimson_bluestore.yaml @@ -0,0 +1,25 @@ +overrides: + ceph: + fs: xfs + conf: + osd: + # crimson's osd objectstore option + crimson osd objectstore: bluestore + debug alienstore: 20 + bluestore block size: 96636764160 + debug bluestore: 20 + debug bluefs: 20 + debug rocksdb: 10 + bluestore compression mode: aggressive + bluestore fsck on mount: true + bluestore compression algorithm: snappy + # lower the full ratios since we can fill up a 100gb osd so quickly + mon osd full ratio: .9 + mon osd backfillfull_ratio: .85 + mon osd nearfull ratio: .8 + osd failsafe full ratio: .95 + bluestore rocksdb cf: false + log to stderr: true + err to stderr: true + log flush on exit: true + log to file: false diff --git a/qa/config/crimson_qa_overrides.yaml b/qa/config/crimson_qa_overrides.yaml index fa8f49a4986..a10c59d77cc 100644 --- a/qa/config/crimson_qa_overrides.yaml +++ b/qa/config/crimson_qa_overrides.yaml @@ -9,6 +9,7 @@ 
overrides: osd pool default crimson: true osd: crimson osd obc lru size: 10 + debug ms: 20 flavor: crimson workunit: env: diff --git a/qa/config/seastore.yaml b/qa/config/crimson_seastore.yaml index 6158563eedf..d1919456ab1 100644 --- a/qa/config/seastore.yaml +++ b/qa/config/crimson_seastore.yaml @@ -1,13 +1,13 @@ overrides: ceph: - fs: xfs conf: osd: - osd objectstore: seastore + # crimson's osd objectstore option + crimson osd objectstore: seastore debug seastore: 20 debug seastore onode: 20 debug seastore odata: 20 - debug seastore ompap: 20 + debug seastore omap: 20 debug seastore tm: 20 debug seastore t: 20 debug seastore cleaner: 20 diff --git a/qa/crontab/teuthology-cronjobs b/qa/crontab/teuthology-cronjobs index c979e5b105f..c558a1382ef 100644 --- a/qa/crontab/teuthology-cronjobs +++ b/qa/crontab/teuthology-cronjobs @@ -52,7 +52,6 @@ TEUTHOLOGY_SUITE_ARGS="--non-interactive --newest=100 --ceph-repo=https://git.ce 00 05 * * 0,2,4 $CW $SS 1 --ceph main --suite smoke -p 100 --force-priority 08 05 * * 0 $CW $SS 1 --ceph squid --suite smoke -p 100 --force-priority 16 05 * * 0 $CW $SS 1 --ceph reef --suite smoke -p 100 --force-priority -24 05 * * 0 $CW $SS 1 --ceph quincy --suite smoke -p 100 --force-priority ## ********** windows tests on main branch - weekly # 00 03 * * 1 CEPH_BRANCH=main; MACHINE_NAME=smithi; $CW teuthology-suite -v -c $CEPH_BRANCH -n 100 -m $MACHINE_NAME -s windows -k distro -e $CEPH_QA_EMAIL @@ -122,7 +121,6 @@ TEUTHOLOGY_SUITE_ARGS="--non-interactive --newest=100 --ceph-repo=https://git.ce 16 00 * * 1 $CW $SS 1 --ceph quincy --suite upgrade-clients/client-upgrade-pacific-quincy --suite-branch pacific -p 820 24 00 * * 1 $CW $SS 120000 --ceph quincy --suite upgrade:octopus-x -p 820 32 00 * * 1 $CW $SS 120000 --ceph quincy --suite upgrade:pacific-x -p 820 -40 00 * * 1 $CW $SS 1 --ceph quincy --suite upgrade/quincy-p2p -p 820 ### upgrade runs for reef release ###### on smithi diff --git a/qa/rgw/s3tests-branch.yaml b/qa/rgw/s3tests-branch.yaml index ef6819c87e0..8710ce35893 100644 --- a/qa/rgw/s3tests-branch.yaml +++ b/qa/rgw/s3tests-branch.yaml @@ -1,4 +1,4 @@ overrides: s3tests: - force-branch: ceph-master - # git_remote: https://github.com/ceph/ + force-branch: ceph-master + # git_remote: https://github.com/ceph/ diff --git a/qa/standalone/ceph-helpers.sh b/qa/standalone/ceph-helpers.sh index 82bf7391a7d..72d70ca7ad5 100755 --- a/qa/standalone/ceph-helpers.sh +++ b/qa/standalone/ceph-helpers.sh @@ -1888,7 +1888,6 @@ function repair() { local last_scrub=$(get_last_scrub_stamp $pgid) ceph pg repair $pgid wait_for_scrub $pgid "$last_scrub" - sleep 2 } function test_repair() { @@ -1902,7 +1901,7 @@ function test_repair() { wait_for_clean || return 1 repair 1.0 || return 1 kill_daemons $dir KILL osd || return 1 - ! TIMEOUT=1 repair 1.0 || return 1 + ! TIMEOUT=2 repair 1.0 || return 1 teardown $dir || return 1 } ####################################################################### @@ -1949,7 +1948,7 @@ function test_pg_scrub() { wait_for_clean || return 1 pg_scrub 1.0 || return 1 kill_daemons $dir KILL osd || return 1 - ! TIMEOUT=1 pg_scrub 1.0 || return 1 + ! TIMEOUT=2 pg_scrub 1.0 || return 1 teardown $dir || return 1 } @@ -2089,7 +2088,7 @@ function test_wait_for_scrub() { wait_for_scrub $pgid "$last_scrub" || return 1 kill_daemons $dir KILL osd || return 1 last_scrub=$(get_last_scrub_stamp $pgid) - ! TIMEOUT=1 wait_for_scrub $pgid "$last_scrub" || return 1 + ! 
TIMEOUT=2 wait_for_scrub $pgid "$last_scrub" || return 1 teardown $dir || return 1 } diff --git a/qa/standalone/mon/mon-cluster-log.sh b/qa/standalone/mon/mon-cluster-log.sh index 863a97c7cab..7b9adda0af6 100755 --- a/qa/standalone/mon/mon-cluster-log.sh +++ b/qa/standalone/mon/mon-cluster-log.sh @@ -62,7 +62,7 @@ function TEST_cluster_log_level() { ceph config set mon.a mon_cluster_log_level info ceph osd down 0 TIMEOUT=20 wait_for_osd up 0 || return 1 - grep -q "cluster [[]INF[]] osd.0.*boot" $dir/log + TIMEOUT=60 wait_for_string $dir/log "cluster [[]INF[]] osd.0.*boot" return_code=$? if [ $return_code -ne 0 ]; then echo "Failed : Could not find INF log in the cluster log file" @@ -145,9 +145,17 @@ function TEST_journald_cluster_log_level() { ceph osd down 0 TIMEOUT=20 wait_for_osd up 0 || return 1 search_str="osd.0.*boot" - sudo journalctl _COMM=ceph-mon CEPH_CHANNEL=cluster PRIORITY=6 --output=json-pretty --since "60 seconds ago" |jq '.MESSAGE' > $dir/journal.log - grep -q "$search_str" $dir/journal.log - return_code=$? + return_code=1 + RETRY_DURATION=60 + for ((i=0; i < $RETRY_DURATION; i++)); do + sudo journalctl _COMM=ceph-mon CEPH_CHANNEL=cluster PRIORITY=6 --output=json-pretty --since "60 seconds ago" |jq '.MESSAGE' > $dir/journal.log + if ! grep "$search_str" $dir/journal.log; then + sleep 1 + else + return_code=0 + break + fi + done if [ $return_code -ne 0 ]; then echo "Failed : Could not find INF log in the journalctl log file" ERRORS=$(($ERRORS + 1)) diff --git a/qa/standalone/osd-backfill/osd-backfill-space.sh b/qa/standalone/osd-backfill/osd-backfill-space.sh index 6a5c69412f4..84b9703bbfc 100755 --- a/qa/standalone/osd-backfill/osd-backfill-space.sh +++ b/qa/standalone/osd-backfill/osd-backfill-space.sh @@ -609,9 +609,16 @@ function TEST_backfill_grow() { wait_for_clean || return 1 + #Capture the timestamp after complete cleanup or finish the recovery progress + current_timestamp=$(date +"%Y-%m-%dT%H:%M:%S") + delete_pool $poolname kill_daemons $dir || return 1 - ! grep -q "num_bytes mismatch" $dir/osd.*.log || return 1 + + #Ignore the num_bytes mismatch messages before calling wait_cleanup + if ! awk -v ts="$current_timestamp" '$0 >= ts && /num_bytes mismatch/' $dir/osd.*.log > /dev/null; then + return 1 + fi } # Create a 5 shard EC pool on 6 OSD cluster diff --git a/qa/standalone/osd/osd-rep-recov-eio.sh b/qa/standalone/osd/osd-rep-recov-eio.sh index 6fea441b3a9..a34f4a47189 100755 --- a/qa/standalone/osd/osd-rep-recov-eio.sh +++ b/qa/standalone/osd/osd-rep-recov-eio.sh @@ -219,6 +219,18 @@ function TEST_rados_repair_warning() { ceph health | $(! grep -q "Too many repaired reads on 1 OSDs") || return 1 set +o pipefail + ceph health unmute OSD_TOO_MANY_REPAIRS + ceph tell osd.$primary clear_shards_repaired + sleep 10 + + set -o pipefail + # Should clear this + ceph health | $(! 
grep -q "Too many repaired reads on 1 OSDs") || return 1 + set +o pipefail + + ceph tell osd.$primary clear_shards_repaired $OBJS + sleep 10 + for i in $(seq 1 $OBJS) do inject_$inject rep data $poolname ${objbase}-$i $dir 0 || return 1 @@ -235,7 +247,7 @@ function TEST_rados_repair_warning() { COUNT=$(ceph pg dump --format=json-pretty | jq ".pg_map.osd_stats_sum.num_shards_repaired") test "$COUNT" = "$(expr $OBJS \* 3)" || return 1 - # Give mon a chance to notice additional OSD and unmute + # Give mon a chance to notice additional OSD and reset num_shards_repaired # The default tick time is 5 seconds CHECKTIME=10 LOOPS=0 diff --git a/qa/standalone/scrub/osd-recovery-scrub.sh b/qa/standalone/scrub/osd-recovery-scrub.sh index 843e9b9901b..7b77a60f35b 100755 --- a/qa/standalone/scrub/osd-recovery-scrub.sh +++ b/qa/standalone/scrub/osd-recovery-scrub.sh @@ -163,7 +163,7 @@ function wait_for_scrub_mod() { fi sleep 1 # are we still the primary? - local current_primary=`bin/ceph pg $pgid query | jq '.acting[0]' ` + local current_primary=`./bin/ceph pg $pgid query | jq '.acting[0]' ` if [ $orig_primary != $current_primary ]; then echo $orig_primary no longer primary for $pgid return 0 @@ -194,7 +194,7 @@ function pg_scrub_mod() { local last_scrub=$(get_last_scrub_stamp $pgid) # locate the primary - local my_primary=`bin/ceph pg $pgid query | jq '.acting[0]' ` + local my_primary=`./bin/ceph pg $pgid query | jq '.acting[0]' ` local recovery=false ceph pg scrub $pgid #ceph --format json pg dump pgs | jq ".pg_stats | .[] | select(.pgid == \"$pgid\") | .state" diff --git a/qa/standalone/scrub/osd-scrub-repair.sh b/qa/standalone/scrub/osd-scrub-repair.sh index 491e46603f7..6dd5b10ae8f 100755 --- a/qa/standalone/scrub/osd-scrub-repair.sh +++ b/qa/standalone/scrub/osd-scrub-repair.sh @@ -5833,7 +5833,7 @@ function TEST_periodic_scrub_replicated() { flush_pg_stats # Request a regular scrub and it will be done - pg_schedule_scrub $pg + pg_scrub $pg grep -q "Regular scrub request, deep-scrub details will be lost" $dir/osd.${primary}.log || return 1 # deep-scrub error is no longer present diff --git a/qa/standalone/scrub/osd-scrub-test.sh b/qa/standalone/scrub/osd-scrub-test.sh index 8015e023bdd..385479258f2 100755 --- a/qa/standalone/scrub/osd-scrub-test.sh +++ b/qa/standalone/scrub/osd-scrub-test.sh @@ -544,6 +544,9 @@ function TEST_dump_scrub_schedule() { --osd_op_queue=wpq \ --osd_stats_update_period_not_scrubbing=1 \ --osd_stats_update_period_scrubbing=1 \ + --osd_scrub_retry_after_noscrub=1 \ + --osd_scrub_retry_pg_state=2 \ + --osd_scrub_retry_delay=2 \ --osd_scrub_sleep=0.2" for osd in $(seq 0 $(expr $OSDS - 1)) @@ -600,17 +603,16 @@ function TEST_dump_scrub_schedule() { declare -A expct_dmp_duration=( ['dmp_last_duration']="0" ['dmp_last_duration_neg']="not0" ) wait_any_cond $pgid 10 $saved_last_stamp expct_dmp_duration "WaitingAfterScrub_dmp " sched_data || return 1 - sleep 2 - # # step 2: set noscrub and request a "periodic scrub". 
Watch for the change in the 'is the scrub # scheduled for the future' value # - ceph tell osd.* config set osd_shallow_scrub_chunk_max "3" || return 1 - ceph tell osd.* config set osd_scrub_sleep "2.0" || return 1 ceph osd set noscrub || return 1 sleep 2 + ceph tell osd.* config set osd_shallow_scrub_chunk_max "3" || return 1 + ceph tell osd.* config set osd_scrub_sleep "2.0" || return 1 + sleep 8 saved_last_stamp=${sched_data['query_last_stamp']} ceph tell $pgid schedule-scrub @@ -683,6 +685,234 @@ function TEST_pg_dump_objects_scrubbed() { teardown $dir || return 1 } +function wait_initial_scrubs() { + local -n pg_to_prim_dict=$1 + local extr_dbg=1 # note: 3 and above leave some temp files around + + # set a long schedule for the periodic scrubs. Wait for the + # initial 'no previous scrub is known' scrubs to finish for all PGs. + ceph tell osd.* config set osd_scrub_min_interval 7200 + ceph tell osd.* config set osd_deep_scrub_interval 14400 + ceph tell osd.* config set osd_max_scrubs 32 + ceph tell osd.* config set osd_scrub_sleep 0 + ceph tell osd.* config set osd_shallow_scrub_chunk_max 10 + ceph tell osd.* config set osd_scrub_chunk_max 10 + + for pg in "${!pg_to_prim_dict[@]}"; do + (( extr_dbg >= 1 )) && echo "Scheduling initial scrub for $pg" + ceph tell $pg scrub || return 1 + done + + sleep 1 + (( extr_dbg >= 1 )) && ceph pg dump pgs --format=json-pretty | \ + jq '.pg_stats | map(select(.last_scrub_duration == 0)) | map({pgid: .pgid, last_scrub_duration: .last_scrub_duration})' + + tout=20 + while [ $tout -gt 0 ] ; do + sleep 0.5 + (( extr_dbg >= 2 )) && ceph pg dump pgs --format=json-pretty | \ + jq '.pg_stats | map(select(.last_scrub_duration == 0)) | map({pgid: .pgid, last_scrub_duration: .last_scrub_duration})' + not_done=$(ceph pg dump pgs --format=json-pretty | \ + jq '.pg_stats | map(select(.last_scrub_duration == 0)) | map({pgid: .pgid, last_scrub_duration: .last_scrub_duration})' | wc -l ) + # note that we should ignore a header line + if [ "$not_done" -le 1 ]; then + break + fi + not_done=$(( (not_done - 2) / 4 )) + echo "Still waiting for $not_done PGs to finish initial scrubs (timeout $tout)" + tout=$((tout - 1)) + done + (( tout == 0 )) && return 1 + return 0 +} + + +# Whenever a PG is being scrubbed at a regular, periodic, urgency, and is queued +# for its replicas: +# if the operator is requesting a scrub of the same PG, the operator's request +# should trigger an abort of the ongoing scrub. +# +# The test process: +# - a periodic scrub is initiated of a PG. That scrub is set to be a very slow one. +# - a second PG, which shares some of its replicas, is intrcuted to be scrubbed. That one +# should be stuck in replica reservation. We will verify that. +# - now - the operator is requesting that second PG to be scrubbed. The original (pending) +# scrub should be aborted. 
We would check for: +# - the new, operator's scrub to be scheduled +# - the replicas' reservers to be released +function TEST_abort_periodic_for_operator() { + local dir=$1 + local -A cluster_conf=( + ['osds_num']="5" + ['pgs_in_pool']="16" + ['pool_name']="test" + ) + local extr_dbg=1 # note: 3 and above leave some temp files around + + standard_scrub_wpq_cluster "$dir" cluster_conf 3 || return 1 + local poolid=${cluster_conf['pool_id']} + local poolname=${cluster_conf['pool_name']} + echo "Pool: $poolname : $poolid" + + #turn off '-x' (but remember previous state) + local saved_echo_flag=${-//[^x]/} + set +x + + # fill the pool with some data + TESTDATA="testdata.$$" + dd if=/dev/urandom of=$TESTDATA bs=320 count=1 + for i in $( seq 1 256 ) + do + rados -p "$poolname" put "obj${i}" $TESTDATA 2>/dev/null 1>/dev/null + done + rm -f $TESTDATA + if [[ -n "$saved_echo_flag" ]]; then set -x; fi + + # create the dictionary of the PGs in the pool + declare -A pg_pr + declare -A pg_ac + declare -A pg_po + build_pg_dicts "$dir" pg_pr pg_ac pg_po "-" + (( extr_dbg >= 2 )) && echo "PGs table:" + for pg in "${!pg_pr[@]}"; do + (( extr_dbg >= 2 )) && echo "Got: $pg: ${pg_pr[$pg]} ( ${pg_ac[$pg]} ) ${pg_po[$pg]}" + done + + wait_initial_scrubs pg_pr || return 1 + + # limit all OSDs to one scrub at a time + ceph tell osd.* config set osd_max_scrubs 1 + ceph tell osd.* config set osd_stats_update_period_not_scrubbing 1 + + # configure for slow scrubs + ceph tell osd.* config set osd_scrub_sleep 3 + ceph tell osd.* config set osd_shallow_scrub_chunk_max 2 + ceph tell osd.* config set osd_scrub_chunk_max 2 + (( extr_dbg >= 2 )) && ceph tell osd.2 dump_scrub_reservations --format=json-pretty + + # the first PG to work with: + local pg1="1.0" + # and another one, that shares its primary, and at least one more active set member + local pg2="" + for pg in "${!pg_pr[@]}"; do + if [[ "${pg_pr[$pg]}" == "${pg_pr[$pg1]}" ]]; then + local -i common=0 + count_common_active $pg $pg1 pg_ac common + if [[ $common -gt 1 ]]; then + pg2=$pg + break + fi + fi + done + if [[ -z "$pg2" ]]; then + # \todo handle the case when no such PG is found + echo "No PG found with the same primary as $pg1" + return 1 + fi + + # the common primary is allowed two concurrent scrubs + ceph tell osd."${pg_pr[$pg1]}" config set osd_max_scrubs 2 + echo "The two PGs to manipulate are $pg1 and $pg2" + + set_query_debug "$pg1" + # wait till the information published by pg1 is updated to show it as + # not being scrubbed + local is_act + for i in $( seq 1 3 ) + do + is_act=$(ceph pg "$pg1" query | jq '.scrubber.active') + if [[ "$is_act" = "false" ]]; then + break + fi + echo "Still waiting for pg $pg1 to finish scrubbing" + sleep 0.7 + done + ceph pg dump pgs + if [[ "$is_act" != "false" ]]; then + ceph pg "$pg1" query + echo "PG $pg1 appears to be still scrubbing" + return 1 + fi + sleep 0.5 + + echo "Initiating a periodic scrub of $pg1" + (( extr_dbg >= 2 )) && ceph pg "$pg1" query -f json-pretty | jq '.scrubber' + ceph tell $pg1 schedule-deep-scrub || return 1 + sleep 1 + (( extr_dbg >= 2 )) && ceph pg "$pg1" query -f json-pretty | jq '.scrubber' + + for i in $( seq 1 14 ) + do + sleep 0.5 + stt=$(ceph pg "$pg1" query | jq '.scrubber') + is_active=$(echo $stt | jq '.active') + is_reserving_replicas=$(echo $stt | jq '.is_reserving_replicas') + if [[ "$is_active" = "true" && "$is_reserving_replicas" = "false" ]]; then + break + fi + echo "Still waiting for pg $pg1 to start scrubbing: $stt" + done + if [[ "$is_active" != "true" || 
"$is_reserving_replicas" != "false" ]]; then + ceph pg "$pg1" query -f json-pretty | jq '.scrubber' + echo "The scrub is not active or is reserving replicas" + return 1 + fi + (( extr_dbg >= 2 )) && ceph pg "$pg1" query -f json-pretty | jq '.scrubber' + + + # PG 1 is scrubbing, and has reserved the replicas - soem of which are shared + # by PG 2. As the max-scrubs was set to 1, that should prevent PG 2 from + # reserving its replicas. + + (( extr_dbg >= 1 )) && ceph tell osd.* dump_scrub_reservations --format=json-pretty + + # now - the 2'nd scrub - which should be blocked on reserving + set_query_debug "$pg2" + ceph tell "$pg2" schedule-deep-scrub + sleep 0.5 + (( extr_dbg >= 2 )) && echo "====================================================================================" + (( extr_dbg >= 2 )) && ceph pg "$pg2" query -f json-pretty | jq '.scrubber' + (( extr_dbg >= 2 )) && ceph pg "$pg1" query -f json-pretty | jq '.scrubber' + sleep 1 + (( extr_dbg >= 2 )) && echo "====================================================================================" + (( extr_dbg >= 2 )) && ceph pg "$pg2" query -f json-pretty | jq '.scrubber' + (( extr_dbg >= 2 )) && ceph pg "$pg1" query -f json-pretty | jq '.scrubber' + + # make sure pg2 scrub is stuck in the reserving state + local stt2=$(ceph pg "$pg2" query | jq '.scrubber') + local pg2_is_reserving + pg2_is_reserving=$(echo $stt2 | jq '.is_reserving_replicas') + if [[ "$pg2_is_reserving" != "true" ]]; then + echo "The scheduled scrub for $pg2 should have been stuck" + ceph pg dump pgs + return 1 + fi + + # now - issue an operator-initiated scrub on pg2. + # The periodic scrub should be aborted, and the operator-initiated scrub should start. + echo "Instructing $pg2 to perform a high-priority scrub" + ceph tell "$pg2" scrub + for i in $( seq 1 10 ) + do + sleep 0.5 + stt2=$(ceph pg "$pg2" query | jq '.scrubber') + pg2_is_active=$(echo $stt2 | jq '.active') + pg2_is_reserving=$(echo $stt2 | jq '.is_reserving_replicas') + if [[ "$pg2_is_active" = "true" && "$pg2_is_reserving" != "true" ]]; then + break + fi + echo "Still waiting: $stt2" + done + + if [[ "$pg2_is_active" != "true" || "$pg2_is_reserving" = "true" ]]; then + echo "The high-priority scrub for $pg2 is not active or is reserving replicas" + return 1 + fi + echo "Done" +} + + + main osd-scrub-test "$@" # Local Variables: diff --git a/qa/standalone/scrub/scrub-helpers.sh b/qa/standalone/scrub/scrub-helpers.sh index 49b8346b8d2..dd37b643e08 100644 --- a/qa/standalone/scrub/scrub-helpers.sh +++ b/qa/standalone/scrub/scrub-helpers.sh @@ -240,8 +240,8 @@ function standard_scrub_cluster() { local saved_echo_flag=${-//[^x]/} set +x - run_mon $dir a --osd_pool_default_size=$OSDS || return 1 - run_mgr $dir x || return 1 + run_mon $dir a --osd_pool_default_size=3 || return 1 + run_mgr $dir x --mgr_stats_period=1 || return 1 local ceph_osd_args="--osd_deep_scrub_randomize_ratio=0 \ --osd_scrub_interval_randomize_ratio=0 \ @@ -249,9 +249,12 @@ function standard_scrub_cluster() { --osd_pool_default_pg_autoscale_mode=off \ --osd_pg_stat_report_interval_max_seconds=1 \ --osd_pg_stat_report_interval_max_epochs=1 \ + --osd_stats_update_period_not_scrubbing=3 \ + --osd_stats_update_period_scrubbing=1 \ --osd_scrub_retry_after_noscrub=5 \ --osd_scrub_retry_pg_state=5 \ --osd_scrub_retry_delay=3 \ + --osd_pool_default_size=3 \ $extra_pars" for osd in $(seq 0 $(expr $OSDS - 1)) @@ -297,6 +300,107 @@ function standard_scrub_wpq_cluster() { } +# Parse the output of a 'pg dump pgs_brief' command and build a set of 
dictionaries: +# - pg_primary_dict: a dictionary of pgid -> acting_primary +# - pg_acting_dict: a dictionary of pgid -> acting set +# - pg_pool_dict: a dictionary of pgid -> pool +# If the input file is '-', the function will fetch the dump directly from the ceph cluster. +function build_pg_dicts { + local dir=$1 + local -n pg_primary_dict=$2 + local -n pg_acting_dict=$3 + local -n pg_pool_dict=$4 + local infile=$5 + + local extr_dbg=0 # note: 3 and above leave some temp files around + + #turn off '-x' (but remember previous state) + local saved_echo_flag=${-//[^x]/} + set +x + + # if the infile name is '-', fetch the dump directly from the ceph cluster + if [[ $infile == "-" ]]; then + local -r ceph_cmd="ceph pg dump pgs_brief -f=json-pretty" + local -r ceph_cmd_out=$(eval $ceph_cmd) + local -r ceph_cmd_rc=$? + if [[ $ceph_cmd_rc -ne 0 ]]; then + echo "Error: the command '$ceph_cmd' failed with return code $ceph_cmd_rc" + fi + (( extr_dbg >= 3 )) && echo "$ceph_cmd_out" > /tmp/e2 + l0=`echo "$ceph_cmd_out" | jq '[.pg_stats | group_by(.pg_stats)[0] | map({pgid: .pgid, pool: (.pgid | split(".")[0]), acting: .acting, acting_primary: .acting_primary})] | .[]' ` + else + l0=`jq '[.pg_stats | group_by(.pg_stats)[0] | map({pgid: .pgid, pool: (.pgid | split(".")[0]), acting: .acting, acting_primary: .acting_primary})] | .[]' $infile ` + fi + (( extr_dbg >= 2 )) && echo "L0: $l0" + + mapfile -t l1 < <(echo "$l0" | jq -c '.[]') + (( extr_dbg >= 2 )) && echo "L1: ${#l1[@]}" + + for item in "${l1[@]}"; do + pgid=$(echo "$item" | jq -r '.pgid') + acting=$(echo "$item" | jq -r '.acting | @sh') + pg_acting_dict["$pgid"]=$acting + acting_primary=$(echo "$item" | jq -r '.acting_primary') + pg_primary_dict["$pgid"]=$acting_primary + pool=$(echo "$item" | jq -r '.pool') + pg_pool_dict["$pgid"]=$pool + done + + if [[ -n "$saved_echo_flag" ]]; then set -x; fi +} + + +# a function that counts the number of common active-set elements between two PGs +# 1 - the first PG +# 2 - the second PG +# 3 - the dictionary of active sets +function count_common_active { + local pg1=$1 + local pg2=$2 + local -n pg_acting_dict=$3 + local -n res=$4 + + local -a a1=(${pg_acting_dict[$pg1]}) + local -a a2=(${pg_acting_dict[$pg2]}) + + local -i cnt=0 + for i in "${a1[@]}"; do + for j in "${a2[@]}"; do + if [[ $i -eq $j ]]; then + cnt=$((cnt+1)) + fi + done + done + + res=$cnt +} + + +# given a PG, find another one with a disjoint active set +# - but allow a possible common Primary +# 1 - the PG +# 2 - the dictionary of active sets +# 3 - [out] - the PG with a disjoint active set +function find_disjoint_but_primary { + local pg=$1 + local -n ac_dict=$2 + local -n p_dict=$3 + local -n res=$4 + + for cand in "${!ac_dict[@]}"; do + if [[ "$cand" != "$pg" ]]; then + local -i common=0 + count_common_active "$pg" "$cand" ac_dict common + if [[ $common -eq 0 || ( $common -eq 1 && "${p_dict[$pg]}" == "${p_dict[$cand]}" )]]; then + res=$cand + return + fi + fi + done +} + + + # A debug flag is set for the PG specified, causing the 'pg query' command to display # an additional 'scrub sessions counter' field. 
# diff --git a/qa/suites/ceph-ansible/smoke/basic/2-ceph/ceph_ansible.yaml b/qa/suites/ceph-ansible/smoke/basic/2-ceph/ceph_ansible.yaml index 7e7ede3e334..5be06bc6732 100644 --- a/qa/suites/ceph-ansible/smoke/basic/2-ceph/ceph_ansible.yaml +++ b/qa/suites/ceph-ansible/smoke/basic/2-ceph/ceph_ansible.yaml @@ -21,7 +21,6 @@ overrides: ceph_repository: dev ceph_mgr_modules: - status - - restful cephfs_pools: - name: "cephfs_data" pg_num: "64" diff --git a/qa/suites/ceph-ansible/smoke/basic/4-tasks/rest.yaml b/qa/suites/ceph-ansible/smoke/basic/4-tasks/rest.yaml deleted file mode 100644 index 8e389134b92..00000000000 --- a/qa/suites/ceph-ansible/smoke/basic/4-tasks/rest.yaml +++ /dev/null @@ -1,15 +0,0 @@ -tasks: -- exec: - mgr.x: - - systemctl stop ceph-mgr.target - - sleep 5 - - ceph -s -- exec: - mon.a: - - ceph restful create-key admin - - ceph restful create-self-signed-cert - - ceph restful restart -- workunit: - clients: - client.0: - - rest/test-restful.sh diff --git a/qa/suites/cephmetrics/2-ceph/ceph_ansible.yaml b/qa/suites/cephmetrics/2-ceph/ceph_ansible.yaml index 309f5060045..53e2b7fdbc8 100644 --- a/qa/suites/cephmetrics/2-ceph/ceph_ansible.yaml +++ b/qa/suites/cephmetrics/2-ceph/ceph_ansible.yaml @@ -20,7 +20,6 @@ overrides: ceph_repository: dev ceph_mgr_modules: - status - - restful cephfs_pools: - name: "cephfs_data" pg_num: "64" diff --git a/qa/suites/crimson-rados-experimental/.qa b/qa/suites/crimson-rados-experimental/.qa index fea2489fdf6..a602a0353e7 120000 --- a/qa/suites/crimson-rados-experimental/.qa +++ b/qa/suites/crimson-rados-experimental/.qa @@ -1 +1 @@ -../.qa
\ No newline at end of file +../.qa/
\ No newline at end of file diff --git a/qa/suites/crimson-rados-experimental/seastore/basic/centos_latest.yaml b/qa/suites/crimson-rados-experimental/seastore/basic/centos_latest.yaml deleted file mode 120000 index bd9854e7029..00000000000 --- a/qa/suites/crimson-rados-experimental/seastore/basic/centos_latest.yaml +++ /dev/null @@ -1 +0,0 @@ -.qa/distros/supported/centos_latest.yaml
\ No newline at end of file diff --git a/qa/suites/crimson-rados-experimental/seastore/basic/clusters/fixed-1.yaml b/qa/suites/crimson-rados-experimental/seastore/basic/clusters/fixed-1.yaml deleted file mode 100644 index d8e5898b99f..00000000000 --- a/qa/suites/crimson-rados-experimental/seastore/basic/clusters/fixed-1.yaml +++ /dev/null @@ -1,14 +0,0 @@ -overrides: - ceph-deploy: - conf: - global: - osd pool default size: 2 - osd crush chooseleaf type: 0 - osd pool default pg num: 128 - osd pool default pgp num: 128 - ceph: - conf: - osd: - osd shutdown pgref assert: true -roles: -- [mon.a, mgr.x, osd.0, osd.1, osd.2, client.0] diff --git a/qa/suites/crimson-rados-experimental/seastore/basic/deploy/ceph.yaml b/qa/suites/crimson-rados-experimental/seastore/basic/deploy/ceph.yaml deleted file mode 100644 index c22f08eecf8..00000000000 --- a/qa/suites/crimson-rados-experimental/seastore/basic/deploy/ceph.yaml +++ /dev/null @@ -1,18 +0,0 @@ -overrides: - install: - ceph: - flavor: crimson -tasks: -- install: -- ceph: - conf: - osd: - debug monc: 20 - mon: - mon min osdmap epochs: 50 - paxos service trim min: 10 - # prune full osdmaps regularly - mon osdmap full prune min: 15 - mon osdmap full prune interval: 2 - mon osdmap full prune txsize: 2 - flavor: crimson diff --git a/qa/suites/crimson-rados-experimental/seastore/basic/objectstore/seastore.yaml b/qa/suites/crimson-rados-experimental/seastore/basic/objectstore/seastore.yaml deleted file mode 120000 index 6a70c381709..00000000000 --- a/qa/suites/crimson-rados-experimental/seastore/basic/objectstore/seastore.yaml +++ /dev/null @@ -1 +0,0 @@ -.qa/config/seastore.yaml
\ No newline at end of file diff --git a/qa/suites/crimson-rados-experimental/seastore/basic/tasks/rados_api_tests.yaml b/qa/suites/crimson-rados-experimental/seastore/basic/tasks/rados_api_tests.yaml deleted file mode 100644 index ad8c921425b..00000000000 --- a/qa/suites/crimson-rados-experimental/seastore/basic/tasks/rados_api_tests.yaml +++ /dev/null @@ -1,28 +0,0 @@ -overrides: - ceph: - log-ignorelist: - - reached quota - - but it is still running - - overall HEALTH_ - - \(POOL_FULL\) - - \(SMALLER_PGP_NUM\) - - \(CACHE_POOL_NO_HIT_SET\) - - \(CACHE_POOL_NEAR_FULL\) - - \(POOL_APP_NOT_ENABLED\) - - \(PG_AVAILABILITY\) - - \(PG_DEGRADED\) - conf: - client: - debug ms: 1 - mon: - mon warn on pool no app: false - osd: - osd class load list: "*" - osd class default list: "*" - osd blocked scrub grace period: 3600 -tasks: -- workunit: - clients: - client.0: - - rados/test.sh - - rados/test_pool_quota.sh diff --git a/qa/suites/crimson-rados-experimental/seastore/basic/tasks/readwrite.yaml b/qa/suites/crimson-rados-experimental/seastore/basic/tasks/readwrite.yaml deleted file mode 100644 index 25efcdac83d..00000000000 --- a/qa/suites/crimson-rados-experimental/seastore/basic/tasks/readwrite.yaml +++ /dev/null @@ -1,18 +0,0 @@ -overrides: - ceph: - crush_tunables: optimal - conf: - mon: - mon osd initial require min compat client: luminous - osd: - osd_discard_disconnected_ops: false -tasks: -- rados: - clients: [client.0] - ops: 4000 - objects: 500 - max_attr_len: 8192 - op_weights: - read: 45 - write: 45 - delete: 10 diff --git a/qa/suites/crimson-rados-experimental/seastore/basic/% b/qa/suites/crimson-rados-experimental/thrash/% index e69de29bb2d..e69de29bb2d 100644 --- a/qa/suites/crimson-rados-experimental/seastore/basic/% +++ b/qa/suites/crimson-rados-experimental/thrash/% diff --git a/qa/suites/crimson-rados-experimental/seastore/.qa b/qa/suites/crimson-rados-experimental/thrash/.qa index a602a0353e7..a602a0353e7 120000 --- a/qa/suites/crimson-rados-experimental/seastore/.qa +++ b/qa/suites/crimson-rados-experimental/thrash/.qa diff --git a/qa/suites/crimson-rados-experimental/seastore/basic/.qa b/qa/suites/crimson-rados-experimental/thrash/0-size-min-size-overrides/.qa index a602a0353e7..a602a0353e7 120000 --- a/qa/suites/crimson-rados-experimental/seastore/basic/.qa +++ b/qa/suites/crimson-rados-experimental/thrash/0-size-min-size-overrides/.qa diff --git a/qa/suites/crimson-rados-experimental/thrash/0-size-min-size-overrides/2-size-2-min-size.yaml.disabled b/qa/suites/crimson-rados-experimental/thrash/0-size-min-size-overrides/2-size-2-min-size.yaml.disabled new file mode 120000 index 00000000000..5393a75548a --- /dev/null +++ b/qa/suites/crimson-rados-experimental/thrash/0-size-min-size-overrides/2-size-2-min-size.yaml.disabled @@ -0,0 +1 @@ +.qa/overrides/2-size-2-min-size.yaml
\ No newline at end of file diff --git a/qa/suites/crimson-rados-experimental/thrash/0-size-min-size-overrides/3-size-2-min-size.yaml b/qa/suites/crimson-rados-experimental/thrash/0-size-min-size-overrides/3-size-2-min-size.yaml new file mode 120000 index 00000000000..5ff70eadf75 --- /dev/null +++ b/qa/suites/crimson-rados-experimental/thrash/0-size-min-size-overrides/3-size-2-min-size.yaml @@ -0,0 +1 @@ +.qa/overrides/3-size-2-min-size.yaml
\ No newline at end of file diff --git a/qa/suites/crimson-rados-experimental/seastore/basic/clusters/.qa b/qa/suites/crimson-rados-experimental/thrash/1-pg-log-overrides/.qa index a602a0353e7..a602a0353e7 120000 --- a/qa/suites/crimson-rados-experimental/seastore/basic/clusters/.qa +++ b/qa/suites/crimson-rados-experimental/thrash/1-pg-log-overrides/.qa diff --git a/qa/suites/fs/thrash/workloads/overrides/+ b/qa/suites/crimson-rados-experimental/thrash/1-pg-log-overrides/normal_pg_log.yaml index e69de29bb2d..e69de29bb2d 100644 --- a/qa/suites/fs/thrash/workloads/overrides/+ +++ b/qa/suites/crimson-rados-experimental/thrash/1-pg-log-overrides/normal_pg_log.yaml diff --git a/qa/suites/crimson-rados/thrash/1-pg-log-overrides/short_pg_log.yaml.disabled b/qa/suites/crimson-rados-experimental/thrash/1-pg-log-overrides/short_pg_log.yaml index abd86d7d986..abd86d7d986 120000 --- a/qa/suites/crimson-rados/thrash/1-pg-log-overrides/short_pg_log.yaml.disabled +++ b/qa/suites/crimson-rados-experimental/thrash/1-pg-log-overrides/short_pg_log.yaml diff --git a/qa/suites/rados/rest/% b/qa/suites/crimson-rados-experimental/thrash/2-recovery-overrides/$ index e69de29bb2d..e69de29bb2d 100644 --- a/qa/suites/rados/rest/% +++ b/qa/suites/crimson-rados-experimental/thrash/2-recovery-overrides/$ diff --git a/qa/suites/crimson-rados-experimental/seastore/basic/deploy/.qa b/qa/suites/crimson-rados-experimental/thrash/2-recovery-overrides/.qa index a602a0353e7..a602a0353e7 120000 --- a/qa/suites/crimson-rados-experimental/seastore/basic/deploy/.qa +++ b/qa/suites/crimson-rados-experimental/thrash/2-recovery-overrides/.qa diff --git a/qa/suites/crimson-rados-experimental/thrash/2-recovery-overrides/default.yaml b/qa/suites/crimson-rados-experimental/thrash/2-recovery-overrides/default.yaml new file mode 100644 index 00000000000..e69de29bb2d --- /dev/null +++ b/qa/suites/crimson-rados-experimental/thrash/2-recovery-overrides/default.yaml diff --git a/qa/suites/crimson-rados-experimental/thrash/2-recovery-overrides/more-active-recovery.yaml.disabled b/qa/suites/crimson-rados-experimental/thrash/2-recovery-overrides/more-active-recovery.yaml.disabled new file mode 120000 index 00000000000..47afd70202d --- /dev/null +++ b/qa/suites/crimson-rados-experimental/thrash/2-recovery-overrides/more-active-recovery.yaml.disabled @@ -0,0 +1 @@ +.qa/overrides/more-active-recovery.yaml
\ No newline at end of file diff --git a/qa/suites/crimson-rados-experimental/thrash/2-recovery-overrides/more-async-partial-recovery.yaml.disabled b/qa/suites/crimson-rados-experimental/thrash/2-recovery-overrides/more-async-partial-recovery.yaml.disabled new file mode 100644 index 00000000000..0bbc72db754 --- /dev/null +++ b/qa/suites/crimson-rados-experimental/thrash/2-recovery-overrides/more-async-partial-recovery.yaml.disabled @@ -0,0 +1,6 @@ +overrides: + ceph: + conf: + global: + osd_async_recovery_min_cost: 1 + osd_object_clean_region_max_num_intervals: 1000 diff --git a/qa/suites/crimson-rados-experimental/thrash/2-recovery-overrides/more-async-recovery.yaml.disabled b/qa/suites/crimson-rados-experimental/thrash/2-recovery-overrides/more-async-recovery.yaml.disabled new file mode 100644 index 00000000000..4aed086bcc3 --- /dev/null +++ b/qa/suites/crimson-rados-experimental/thrash/2-recovery-overrides/more-async-recovery.yaml.disabled @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + global: + osd_async_recovery_min_cost: 1 diff --git a/qa/suites/crimson-rados-experimental/thrash/2-recovery-overrides/more-partial-recovery.yaml.disabled b/qa/suites/crimson-rados-experimental/thrash/2-recovery-overrides/more-partial-recovery.yaml.disabled new file mode 100644 index 00000000000..88f15f2f691 --- /dev/null +++ b/qa/suites/crimson-rados-experimental/thrash/2-recovery-overrides/more-partial-recovery.yaml.disabled @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + global: + osd_object_clean_region_max_num_intervals: 1000 diff --git a/qa/suites/crimson-rados-experimental/thrash/clusters/+ b/qa/suites/crimson-rados-experimental/thrash/clusters/+ new file mode 100644 index 00000000000..e69de29bb2d --- /dev/null +++ b/qa/suites/crimson-rados-experimental/thrash/clusters/+ diff --git a/qa/suites/crimson-rados-experimental/seastore/basic/objectstore/.qa b/qa/suites/crimson-rados-experimental/thrash/clusters/.qa index a602a0353e7..a602a0353e7 120000 --- a/qa/suites/crimson-rados-experimental/seastore/basic/objectstore/.qa +++ b/qa/suites/crimson-rados-experimental/thrash/clusters/.qa diff --git a/qa/suites/crimson-rados-experimental/seastore/basic/clusters/fixed-2.yaml b/qa/suites/crimson-rados-experimental/thrash/clusters/fixed-2.yaml index 9774de6887b..79641f695ab 100644 --- a/qa/suites/crimson-rados-experimental/seastore/basic/clusters/fixed-2.yaml +++ b/qa/suites/crimson-rados-experimental/thrash/clusters/fixed-2.yaml @@ -6,6 +6,15 @@ overrides: conf: osd: osd shutdown pgref assert: true + crimson alien thread cpu cores: 6-7 + osd.0: + crimson seastar cpu cores: 0-2 + osd.1: + crimson seastar cpu cores: 3-5 + osd.2: + crimson seastar cpu cores: 0-2 + osd.3: + crimson seastar cpu cores: 3-5 global: ms cluster mode: crc ms service mode: crc diff --git a/qa/suites/crimson-rados-experimental/thrash/clusters/openstack.yaml.disabled b/qa/suites/crimson-rados-experimental/thrash/clusters/openstack.yaml.disabled new file mode 100644 index 00000000000..e559d9126e8 --- /dev/null +++ b/qa/suites/crimson-rados-experimental/thrash/clusters/openstack.yaml.disabled @@ -0,0 +1,4 @@ +openstack: + - volumes: # attached to each instance + count: 4 + size: 10 # GB diff --git a/qa/suites/crimson-rados-experimental/thrash/crimson-supported-all-distro b/qa/suites/crimson-rados-experimental/thrash/crimson-supported-all-distro new file mode 120000 index 00000000000..a5b729b9efa --- /dev/null +++ b/qa/suites/crimson-rados-experimental/thrash/crimson-supported-all-distro @@ -0,0 +1 @@ +.qa/distros/crimson-supported-all-distro/
\ No newline at end of file diff --git a/qa/suites/crimson-rados-experimental/seastore/basic/crimson_qa_overrides.yaml b/qa/suites/crimson-rados-experimental/thrash/crimson_qa_overrides.yaml index 2bf67af1b18..2bf67af1b18 120000 --- a/qa/suites/crimson-rados-experimental/seastore/basic/crimson_qa_overrides.yaml +++ b/qa/suites/crimson-rados-experimental/thrash/crimson_qa_overrides.yaml diff --git a/qa/suites/crimson-rados-experimental/seastore/basic/tasks/.qa b/qa/suites/crimson-rados-experimental/thrash/deploy/.qa index a602a0353e7..a602a0353e7 120000 --- a/qa/suites/crimson-rados-experimental/seastore/basic/tasks/.qa +++ b/qa/suites/crimson-rados-experimental/thrash/deploy/.qa diff --git a/qa/suites/crimson-rados-experimental/thrash/deploy/ceph.yaml b/qa/suites/crimson-rados-experimental/thrash/deploy/ceph.yaml new file mode 100644 index 00000000000..ecad09cfe3a --- /dev/null +++ b/qa/suites/crimson-rados-experimental/thrash/deploy/ceph.yaml @@ -0,0 +1,11 @@ +overrides: + install: + ceph: + flavor: crimson +tasks: +- install: +- ceph: + conf: + osd: + debug monc: 20 + flavor: crimson diff --git a/qa/suites/crimson-rados-experimental/thrash/deploy/cephadm.yaml.disabled b/qa/suites/crimson-rados-experimental/thrash/deploy/cephadm.yaml.disabled new file mode 100644 index 00000000000..0c2062240ee --- /dev/null +++ b/qa/suites/crimson-rados-experimental/thrash/deploy/cephadm.yaml.disabled @@ -0,0 +1,16 @@ +# no need to verify os + flavor + sha1 +verify_ceph_hash: false +tasks: +- cephadm: + conf: + mgr: + debug ms: 1 + debug mgr: 20 + debug osd: 10 +- cephadm.shell: + mon.a: + - ceph orch status + - ceph orch ps + - ceph orch ls + - ceph orch host ls + - ceph orch device ls diff --git a/qa/suites/fs/thrash/workloads/overrides/prefetch_dirfrags/.qa b/qa/suites/crimson-rados-experimental/thrash/objectstore/.qa index a602a0353e7..a602a0353e7 120000 --- a/qa/suites/fs/thrash/workloads/overrides/prefetch_dirfrags/.qa +++ b/qa/suites/crimson-rados-experimental/thrash/objectstore/.qa diff --git a/qa/suites/crimson-rados-experimental/thrash/objectstore/seastore.yaml b/qa/suites/crimson-rados-experimental/thrash/objectstore/seastore.yaml new file mode 120000 index 00000000000..61e26e7acf8 --- /dev/null +++ b/qa/suites/crimson-rados-experimental/thrash/objectstore/seastore.yaml @@ -0,0 +1 @@ +.qa/config/crimson_seastore.yaml
\ No newline at end of file diff --git a/qa/suites/rados/rest/.qa b/qa/suites/crimson-rados-experimental/thrash/thrashers/.qa index a602a0353e7..a602a0353e7 120000 --- a/qa/suites/rados/rest/.qa +++ b/qa/suites/crimson-rados-experimental/thrash/thrashers/.qa diff --git a/qa/suites/crimson-rados-experimental/thrash/thrashers/default.yaml b/qa/suites/crimson-rados-experimental/thrash/thrashers/default.yaml new file mode 100644 index 00000000000..aa44b6101ff --- /dev/null +++ b/qa/suites/crimson-rados-experimental/thrash/thrashers/default.yaml @@ -0,0 +1,34 @@ +overrides: + ceph: + log-ignorelist: + - but it is still running + - objects unfound and apparently lost + conf: + osd: + osd debug reject backfill probability: .3 + osd scrub min interval: 60 + osd scrub max interval: 120 + osd max backfills: 3 + osd snap trim sleep: 2 + osd delete sleep: 1 + mon: + mon min osdmap epochs: 50 + paxos service trim min: 10 + # prune full osdmaps regularly + mon osdmap full prune min: 15 + mon osdmap full prune interval: 2 + mon osdmap full prune txsize: 2 +tasks: +- thrashosds: + timeout: 2400 + dump_ops_enable: false + sighup_delay: 0 + min_in: 3 + noscrub_toggle_delay: 0 + chance_thrash_pg_upmap: 0 + reweight_osd: 0 + thrash_primary_affinity: false + ceph_objectstore_tool: false + chance_inject_pause_short: 0 + chance_thrash_cluster_full: 0 + chance_reset_purged_snaps_last: 0 diff --git a/qa/suites/crimson-rados-experimental/thrash/thrashosds-health.yaml b/qa/suites/crimson-rados-experimental/thrash/thrashosds-health.yaml new file mode 120000 index 00000000000..9124eb1aa29 --- /dev/null +++ b/qa/suites/crimson-rados-experimental/thrash/thrashosds-health.yaml @@ -0,0 +1 @@ +.qa/tasks/thrashosds-health.yaml
\ No newline at end of file diff --git a/qa/suites/crimson-rados-experimental/thrash/workloads/.qa b/qa/suites/crimson-rados-experimental/thrash/workloads/.qa new file mode 120000 index 00000000000..a602a0353e7 --- /dev/null +++ b/qa/suites/crimson-rados-experimental/thrash/workloads/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/crimson-rados-experimental/thrash/workloads/admin_socket_objecter_requests.yaml b/qa/suites/crimson-rados-experimental/thrash/workloads/admin_socket_objecter_requests.yaml new file mode 100644 index 00000000000..8c9764ade84 --- /dev/null +++ b/qa/suites/crimson-rados-experimental/thrash/workloads/admin_socket_objecter_requests.yaml @@ -0,0 +1,13 @@ +overrides: + ceph: + conf: + client.0: + admin socket: /var/run/ceph/ceph-$name.asok +tasks: +- radosbench: + clients: [client.0] + time: 150 +- admin_socket: + client.0: + objecter_requests: + test: "http://git.ceph.com/?p={repo};a=blob_plain;f=src/test/admin_socket/objecter_requests;hb={branch}" diff --git a/qa/suites/crimson-rados-experimental/thrash/workloads/pool-snaps-few-objects.yaml b/qa/suites/crimson-rados-experimental/thrash/workloads/pool-snaps-few-objects.yaml new file mode 100644 index 00000000000..d35e8421ab4 --- /dev/null +++ b/qa/suites/crimson-rados-experimental/thrash/workloads/pool-snaps-few-objects.yaml @@ -0,0 +1,20 @@ +overrides: + conf: + osd: + osd deep scrub update digest min age: 0 +tasks: +- rados: + clients: [client.0] + ops: 4000 + objects: 50 + pool_snaps: true + op_weights: + read: 100 + write: 100 + delete: 50 + snap_create: 50 + snap_remove: 50 + rollback: 0 + # TODO: CEPH_OSD_OP_COPY_FROM + copy_from: 0 + diff --git a/qa/suites/crimson-rados-experimental/thrash/workloads/radosbench-high-concurrency.yaml b/qa/suites/crimson-rados-experimental/thrash/workloads/radosbench-high-concurrency.yaml new file mode 100644 index 00000000000..902c4b56a1e --- /dev/null +++ b/qa/suites/crimson-rados-experimental/thrash/workloads/radosbench-high-concurrency.yaml @@ -0,0 +1,49 @@ +overrides: + ceph: + conf: + client.0: + debug ms: 1 + debug objecter: 20 + debug rados: 20 +tasks: +- full_sequential: + - radosbench: + clients: [client.0] + concurrency: 128 + size: 8192 + time: 90 + - radosbench: + clients: [client.0] + concurrency: 128 + size: 8192 + time: 90 + - radosbench: + clients: [client.0] + concurrency: 128 + size: 8192 + time: 90 + - radosbench: + clients: [client.0] + concurrency: 128 + size: 8192 + time: 90 + - radosbench: + clients: [client.0] + concurrency: 128 + size: 8192 + time: 90 + - radosbench: + clients: [client.0] + concurrency: 128 + size: 8192 + time: 90 + - radosbench: + clients: [client.0] + concurrency: 128 + size: 8192 + time: 90 + - radosbench: + clients: [client.0] + concurrency: 128 + size: 8192 + time: 90 diff --git a/qa/suites/crimson-rados-experimental/thrash/workloads/radosbench.yaml b/qa/suites/crimson-rados-experimental/thrash/workloads/radosbench.yaml new file mode 100644 index 00000000000..071f55e3928 --- /dev/null +++ b/qa/suites/crimson-rados-experimental/thrash/workloads/radosbench.yaml @@ -0,0 +1,24 @@ +overrides: + ceph: + conf: + client.0: + debug ms: 1 + debug objecter: 20 + debug rados: 20 +tasks: +- full_sequential: + - radosbench: + clients: [client.0] + time: 90 + - radosbench: + clients: [client.0] + time: 90 + - radosbench: + clients: [client.0] + time: 90 + - radosbench: + clients: [client.0] + time: 90 + - radosbench: + clients: [client.0] + time: 90 diff --git a/qa/suites/crimson-rados-experimental/thrash/workloads/small-objects-balanced.yaml b/qa/suites/crimson-rados-experimental/thrash/workloads/small-objects-balanced.yaml new file mode 100644 index 00000000000..afe04229898 --- /dev/null +++ b/qa/suites/crimson-rados-experimental/thrash/workloads/small-objects-balanced.yaml @@ -0,0 +1,24 @@ +overrides: + ceph: + 
crush_tunables: jewel +tasks: +- rados: + clients: [client.0] + ops: 400000 + max_seconds: 600 + max_in_flight: 64 + objects: 1024 + size: 16384 + balance_reads: true + max_attr_len: 8192 + op_weights: + read: 100 + write: 100 + delete: 50 + snap_create: 50 + snap_remove: 50 + rollback: 0 + # TODO: CEPH_OSD_OP_COPY_FROM + copy_from: 0 + setattr: 25 + rmattr: 25 diff --git a/qa/suites/crimson-rados-experimental/thrash/workloads/small-objects-localized.yaml b/qa/suites/crimson-rados-experimental/thrash/workloads/small-objects-localized.yaml new file mode 100644 index 00000000000..445b582ea42 --- /dev/null +++ b/qa/suites/crimson-rados-experimental/thrash/workloads/small-objects-localized.yaml @@ -0,0 +1,24 @@ +overrides: + ceph: + crush_tunables: jewel +tasks: +- rados: + clients: [client.0] + ops: 400000 + max_seconds: 600 + max_in_flight: 64 + objects: 1024 + size: 16384 + localize_reads: true + max_attr_len: 8192 + op_weights: + read: 100 + write: 100 + delete: 50 + snap_create: 50 + snap_remove: 50 + rollback: 0 + # TODO: CEPH_OSD_OP_COPY_FROM + copy_from: 0 + setattr: 25 + rmattr: 25 diff --git a/qa/suites/rados/thrash-erasure-code/workloads/ec-small-objects-balanced.yaml b/qa/suites/crimson-rados-experimental/thrash/workloads/small-objects.yaml index af0ac39310e..e7e8070fd76 100644 --- a/qa/suites/rados/thrash-erasure-code/workloads/ec-small-objects-balanced.yaml +++ b/qa/suites/crimson-rados-experimental/thrash/workloads/small-objects.yaml @@ -1,3 +1,6 @@ +overrides: + ceph: + crush_tunables: jewel tasks: - rados: clients: [client.0] @@ -6,16 +9,15 @@ tasks: max_in_flight: 64 objects: 1024 size: 16384 - ec_pool: true - balanced_reads: true + max_attr_len: 8192 op_weights: read: 100 - write: 0 - append: 100 + write: 100 delete: 50 snap_create: 50 snap_remove: 50 - rollback: 50 - copy_from: 50 + rollback: 0 + # TODO: CEPH_OSD_OP_COPY_FROM + copy_from: 0 setattr: 25 rmattr: 25 diff --git a/qa/suites/crimson-rados-experimental/thrash/workloads/snaps-few-objects-balanced.yaml b/qa/suites/crimson-rados-experimental/thrash/workloads/snaps-few-objects-balanced.yaml new file mode 100644 index 00000000000..1161c3cc253 --- /dev/null +++ b/qa/suites/crimson-rados-experimental/thrash/workloads/snaps-few-objects-balanced.yaml @@ -0,0 +1,15 @@ +tasks: +- rados: + clients: [client.0] + ops: 4000 + objects: 50 + balance_reads: true + op_weights: + read: 100 + write: 100 + delete: 50 + snap_create: 50 + snap_remove: 50 + rollback: 0 + # TODO: CEPH_OSD_OP_COPY_FROM + copy_from: 0 diff --git a/qa/suites/crimson-rados-experimental/thrash/workloads/snaps-few-objects-localized.yaml b/qa/suites/crimson-rados-experimental/thrash/workloads/snaps-few-objects-localized.yaml new file mode 100644 index 00000000000..80af0def0e4 --- /dev/null +++ b/qa/suites/crimson-rados-experimental/thrash/workloads/snaps-few-objects-localized.yaml @@ -0,0 +1,15 @@ +tasks: +- rados: + clients: [client.0] + ops: 4000 + objects: 50 + localize_reads: true + op_weights: + read: 100 + write: 100 + delete: 50 + snap_create: 50 + snap_remove: 50 + rollback: 0 + # TODO: CEPH_OSD_OP_COPY_FROM + copy_from: 0 diff --git a/qa/suites/crimson-rados-experimental/thrash/workloads/snaps-few-objects.yaml b/qa/suites/crimson-rados-experimental/thrash/workloads/snaps-few-objects.yaml new file mode 100644 index 00000000000..0694ffcd0d6 --- /dev/null +++ b/qa/suites/crimson-rados-experimental/thrash/workloads/snaps-few-objects.yaml @@ -0,0 +1,14 @@ +tasks: +- rados: + clients: [client.0] + ops: 4000 + objects: 50 + op_weights: + read: 100 + write: 100 
+ delete: 50 + snap_create: 50 + snap_remove: 50 + rollback: 0 + # TODO: CEPH_OSD_OP_COPY_FROM + copy_from: 0 diff --git a/qa/suites/crimson-rados-experimental/thrash/workloads/write_fadvise_dontneed.yaml b/qa/suites/crimson-rados-experimental/thrash/workloads/write_fadvise_dontneed.yaml new file mode 100644 index 00000000000..606dcae6922 --- /dev/null +++ b/qa/suites/crimson-rados-experimental/thrash/workloads/write_fadvise_dontneed.yaml @@ -0,0 +1,8 @@ +tasks: +- rados: + clients: [client.0] + ops: 4000 + objects: 500 + write_fadvise_dontneed: true + op_weights: + write: 100 diff --git a/qa/suites/crimson-rados/basic/objectstore/bluestore.yaml b/qa/suites/crimson-rados/basic/objectstore/bluestore.yaml index e84f396e4b2..481e393be4a 120000 --- a/qa/suites/crimson-rados/basic/objectstore/bluestore.yaml +++ b/qa/suites/crimson-rados/basic/objectstore/bluestore.yaml @@ -1 +1 @@ -.qa/config/bluestore.yaml
\ No newline at end of file +.qa/config/crimson_bluestore.yaml
\ No newline at end of file diff --git a/qa/suites/crimson-rados/basic/objectstore/seastore.yaml b/qa/suites/crimson-rados/basic/objectstore/seastore.yaml index 6a70c381709..61e26e7acf8 120000 --- a/qa/suites/crimson-rados/basic/objectstore/seastore.yaml +++ b/qa/suites/crimson-rados/basic/objectstore/seastore.yaml @@ -1 +1 @@ -.qa/config/seastore.yaml
\ No newline at end of file +.qa/config/crimson_seastore.yaml
\ No newline at end of file diff --git a/qa/suites/crimson-rados/basic/tasks/rados_python.yaml b/qa/suites/crimson-rados/basic/tasks/rados_python.yaml index a6af2957119..1302e14f21a 100644 --- a/qa/suites/crimson-rados/basic/tasks/rados_python.yaml +++ b/qa/suites/crimson-rados/basic/tasks/rados_python.yaml @@ -17,4 +17,4 @@ tasks: timeout: 1h clients: client.0: - - rados/test_python.sh -m 'not (tier or ec)' + - rados/test_python.sh -m 'not (wait or tier or ec)' diff --git a/qa/suites/crimson-rados/perf/objectstore/bluestore.yaml b/qa/suites/crimson-rados/perf/objectstore/bluestore.yaml index e84f396e4b2..481e393be4a 120000 --- a/qa/suites/crimson-rados/perf/objectstore/bluestore.yaml +++ b/qa/suites/crimson-rados/perf/objectstore/bluestore.yaml @@ -1 +1 @@ -.qa/config/bluestore.yaml
\ No newline at end of file +.qa/config/crimson_bluestore.yaml
\ No newline at end of file diff --git a/qa/suites/crimson-rados/perf/objectstore/seastore.yaml b/qa/suites/crimson-rados/perf/objectstore/seastore.yaml index 6a70c381709..61e26e7acf8 120000 --- a/qa/suites/crimson-rados/perf/objectstore/seastore.yaml +++ b/qa/suites/crimson-rados/perf/objectstore/seastore.yaml @@ -1 +1 @@ -.qa/config/seastore.yaml
\ No newline at end of file +.qa/config/crimson_seastore.yaml
\ No newline at end of file diff --git a/qa/suites/crimson-rados/rbd/objectstore/bluestore.yaml b/qa/suites/crimson-rados/rbd/objectstore/bluestore.yaml index e84f396e4b2..481e393be4a 120000 --- a/qa/suites/crimson-rados/rbd/objectstore/bluestore.yaml +++ b/qa/suites/crimson-rados/rbd/objectstore/bluestore.yaml @@ -1 +1 @@ -.qa/config/bluestore.yaml
\ No newline at end of file +.qa/config/crimson_bluestore.yaml
\ No newline at end of file diff --git a/qa/suites/crimson-rados/rbd/objectstore/seastore.yaml b/qa/suites/crimson-rados/rbd/objectstore/seastore.yaml index 6a70c381709..61e26e7acf8 120000 --- a/qa/suites/crimson-rados/rbd/objectstore/seastore.yaml +++ b/qa/suites/crimson-rados/rbd/objectstore/seastore.yaml @@ -1 +1 @@ -.qa/config/seastore.yaml
\ No newline at end of file +.qa/config/crimson_seastore.yaml
\ No newline at end of file diff --git a/qa/suites/crimson-rados/singleton/objectstore b/qa/suites/crimson-rados/singleton/objectstore deleted file mode 120000 index dbccf5ad928..00000000000 --- a/qa/suites/crimson-rados/singleton/objectstore +++ /dev/null @@ -1 +0,0 @@ -../thrash/objectstore
\ No newline at end of file diff --git a/qa/suites/crimson-rados/singleton/objectstore/.qa b/qa/suites/crimson-rados/singleton/objectstore/.qa new file mode 120000 index 00000000000..a602a0353e7 --- /dev/null +++ b/qa/suites/crimson-rados/singleton/objectstore/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/crimson-rados/singleton/objectstore/bluestore.yaml b/qa/suites/crimson-rados/singleton/objectstore/bluestore.yaml new file mode 120000 index 00000000000..481e393be4a --- /dev/null +++ b/qa/suites/crimson-rados/singleton/objectstore/bluestore.yaml @@ -0,0 +1 @@ +.qa/config/crimson_bluestore.yaml
\ No newline at end of file diff --git a/qa/suites/crimson-rados/singleton/objectstore/seastore.yaml b/qa/suites/crimson-rados/singleton/objectstore/seastore.yaml new file mode 120000 index 00000000000..61e26e7acf8 --- /dev/null +++ b/qa/suites/crimson-rados/singleton/objectstore/seastore.yaml @@ -0,0 +1 @@ +.qa/config/crimson_seastore.yaml
\ No newline at end of file diff --git a/qa/suites/crimson-rados/thrash_simple/1-pg-log-overrides/short_pg_log.yaml.disabled b/qa/suites/crimson-rados/thrash/1-pg-log-overrides/short_pg_log.yaml index abd86d7d986..abd86d7d986 120000 --- a/qa/suites/crimson-rados/thrash_simple/1-pg-log-overrides/short_pg_log.yaml.disabled +++ b/qa/suites/crimson-rados/thrash/1-pg-log-overrides/short_pg_log.yaml diff --git a/qa/suites/crimson-rados/thrash/objectstore/bluestore.yaml b/qa/suites/crimson-rados/thrash/objectstore/bluestore.yaml index e84f396e4b2..481e393be4a 120000 --- a/qa/suites/crimson-rados/thrash/objectstore/bluestore.yaml +++ b/qa/suites/crimson-rados/thrash/objectstore/bluestore.yaml @@ -1 +1 @@ -.qa/config/bluestore.yaml
\ No newline at end of file +.qa/config/crimson_bluestore.yaml
\ No newline at end of file diff --git a/qa/suites/crimson-rados/thrash/objectstore/seastore.yaml.disabled b/qa/suites/crimson-rados/thrash/objectstore/seastore.yaml.disabled new file mode 120000 index 00000000000..61e26e7acf8 --- /dev/null +++ b/qa/suites/crimson-rados/thrash/objectstore/seastore.yaml.disabled @@ -0,0 +1 @@ +.qa/config/crimson_seastore.yaml
\ No newline at end of file diff --git a/qa/suites/crimson-rados/thrash_simple/1-pg-log-overrides/short_pg_log.yaml b/qa/suites/crimson-rados/thrash_simple/1-pg-log-overrides/short_pg_log.yaml new file mode 120000 index 00000000000..abd86d7d986 --- /dev/null +++ b/qa/suites/crimson-rados/thrash_simple/1-pg-log-overrides/short_pg_log.yaml @@ -0,0 +1 @@ +.qa/overrides/short_pg_log.yaml
\ No newline at end of file diff --git a/qa/suites/crimson-rados/thrash_simple/objectstore/bluestore.yaml b/qa/suites/crimson-rados/thrash_simple/objectstore/bluestore.yaml index e84f396e4b2..481e393be4a 120000 --- a/qa/suites/crimson-rados/thrash_simple/objectstore/bluestore.yaml +++ b/qa/suites/crimson-rados/thrash_simple/objectstore/bluestore.yaml @@ -1 +1 @@ -.qa/config/bluestore.yaml
\ No newline at end of file +.qa/config/crimson_bluestore.yaml
\ No newline at end of file diff --git a/qa/suites/crimson-rados/thrash_simple/objectstore/seastore.yaml b/qa/suites/crimson-rados/thrash_simple/objectstore/seastore.yaml index 6a70c381709..61e26e7acf8 120000 --- a/qa/suites/crimson-rados/thrash_simple/objectstore/seastore.yaml +++ b/qa/suites/crimson-rados/thrash_simple/objectstore/seastore.yaml @@ -1 +1 @@ -.qa/config/seastore.yaml
\ No newline at end of file +.qa/config/crimson_seastore.yaml
\ No newline at end of file diff --git a/qa/suites/fs/multifs/tasks/failover.yaml b/qa/suites/fs/multifs/tasks/failover.yaml index 55dde639c23..b7a0338566c 100644 --- a/qa/suites/fs/multifs/tasks/failover.yaml +++ b/qa/suites/fs/multifs/tasks/failover.yaml @@ -8,6 +8,7 @@ overrides: - \(MDS_DAMAGE\) - \(FS_DEGRADED\) - \(MDS_CACHE_OVERSIZED\) + - \(MDS_ESTIMATED_REPLAY_TIME\) ceph-fuse: disabled: true tasks: diff --git a/qa/suites/fs/nfs/tasks/nfs.yaml b/qa/suites/fs/nfs/tasks/nfs.yaml index aa966bff214..2dd668c9f88 100644 --- a/qa/suites/fs/nfs/tasks/nfs.yaml +++ b/qa/suites/fs/nfs/tasks/nfs.yaml @@ -1,3 +1,10 @@ +overrides: + install: + extra_system_packages: + rpm: + - fio + deb: + - fio tasks: - cephfs_test_runner: modules: diff --git a/qa/suites/fs/thrash/workloads/overrides/% b/qa/suites/fs/thrash/workloads/overrides/% new file mode 100644 index 00000000000..e69de29bb2d --- /dev/null +++ b/qa/suites/fs/thrash/workloads/overrides/% diff --git a/qa/suites/fs/thrash/workloads/overrides/prefetch_oft_dirfrags/.qa b/qa/suites/fs/thrash/workloads/overrides/prefetch_oft_dirfrags/.qa new file mode 120000 index 00000000000..a602a0353e7 --- /dev/null +++ b/qa/suites/fs/thrash/workloads/overrides/prefetch_oft_dirfrags/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/thrash/workloads/overrides/prefetch_dirfrags/no.yaml b/qa/suites/fs/thrash/workloads/overrides/prefetch_oft_dirfrags/no.yaml index 91b45367934..91b45367934 100644 --- a/qa/suites/fs/thrash/workloads/overrides/prefetch_dirfrags/no.yaml +++ b/qa/suites/fs/thrash/workloads/overrides/prefetch_oft_dirfrags/no.yaml diff --git a/qa/suites/fs/thrash/workloads/overrides/prefetch_dirfrags/yes.yaml b/qa/suites/fs/thrash/workloads/overrides/prefetch_oft_dirfrags/yes.yaml index bd202f988c8..bd202f988c8 100644 --- a/qa/suites/fs/thrash/workloads/overrides/prefetch_dirfrags/yes.yaml +++ b/qa/suites/fs/thrash/workloads/overrides/prefetch_oft_dirfrags/yes.yaml diff --git a/qa/suites/fs/upgrade/featureful_client/old_client/kernel.yaml b/qa/suites/fs/upgrade/featureful_client/old_client/kernel.yaml new file mode 120000 index 00000000000..a7f7b735665 --- /dev/null +++ b/qa/suites/fs/upgrade/featureful_client/old_client/kernel.yaml @@ -0,0 +1 @@ +.qa/cephfs/begin/3-kernel.yaml
\ No newline at end of file diff --git a/qa/suites/fs/upgrade/featureful_client/upgraded_client/kernel.yaml b/qa/suites/fs/upgrade/featureful_client/upgraded_client/kernel.yaml new file mode 120000 index 00000000000..a7f7b735665 --- /dev/null +++ b/qa/suites/fs/upgrade/featureful_client/upgraded_client/kernel.yaml @@ -0,0 +1 @@ +.qa/cephfs/begin/3-kernel.yaml
\ No newline at end of file diff --git a/qa/suites/fs/upgrade/mds_upgrade_sequence/kernel.yaml b/qa/suites/fs/upgrade/mds_upgrade_sequence/kernel.yaml new file mode 120000 index 00000000000..a7f7b735665 --- /dev/null +++ b/qa/suites/fs/upgrade/mds_upgrade_sequence/kernel.yaml @@ -0,0 +1 @@ +.qa/cephfs/begin/3-kernel.yaml
\ No newline at end of file diff --git a/qa/suites/fs/upgrade/nofs/kernel.yaml b/qa/suites/fs/upgrade/nofs/kernel.yaml new file mode 120000 index 00000000000..a7f7b735665 --- /dev/null +++ b/qa/suites/fs/upgrade/nofs/kernel.yaml @@ -0,0 +1 @@ +.qa/cephfs/begin/3-kernel.yaml
\ No newline at end of file diff --git a/qa/suites/fs/upgrade/upgraded_client/kernel.yaml b/qa/suites/fs/upgrade/upgraded_client/kernel.yaml new file mode 120000 index 00000000000..a7f7b735665 --- /dev/null +++ b/qa/suites/fs/upgrade/upgraded_client/kernel.yaml @@ -0,0 +1 @@ +.qa/cephfs/begin/3-kernel.yaml
\ No newline at end of file diff --git a/qa/suites/fs/workload/begin/3-kernel.yaml b/qa/suites/fs/workload/begin/3-kernel.yaml new file mode 120000 index 00000000000..a7f7b735665 --- /dev/null +++ b/qa/suites/fs/workload/begin/3-kernel.yaml @@ -0,0 +1 @@ +.qa/cephfs/begin/3-kernel.yaml
\ No newline at end of file diff --git a/qa/suites/fs/workload/tasks/6-workunit/kernel_untar_build.yaml b/qa/suites/fs/workload/tasks/6-workunit/kernel_untar_build.yaml index 602d3416263..aa327b0cdf5 100644 --- a/qa/suites/fs/workload/tasks/6-workunit/kernel_untar_build.yaml +++ b/qa/suites/fs/workload/tasks/6-workunit/kernel_untar_build.yaml @@ -5,6 +5,7 @@ overrides: - "mds.dir_split" tasks: - workunit: + timeout: 5h clients: all: - kernel_untar_build.sh diff --git a/qa/suites/nvmeof/basic/clusters/4-gateways-2-initiator.yaml b/qa/suites/nvmeof/basic/clusters/4-gateways-2-initiator.yaml index e8f390c3b78..7f20f9f04a8 100644 --- a/qa/suites/nvmeof/basic/clusters/4-gateways-2-initiator.yaml +++ b/qa/suites/nvmeof/basic/clusters/4-gateways-2-initiator.yaml @@ -28,3 +28,5 @@ overrides: mon: # cephadm can take up to 5 minutes to bring up remaining mons mon down mkfs grace: 300 + log-ignorelist: + - NVMEOF_SINGLE_GATEWAY diff --git a/qa/suites/nvmeof/basic/workloads/nvmeof_initiator.yaml b/qa/suites/nvmeof/basic/workloads/nvmeof_initiator.yaml index 7c97edae552..0416ae2ea4e 100644 --- a/qa/suites/nvmeof/basic/workloads/nvmeof_initiator.yaml +++ b/qa/suites/nvmeof/basic/workloads/nvmeof_initiator.yaml @@ -1,7 +1,8 @@ +# runs on default nvmeof image (i.e. DEFAULT_NVMEOF_IMAGE) tasks: - nvmeof: installer: host.a - gw_image: quay.io/ceph/nvmeof:latest # "default" is the image cephadm defaults to; change to test specific nvmeof images, example "latest" + gw_image: default # "default" is the image cephadm defaults to; change to test specific nvmeof images, example "latest" rbd: pool_name: mypool image_name_prefix: myimage diff --git a/qa/suites/nvmeof/basic/workloads/nvmeof_mtls.yaml b/qa/suites/nvmeof/basic/workloads/nvmeof_mtls.yaml new file mode 100644 index 00000000000..8eb4f6dc63c --- /dev/null +++ b/qa/suites/nvmeof/basic/workloads/nvmeof_mtls.yaml @@ -0,0 +1,36 @@ +tasks: +- nvmeof: + installer: host.a + gw_image: quay.io/ceph/nvmeof:latest # "default" is the image cephadm defaults to; change to test specific nvmeof images, example "latest" + rbd: + pool_name: mypool + image_name_prefix: myimage + gateway_config: + subsystems_count: 3 + namespaces_count: 20 + cli_image: quay.io/ceph/nvmeof-cli:latest + create_mtls_secrets: true + +- cephadm.wait_for_service: + service: nvmeof.mypool.mygroup0 + +- workunit: + no_coverage_and_limits: true + timeout: 30m + clients: + client.0: + - nvmeof/setup_subsystem.sh + - nvmeof/basic_tests.sh + - nvmeof/fio_test.sh --rbd_iostat + env: + RBD_POOL: mypool + RBD_IMAGE_PREFIX: myimage + IOSTAT_INTERVAL: '10' + RUNTIME: '60' + +- workunit: + no_coverage_and_limits: true + timeout: 30m + clients: + client.0: + - nvmeof/mtls_test.sh diff --git a/qa/suites/nvmeof/basic/workloads/nvmeof_namespaces.yaml b/qa/suites/nvmeof/basic/workloads/nvmeof_namespaces.yaml index 9ef37004427..dfe31380bb6 100644 --- a/qa/suites/nvmeof/basic/workloads/nvmeof_namespaces.yaml +++ b/qa/suites/nvmeof/basic/workloads/nvmeof_namespaces.yaml @@ -18,6 +18,7 @@ tasks: clients: client.0: - nvmeof/setup_subsystem.sh + - nvmeof/basic_tests.sh env: RBD_POOL: mypool RBD_IMAGE_PREFIX: myimage @@ -27,7 +28,6 @@ tasks: timeout: 30m clients: client.0: - - nvmeof/basic_tests.sh - nvmeof/fio_test.sh --rbd_iostat client.1: - nvmeof/basic_tests.sh diff --git a/qa/suites/nvmeof/basic/workloads/nvmeof_scalability.yaml b/qa/suites/nvmeof/basic/workloads/nvmeof_scalability.yaml index 12cb50b408d..d66b6fc8093 100644 --- a/qa/suites/nvmeof/basic/workloads/nvmeof_scalability.yaml +++ 
b/qa/suites/nvmeof/basic/workloads/nvmeof_scalability.yaml @@ -31,8 +31,11 @@ tasks: no_coverage_and_limits: true timeout: 30m clients: - client.0: + client.3: - nvmeof/scalability_test.sh nvmeof.a,nvmeof.b - nvmeof/scalability_test.sh nvmeof.b,nvmeof.c,nvmeof.d + - nvmeof/scalability_test.sh nvmeof.b,nvmeof.c env: SCALING_DELAYS: '50' + RBD_POOL: mypool + NVMEOF_GROUP: mygroup0 diff --git a/qa/suites/nvmeof/thrash/gateway-initiator-setup/10-subsys-90-namespace-no_huge_pages.yaml b/qa/suites/nvmeof/thrash/gateway-initiator-setup/10-subsys-90-namespace-no_huge_pages.yaml new file mode 100644 index 00000000000..83d54cdf5c3 --- /dev/null +++ b/qa/suites/nvmeof/thrash/gateway-initiator-setup/10-subsys-90-namespace-no_huge_pages.yaml @@ -0,0 +1,37 @@ +tasks: +- nvmeof: + installer: host.a + gw_image: quay.io/ceph/nvmeof:latest # "default" is the image cephadm defaults to; change to test specific nvmeof images, example "latest" + rbd: + pool_name: mypool + image_name_prefix: myimage + gateway_config: + subsystems_count: 10 + namespaces_count: 90 # each subsystem + cli_image: quay.io/ceph/nvmeof-cli:latest + +- cephadm.wait_for_service: + service: nvmeof.mypool.mygroup0 + +- cephadm.exec: + host.a: + - ceph orch ls nvmeof --export > /tmp/nvmeof-orig.yaml + - cp /tmp/nvmeof-orig.yaml /tmp/nvmeof-no-huge-page.yaml + - "sed -i '/ pool: mypool/a\\ spdk_mem_size: 4096' /tmp/nvmeof-no-huge-page.yaml" + - cat /tmp/nvmeof-no-huge-page.yaml + - ceph orch ls --refresh + - ceph orch apply -i /tmp/nvmeof-no-huge-page.yaml + - ceph orch redeploy nvmeof.mypool.mygroup0 + +- cephadm.wait_for_service: + service: nvmeof.mypool.mygroup0 + +- workunit: + no_coverage_and_limits: true + clients: + client.0: + - nvmeof/setup_subsystem.sh + - nvmeof/basic_tests.sh + env: + RBD_POOL: mypool + RBD_IMAGE_PREFIX: myimage diff --git a/qa/suites/nvmeof/thrash/gateway-initiator-setup/3-subsys-60-namespace.yaml b/qa/suites/nvmeof/thrash/gateway-initiator-setup/120-subsys-8-namespace.yaml index b4755a6433b..0f7ac011a60 100644 --- a/qa/suites/nvmeof/thrash/gateway-initiator-setup/3-subsys-60-namespace.yaml +++ b/qa/suites/nvmeof/thrash/gateway-initiator-setup/120-subsys-8-namespace.yaml @@ -6,8 +6,8 @@ tasks: pool_name: mypool image_name_prefix: myimage gateway_config: - subsystems_count: 3 - namespaces_count: 20 # each subsystem + subsystems_count: 120 + namespaces_count: 8 # each subsystem cli_image: quay.io/ceph/nvmeof-cli:latest - cephadm.wait_for_service: diff --git a/qa/suites/nvmeof/thrash/thrashers/nvmeof_mon_thrash.yaml b/qa/suites/nvmeof/thrash/thrashers/nvmeof_mon_thrash.yaml index 6a5bd1d754e..46037784d31 100644 --- a/qa/suites/nvmeof/thrash/thrashers/nvmeof_mon_thrash.yaml +++ b/qa/suites/nvmeof/thrash/thrashers/nvmeof_mon_thrash.yaml @@ -8,6 +8,10 @@ overrides: - out of quorum # nvmeof daemon thrashing - CEPHADM_FAILED_DAEMON + - NVMEOF_SINGLE_GATEWAY + - NVMEOF_GATEWAY_DOWN + - are in unavailable state + - is unavailable - is in error state - failed cephadm daemon diff --git a/qa/suites/nvmeof/thrash/thrashers/nvmeof_thrash.yaml b/qa/suites/nvmeof/thrash/thrashers/nvmeof_thrash.yaml index 422c821536a..b58dc14d87b 100644 --- a/qa/suites/nvmeof/thrash/thrashers/nvmeof_thrash.yaml +++ b/qa/suites/nvmeof/thrash/thrashers/nvmeof_thrash.yaml @@ -3,9 +3,14 @@ overrides: log-ignorelist: # nvmeof daemon thrashing - CEPHADM_FAILED_DAEMON + - NVMEOF_SINGLE_GATEWAY + - NVMEOF_GATEWAY_DOWN + - are in unavailable state + - is unavailable - is in error state - failed cephadm daemon tasks: - nvmeof.thrash: checker_host: 
'client.0' + randomize: False diff --git a/qa/suites/nvmeof/thrash/workloads/fio.yaml b/qa/suites/nvmeof/thrash/workloads/fio.yaml index b042b92d6ae..f9a0d0ebde5 100644 --- a/qa/suites/nvmeof/thrash/workloads/fio.yaml +++ b/qa/suites/nvmeof/thrash/workloads/fio.yaml @@ -1,11 +1,11 @@ tasks: - workunit: no_coverage_and_limits: true - timeout: 30m + timeout: 60m clients: client.0: - - nvmeof/fio_test.sh --rbd_iostat + - nvmeof/fio_test.sh --random_devices 200 env: RBD_POOL: mypool IOSTAT_INTERVAL: '10' - RUNTIME: '600' + RUNTIME: '1800' diff --git a/qa/suites/orch/cephadm/smb/tasks/deploy_smb_mgr_clustering_ips.yaml b/qa/suites/orch/cephadm/smb/tasks/deploy_smb_mgr_clustering_ips.yaml new file mode 100644 index 00000000000..3bbf30ea427 --- /dev/null +++ b/qa/suites/orch/cephadm/smb/tasks/deploy_smb_mgr_clustering_ips.yaml @@ -0,0 +1,91 @@ +roles: +# Test is for basic smb deployment & functionality. one node cluster is OK +- - host.a + - mon.a + - mgr.x + - osd.0 + - osd.1 + - client.0 +- - host.b + - mon.b + - osd.2 + - osd.3 +- - host.c + - mon.c + - osd.4 + - osd.5 +# Reserve a host for acting as a domain controller and smb client +- - host.d + - cephadm.exclude +overrides: + ceph: + log-only-match: + - CEPHADM_ +tasks: +- cephadm.configure_samba_client_container: + role: host.d +- vip: + count: 1 +- cephadm: + +- cephadm.shell: + host.a: + - ceph fs volume create cephfs +- cephadm.wait_for_service: + service: mds.cephfs + +- cephadm.shell: + host.a: + # add subvolgroup & subvolumes for test + - cmd: ceph fs subvolumegroup create cephfs smb + - cmd: ceph fs subvolume create cephfs sv1 --group-name=smb --mode=0777 + - cmd: ceph fs subvolume create cephfs sv2 --group-name=smb --mode=0777 + # set up smb cluster and shares + - cmd: ceph mgr module enable smb + - cmd: sleep 30 + - cmd: > + ceph smb cluster create modusr1 user + --define-user-pass=user1%t3stP4ss1 + --placement=count:3 + --clustering=default + --public_addrs={{VIP0}}/{{VIPPREFIXLEN}} + - cmd: ceph smb share create modusr1 share1 cephfs / --subvolume=smb/sv1 + - cmd: ceph smb share create modusr1 share2 cephfs / --subvolume=smb/sv2 +# Wait for the smb service to start +- cephadm.wait_for_service: + service: smb.modusr1 + +# Check if shares exist +- cephadm.exec: + host.d: + - sleep 30 + - "{{ctx.samba_client_container_cmd|join(' ')}} smbclient -U user1%t3stP4ss1 //{{'host.a'|role_to_remote|attr('ip_address')}}/share1 -c ls" + - "{{ctx.samba_client_container_cmd|join(' ')}} smbclient -U user1%t3stP4ss1 //{{'host.a'|role_to_remote|attr('ip_address')}}/share2 -c ls" + +# verify CTDB is healthy, cluster well formed +- cephadm.exec: + host.a: + - "{{ctx.cephadm}} ls --no-detail | {{ctx.cephadm}} shell jq -r 'map(select(.name | startswith(\"smb.modusr1\")))[-1].name' > /tmp/svcname" + - "{{ctx.cephadm}} enter -n $(cat /tmp/svcname) ctdb status > /tmp/ctdb_status" + - cat /tmp/ctdb_status + - grep 'pnn:0 .*OK' /tmp/ctdb_status + - grep 'pnn:1 .*OK' /tmp/ctdb_status + - grep 'pnn:2 .*OK' /tmp/ctdb_status + - grep 'Number of nodes:3' /tmp/ctdb_status + - rm -rf /tmp/svcname /tmp/ctdb_status + +# Test the assigned VIP +- cephadm.exec: + host.d: + - sleep 30 + - "{{ctx.samba_client_container_cmd|join(' ')}} smbclient -U user1%t3stP4ss1 //{{VIP0}}/share1 -c ls" + - "{{ctx.samba_client_container_cmd|join(' ')}} smbclient -U user1%t3stP4ss1 //{{VIP0}}/share2 -c ls" + +- cephadm.shell: + host.a: + - cmd: ceph smb share rm modusr1 share2 + - cmd: ceph smb share rm modusr1 share1 + - cmd: ceph smb cluster rm modusr1 +# Wait for the smb service to 
be removed +- cephadm.wait_for_service_not_present: + service: smb.modusr1 diff --git a/qa/suites/rados/mgr/tasks/4-units/module_selftest.yaml b/qa/suites/rados/mgr/tasks/4-units/module_selftest.yaml index 1eb4a184dca..e2a2ca03cc9 100644 --- a/qa/suites/rados/mgr/tasks/4-units/module_selftest.yaml +++ b/qa/suites/rados/mgr/tasks/4-units/module_selftest.yaml @@ -6,7 +6,6 @@ overrides: - objects misplaced - Synthetic exception in serve - influxdb python module not found - - \(MGR_ZABBIX_ - foo bar - Failed to open Telegraf - evicting unresponsive client diff --git a/qa/suites/rados/monthrash/workloads/rados_mon_osdmap_prune.yaml b/qa/suites/rados/monthrash/workloads/rados_mon_osdmap_prune.yaml index 372bf2561fa..8b3c4c11ac6 100644 --- a/qa/suites/rados/monthrash/workloads/rados_mon_osdmap_prune.yaml +++ b/qa/suites/rados/monthrash/workloads/rados_mon_osdmap_prune.yaml @@ -15,6 +15,7 @@ overrides: # causing tests to fail due to health warns, even if # the tests themselves are successful. - \(OSDMAP_FLAGS\) + - \(PG_DEGRADED\) tasks: - workunit: clients: diff --git a/qa/suites/rados/objectstore/backends/ceph_test_bluefs.yaml b/qa/suites/rados/objectstore/backends/ceph_test_bluefs.yaml new file mode 100644 index 00000000000..7cd47898544 --- /dev/null +++ b/qa/suites/rados/objectstore/backends/ceph_test_bluefs.yaml @@ -0,0 +1,8 @@ +roles: +- [mon.a, mgr.x, osd.0, osd.1, client.0] +tasks: +- install: +- exec: + client.0: + - mkdir $TESTDIR/ceph_test_bluefs && cd $TESTDIR/ceph_test_bluefs && ceph_test_bluefs --log-file $TESTDIR/archive/ceph_test_bluefs.log --debug-bluefs 5/20 --gtest_catch_exceptions=0 + - rm -rf $TESTDIR/ceph_test_bluefs diff --git a/qa/suites/rados/rest/mgr-restful.yaml b/qa/suites/rados/rest/mgr-restful.yaml deleted file mode 100644 index 4901f401d30..00000000000 --- a/qa/suites/rados/rest/mgr-restful.yaml +++ /dev/null @@ -1,31 +0,0 @@ -openstack: -- volumes: # attached to each instance - count: 3 - size: 10 # GB -roles: -- [mon.a, mgr.x, osd.0, osd.1, osd.2, mds.a, client.a] -tasks: -- install: -- ceph: - log-ignorelist: - - overall HEALTH_ - - \(MGR_DOWN\) - - \(PG_ - - \(OSD_ - - \(OBJECT_ - - \(OSDMAP_FLAGS\) - - \(POOL_APP_NOT_ENABLED\) -- exec: - mon.a: - - ceph restful create-key admin - - ceph restful create-self-signed-cert - - ceph restful restart -- workunit: - clients: - client.a: - - rest/test-restful.sh -- exec: - mon.a: - - ceph restful delete-key admin - - ceph restful list-keys | jq ".admin" | grep null - diff --git a/qa/suites/rados/rest/supported-random-distro$ b/qa/suites/rados/rest/supported-random-distro$ deleted file mode 120000 index 7cef21eeffd..00000000000 --- a/qa/suites/rados/rest/supported-random-distro$ +++ /dev/null @@ -1 +0,0 @@ -../basic/supported-random-distro$
\ No newline at end of file diff --git a/qa/suites/rados/singleton/all/stretch-mode-5-mons-8-osds.yaml b/qa/suites/rados/singleton/all/stretch-mode-5-mons-8-osds.yaml new file mode 100644 index 00000000000..69a54b0f1b7 --- /dev/null +++ b/qa/suites/rados/singleton/all/stretch-mode-5-mons-8-osds.yaml @@ -0,0 +1,57 @@ +roles: +- - mon.a + - mon.b + - mgr.a + - mgr.b + - osd.0 + - osd.1 + - osd.2 + - osd.3 +- - mon.c + - mon.d + - mgr.c + - mgr.d + - osd.4 + - osd.5 + - osd.6 + - osd.7 +- - mon.e +- - client.0 + +openstack: + - volumes: # attached to each instance + count: 3 + size: 10 # GB +overrides: + ceph: + conf: + global: + osd pool default size: 3 + osd pool default min size: 2 + mon: + debug mon: 30 +tasks: +- install: +- ceph: + pre-mgr-commands: + - sudo ceph config set mgr mgr_pool false --force + log-ignorelist: + - \(POOL_ + - \(CACHE_POOL_ + - overall HEALTH_ + - \(PG_AVAILABILITY\) + - Reduced data availability + - \(PG_DEGRADED\) + - \(MON_DOWN\) + - \(OSD_DATACENTER_DOWN\) + - \(OSD_DOWN\) + - \(OSD_HOST_DOWN\) + + +- workunit: + clients: + client.0: + - mon/mon-stretch-mode-5-mons-8-osds.sh +- cephfs_test_runner: + modules: + - tasks.stretch_mode_disable_enable diff --git a/qa/suites/rados/thrash-old-clients/workloads/rbd_cls.yaml b/qa/suites/rados/thrash-old-clients/workloads/rbd_cls.yaml index a8bbbafece0..b916bed1475 100644 --- a/qa/suites/rados/thrash-old-clients/workloads/rbd_cls.yaml +++ b/qa/suites/rados/thrash-old-clients/workloads/rbd_cls.yaml @@ -2,6 +2,9 @@ meta: - desc: | rbd object class functional tests tasks: -- exec: - client.2: - - ceph_test_cls_rbd --gtest_filter=-TestClsRbd.get_features:TestClsRbd.parents:TestClsRbd.mirror +- workunit: + clients: + client.2: + - cls/test_cls_rbd.sh + env: + CLS_RBD_GTEST_FILTER: '-TestClsRbd.group_snap_set:TestClsRbd.group_snap_remove' diff --git a/qa/suites/rados/valgrind-leaks/1-start.yaml b/qa/suites/rados/valgrind-leaks/1-start.yaml index 1cdd8a688e8..cc8c8e53766 100644 --- a/qa/suites/rados/valgrind-leaks/1-start.yaml +++ b/qa/suites/rados/valgrind-leaks/1-start.yaml @@ -12,6 +12,7 @@ overrides: - overall HEALTH_ - \(PG_ - \(POOL_APP_NOT_ENABLED\) + - OSD bench result conf: global: osd heartbeat grace: 40 diff --git a/qa/suites/rados/verify/validater/valgrind.yaml b/qa/suites/rados/verify/validater/valgrind.yaml index c70893893fd..17cf141b0cd 100644 --- a/qa/suites/rados/verify/validater/valgrind.yaml +++ b/qa/suites/rados/verify/validater/valgrind.yaml @@ -26,6 +26,8 @@ overrides: - \(MON_DOWN\) - \(SLOW_OPS\) - slow request + - OSD bench result + - OSD_DOWN valgrind: mon: [--tool=memcheck, --leak-check=full, --show-reachable=yes] osd: [--tool=memcheck] diff --git a/qa/suites/rbd/migration/6-prepare/qcow2-https.yaml b/qa/suites/rbd/migration/6-prepare/qcow2-https.yaml new file mode 100644 index 00000000000..d2072c41a68 --- /dev/null +++ b/qa/suites/rbd/migration/6-prepare/qcow2-https.yaml @@ -0,0 +1,8 @@ +tasks: + - exec: + client.0: + - mkdir /home/ubuntu/cephtest/migration + - qemu-img create -f qcow2 /home/ubuntu/cephtest/migration/empty.qcow2 1G + - echo '{"type":"qcow","stream":{"type":"http","url":"https://download.ceph.com/qa/ubuntu-12.04.qcow2"}}' | rbd migration prepare --import-only --source-spec-path - client.0.0 + - rbd migration prepare --import-only --source-spec '{"type":"qcow","stream":{"type":"file","file_path":"/home/ubuntu/cephtest/migration/empty.qcow2"}}' client.0.1 + - rbd migration prepare --import-only --source-spec 
'{"type":"qcow","stream":{"type":"file","file_path":"/home/ubuntu/cephtest/migration/empty.qcow2"}}' client.0.2 diff --git a/qa/suites/rgw/bucket-logging/% b/qa/suites/rgw/bucket-logging/% new file mode 100644 index 00000000000..e69de29bb2d --- /dev/null +++ b/qa/suites/rgw/bucket-logging/% diff --git a/qa/suites/rgw/bucket-logging/.qa b/qa/suites/rgw/bucket-logging/.qa new file mode 120000 index 00000000000..a602a0353e7 --- /dev/null +++ b/qa/suites/rgw/bucket-logging/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rgw/bucket-logging/0-install.yaml b/qa/suites/rgw/bucket-logging/0-install.yaml new file mode 100644 index 00000000000..6cf82f57476 --- /dev/null +++ b/qa/suites/rgw/bucket-logging/0-install.yaml @@ -0,0 +1,13 @@ +tasks: +- install: +- ceph: +- openssl_keys: +- rgw: [client.0] +- tox: [client.0] + +overrides: + ceph: + conf: + global: + osd_min_pg_log_entries: 10 + osd_max_pg_log_entries: 10 diff --git a/qa/suites/rgw/bucket-logging/beast.yaml b/qa/suites/rgw/bucket-logging/beast.yaml new file mode 120000 index 00000000000..09ced62c42a --- /dev/null +++ b/qa/suites/rgw/bucket-logging/beast.yaml @@ -0,0 +1 @@ +.qa/rgw_frontend/beast.yaml
\ No newline at end of file diff --git a/qa/suites/rgw/bucket-logging/fixed-1.yaml b/qa/suites/rgw/bucket-logging/fixed-1.yaml new file mode 120000 index 00000000000..02df5dd0cd0 --- /dev/null +++ b/qa/suites/rgw/bucket-logging/fixed-1.yaml @@ -0,0 +1 @@ +.qa/clusters/fixed-1.yaml
\ No newline at end of file diff --git a/qa/suites/rgw/bucket-logging/ignore-pg-availability.yaml b/qa/suites/rgw/bucket-logging/ignore-pg-availability.yaml new file mode 120000 index 00000000000..32340b1fa8b --- /dev/null +++ b/qa/suites/rgw/bucket-logging/ignore-pg-availability.yaml @@ -0,0 +1 @@ +.qa/rgw/ignore-pg-availability.yaml
\ No newline at end of file diff --git a/qa/suites/rgw/bucket-logging/overrides.yaml b/qa/suites/rgw/bucket-logging/overrides.yaml new file mode 100644 index 00000000000..a448a323d36 --- /dev/null +++ b/qa/suites/rgw/bucket-logging/overrides.yaml @@ -0,0 +1,10 @@ +overrides: + ceph: + conf: + client: + setuser: ceph + setgroup: ceph + debug rgw: 20 + rgw bucket logging obj roll time: 5 + rgw: + storage classes: LUKEWARM, FROZEN diff --git a/qa/suites/rgw/bucket-logging/s3tests-branch.yaml b/qa/suites/rgw/bucket-logging/s3tests-branch.yaml new file mode 120000 index 00000000000..bdcaca48ae0 --- /dev/null +++ b/qa/suites/rgw/bucket-logging/s3tests-branch.yaml @@ -0,0 +1 @@ +.qa/rgw/s3tests-branch.yaml
\ No newline at end of file diff --git a/qa/suites/rgw/bucket-logging/supported-distros b/qa/suites/rgw/bucket-logging/supported-distros new file mode 120000 index 00000000000..78f2991b407 --- /dev/null +++ b/qa/suites/rgw/bucket-logging/supported-distros @@ -0,0 +1 @@ +.qa/distros/supported-random-distro$/
\ No newline at end of file diff --git a/qa/suites/rgw/bucket-logging/tasks/+ b/qa/suites/rgw/bucket-logging/tasks/+ new file mode 100644 index 00000000000..e69de29bb2d --- /dev/null +++ b/qa/suites/rgw/bucket-logging/tasks/+ diff --git a/qa/suites/rgw/bucket-logging/tasks/s3tests.yaml b/qa/suites/rgw/bucket-logging/tasks/s3tests.yaml new file mode 100644 index 00000000000..c1d3b7192e1 --- /dev/null +++ b/qa/suites/rgw/bucket-logging/tasks/s3tests.yaml @@ -0,0 +1,6 @@ +tasks: +- s3tests: + client.0: + boto3_extensions: True + rgw_server: client.0 + extra_attrs: ["bucket_logging"] diff --git a/qa/suites/rgw/crypt/2-kms/barbican.yaml b/qa/suites/rgw/crypt/2-kms/barbican.yaml index 9bf5fb81131..e3f78810416 100644 --- a/qa/suites/rgw/crypt/2-kms/barbican.yaml +++ b/qa/suites/rgw/crypt/2-kms/barbican.yaml @@ -27,7 +27,7 @@ tasks: - tox: [ client.0 ] - keystone: client.0: - force-branch: stable/2023.1 + force-branch: stable/2024.1 services: - name: swift type: object-store @@ -68,7 +68,7 @@ tasks: project: s3 - barbican: client.0: - force-branch: stable/2023.1 + force-branch: stable/2024.1 use-keystone-role: client.0 keystone_authtoken: auth_plugin: password diff --git a/qa/suites/rgw/multisite/realms/two-zonegroup.yaml.disabled b/qa/suites/rgw/multisite/realms/two-zonegroup.yaml index 5e4234236a9..ac2104cdd05 100644 --- a/qa/suites/rgw/multisite/realms/two-zonegroup.yaml.disabled +++ b/qa/suites/rgw/multisite/realms/two-zonegroup.yaml @@ -28,4 +28,4 @@ overrides: - name: b2 endpoints: [c2.client.1] rgw-multisite-tests: - args: [tests.py] + args: [tests.py, -a, '!fails_with_rgw'] diff --git a/qa/suites/rgw/notifications/tasks/kafka/test_kafka.yaml b/qa/suites/rgw/notifications/tasks/kafka/test_kafka.yaml index 462570e7727..303f98d540e 100644 --- a/qa/suites/rgw/notifications/tasks/kafka/test_kafka.yaml +++ b/qa/suites/rgw/notifications/tasks/kafka/test_kafka.yaml @@ -1,7 +1,7 @@ tasks: - kafka: client.0: - kafka_version: 2.6.0 + kafka_version: 3.8.1 - notification-tests: client.0: extra_attr: ["kafka_test", "data_path_v2_kafka_test"] diff --git a/qa/suites/rgw/notifications/tasks/kafka_failover/+ b/qa/suites/rgw/notifications/tasks/kafka_failover/+ new file mode 100644 index 00000000000..e69de29bb2d --- /dev/null +++ b/qa/suites/rgw/notifications/tasks/kafka_failover/+ diff --git a/qa/suites/rgw/notifications/tasks/kafka_failover/0-install.yaml b/qa/suites/rgw/notifications/tasks/kafka_failover/0-install.yaml new file mode 100644 index 00000000000..5c83d5c0d23 --- /dev/null +++ b/qa/suites/rgw/notifications/tasks/kafka_failover/0-install.yaml @@ -0,0 +1,20 @@ +tasks: +- install: +- ceph: +- openssl_keys: +- rgw: + client.0: + +overrides: + install: + ceph: + extra_system_packages: + rpm: + - java + deb: + - default-jre + ceph: + conf: + global: + osd_min_pg_log_entries: 10 + osd_max_pg_log_entries: 10 diff --git a/qa/suites/rgw/notifications/tasks/kafka_failover/supported-distros b/qa/suites/rgw/notifications/tasks/kafka_failover/supported-distros new file mode 120000 index 00000000000..46280a42a96 --- /dev/null +++ b/qa/suites/rgw/notifications/tasks/kafka_failover/supported-distros @@ -0,0 +1 @@ +../../.qa/distros/supported-random-distro$/
\ No newline at end of file diff --git a/qa/suites/rgw/notifications/tasks/kafka_failover/test_kafka.yaml b/qa/suites/rgw/notifications/tasks/kafka_failover/test_kafka.yaml new file mode 100644 index 00000000000..01d6fc637de --- /dev/null +++ b/qa/suites/rgw/notifications/tasks/kafka_failover/test_kafka.yaml @@ -0,0 +1,8 @@ +tasks: +- kafka-failover: + client.0: + kafka_version: 3.8.1 +- notification-tests: + client.0: + extra_attr: ["kafka_failover"] + rgw_server: client.0 diff --git a/qa/suites/rgw/sts/auth-order/.qa b/qa/suites/rgw/sts/auth-order/.qa new file mode 120000 index 00000000000..fea2489fdf6 --- /dev/null +++ b/qa/suites/rgw/sts/auth-order/.qa @@ -0,0 +1 @@ +../.qa
\ No newline at end of file diff --git a/qa/suites/rgw/sts/auth-order/local-sts.yaml b/qa/suites/rgw/sts/auth-order/local-sts.yaml new file mode 100644 index 00000000000..2f7dcc6b128 --- /dev/null +++ b/qa/suites/rgw/sts/auth-order/local-sts.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + client: + rgw s3 auth order: local, sts, external diff --git a/qa/suites/rgw/sts/auth-order/sts-local.yaml b/qa/suites/rgw/sts/auth-order/sts-local.yaml new file mode 100644 index 00000000000..a7b00d00f0b --- /dev/null +++ b/qa/suites/rgw/sts/auth-order/sts-local.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + client: + rgw s3 auth order: sts, local, external diff --git a/qa/suites/rgw/tempest/0-install.yaml b/qa/suites/rgw/tempest/0-install.yaml index f968db20c2b..b6ef17de4ee 100644 --- a/qa/suites/rgw/tempest/0-install.yaml +++ b/qa/suites/rgw/tempest/0-install.yaml @@ -4,7 +4,7 @@ tasks: - tox: [ client.0 ] - keystone: client.0: - force-branch: stable/2023.1 + force-branch: stable/2024.1 services: - name: swift type: object-store diff --git a/qa/suites/rgw/tempest/tasks/s3/% b/qa/suites/rgw/tempest/tasks/s3/% new file mode 100644 index 00000000000..e69de29bb2d --- /dev/null +++ b/qa/suites/rgw/tempest/tasks/s3/% diff --git a/qa/suites/rgw/tempest/tasks/s3/.qa b/qa/suites/rgw/tempest/tasks/s3/.qa new file mode 120000 index 00000000000..fea2489fdf6 --- /dev/null +++ b/qa/suites/rgw/tempest/tasks/s3/.qa @@ -0,0 +1 @@ +../.qa
\ No newline at end of file diff --git a/qa/suites/rgw/tempest/tasks/s3/auth-order/.qa b/qa/suites/rgw/tempest/tasks/s3/auth-order/.qa new file mode 120000 index 00000000000..fea2489fdf6 --- /dev/null +++ b/qa/suites/rgw/tempest/tasks/s3/auth-order/.qa @@ -0,0 +1 @@ +../.qa
\ No newline at end of file diff --git a/qa/suites/rgw/tempest/tasks/s3/auth-order/external-local.yaml b/qa/suites/rgw/tempest/tasks/s3/auth-order/external-local.yaml new file mode 100644 index 00000000000..c46a51e0958 --- /dev/null +++ b/qa/suites/rgw/tempest/tasks/s3/auth-order/external-local.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + client: + rgw s3 auth order: sts, external, local diff --git a/qa/suites/rgw/tempest/tasks/s3/auth-order/local-external.yaml b/qa/suites/rgw/tempest/tasks/s3/auth-order/local-external.yaml new file mode 100644 index 00000000000..a7b00d00f0b --- /dev/null +++ b/qa/suites/rgw/tempest/tasks/s3/auth-order/local-external.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + client: + rgw s3 auth order: sts, local, external diff --git a/qa/suites/rgw/tempest/tasks/s3tests.yaml b/qa/suites/rgw/tempest/tasks/s3/s3tests.yaml index 4efb579fa83..4efb579fa83 100644 --- a/qa/suites/rgw/tempest/tasks/s3tests.yaml +++ b/qa/suites/rgw/tempest/tasks/s3/s3tests.yaml diff --git a/qa/suites/rgw/verify/tasks/s3tests-java.yaml b/qa/suites/rgw/verify/tasks/zzz-s3tests-java.yaml index 9ad89cc6790..9ad89cc6790 100644 --- a/qa/suites/rgw/verify/tasks/s3tests-java.yaml +++ b/qa/suites/rgw/verify/tasks/zzz-s3tests-java.yaml diff --git a/qa/suites/upgrade/quincy-x/filestore-remove-check/0-cluster/start.yaml b/qa/suites/upgrade/quincy-x/filestore-remove-check/0-cluster/start.yaml index 57e455ba78d..a0adaecf9b2 100644 --- a/qa/suites/upgrade/quincy-x/filestore-remove-check/0-cluster/start.yaml +++ b/qa/suites/upgrade/quincy-x/filestore-remove-check/0-cluster/start.yaml @@ -19,6 +19,20 @@ overrides: - \(MGR_DOWN\) - slow request - \(MON_MSGR2_NOT_ENABLED\) + - \(OSD_DOWN\) + - \(OSD_HOST_DOWN\) + - \(POOL_APP_NOT_ENABLED\) + - OSD_DOWN + - mons down + - mon down + - MON_DOWN + - out of quorum + - PG_DEGRADED + - Reduced data availability + - Degraded data redundancy + - OSDMAP_FLAGS + - OSD_ROOT_DOWN + conf: global: enable experimental unrecoverable data corrupting features: "*" @@ -30,4 +44,3 @@ roles: - mgr.x - osd.0 - osd.1 - - osd.2
\ No newline at end of file diff --git a/qa/suites/upgrade/quincy-x/filestore-remove-check/1-ceph-install/quincy.yaml b/qa/suites/upgrade/quincy-x/filestore-remove-check/1-ceph-install/quincy.yaml index e4897db4d35..48cfa2f756f 100644 --- a/qa/suites/upgrade/quincy-x/filestore-remove-check/1-ceph-install/quincy.yaml +++ b/qa/suites/upgrade/quincy-x/filestore-remove-check/1-ceph-install/quincy.yaml @@ -18,9 +18,6 @@ tasks: mon: mon_warn_on_insecure_global_id_reclaim: false mon_warn_on_insecure_global_id_reclaim_allowed: false - log-ignorelist: - - Not found or unloadable - - evicting unresponsive client - exec: osd.0: - ceph osd require-osd-release quincy @@ -30,14 +27,3 @@ overrides: conf: mon: mon warn on osd down out interval zero: false - log-ignorelist: - - \(POOL_APP_NOT_ENABLED\) - - OSD_DOWN - - mons down - - mon down - - MON_DOWN - - out of quorum - - PG_DEGRADED - - Reduced data availability - - Degraded data redundancy - - OSDMAP_FLAGS diff --git a/qa/suites/upgrade/quincy-x/filestore-remove-check/2 - upgrade.yaml b/qa/suites/upgrade/quincy-x/filestore-remove-check/2 - upgrade.yaml index 6aa429f18b5..fe4ff9bb113 100644 --- a/qa/suites/upgrade/quincy-x/filestore-remove-check/2 - upgrade.yaml +++ b/qa/suites/upgrade/quincy-x/filestore-remove-check/2 - upgrade.yaml @@ -3,14 +3,13 @@ meta: install upgrade ceph/-x on cluster restart : mons, osd.* tasks: +- print: "**** start install.upgrade of nodes" - install.upgrade: - mon.a: -- exec: - osd.0: - - ceph osd require-osd-release quincy + all: - print: "**** done install.upgrade of nodes" +- print: "**** start ceph.restart of all osds" - ceph.restart: - daemons: [mon.a,mgr.x,osd.0,osd.1,osd.2] + daemons: [osd.0,osd.1,osd.2] mon-health-to-clog: false wait-for-healthy: false wait-for-osds-up: false diff --git a/qa/suites/upgrade/quincy-x/parallel/0-start.yaml b/qa/suites/upgrade/quincy-x/parallel/0-start.yaml index 40fbcefe728..62fb6427f72 100644 --- a/qa/suites/upgrade/quincy-x/parallel/0-start.yaml +++ b/qa/suites/upgrade/quincy-x/parallel/0-start.yaml @@ -32,13 +32,22 @@ overrides: osd: osd shutdown pgref assert: true log-ignorelist: - - \(POOL_APP_NOT_ENABLED\) + - do not have an application enabled + - application not enabled + - or freeform for custom applications + - POOL_APP_NOT_ENABLED + - is down - OSD_DOWN - mons down - mon down - MON_DOWN - out of quorum + - PG_AVAILABILITY - PG_DEGRADED - Reduced data availability - Degraded data redundancy + - pg .* is stuck inactive + - pg .* is .*degraded + - FS_DEGRADED - OSDMAP_FLAGS + - OSD_UPGRADE_FINISHED diff --git a/qa/suites/upgrade/quincy-x/parallel/1-tasks.yaml b/qa/suites/upgrade/quincy-x/parallel/1-tasks.yaml index e27c7c0f092..f7167975aa9 100644 --- a/qa/suites/upgrade/quincy-x/parallel/1-tasks.yaml +++ b/qa/suites/upgrade/quincy-x/parallel/1-tasks.yaml @@ -1,11 +1,8 @@ overrides: ceph: log-ignorelist: - - mons down - - mon down - - MON_DOWN - - out of quorum - - PG_AVAILABILITY + - Telemetry requires re-opt-in + - telemetry module includes new collections tasks: - install: branch: quincy diff --git a/qa/suites/upgrade/quincy-x/parallel/workload/rados_api.yaml b/qa/suites/upgrade/quincy-x/parallel/workload/rados_api.yaml index 9c2ff9da185..9a0585cc074 100644 --- a/qa/suites/upgrade/quincy-x/parallel/workload/rados_api.yaml +++ b/qa/suites/upgrade/quincy-x/parallel/workload/rados_api.yaml @@ -9,4 +9,6 @@ workload: clients: client.0: - cls + env: + CLS_RBD_GTEST_FILTER: '-TestClsRbd.group_snap_set:TestClsRbd.group_snap_remove' - print: "**** done end rados_api.yaml" diff --git 
a/qa/suites/upgrade/quincy-x/stress-split/1-start.yaml b/qa/suites/upgrade/quincy-x/stress-split/1-start.yaml index 005514292ce..5641471629e 100644 --- a/qa/suites/upgrade/quincy-x/stress-split/1-start.yaml +++ b/qa/suites/upgrade/quincy-x/stress-split/1-start.yaml @@ -1,17 +1,25 @@ overrides: ceph: log-ignorelist: - - \(POOL_APP_NOT_ENABLED\) + - do not have an application enabled + - application not enabled + - or freeform for custom applications + - POOL_APP_NOT_ENABLED + - is down - OSD_DOWN - mons down - mon down - MON_DOWN - out of quorum + - PG_AVAILABILITY - PG_DEGRADED - Reduced data availability - Degraded data redundancy + - pg .* is stuck inactive + - pg .* is .*degraded + - FS_DEGRADED - OSDMAP_FLAGS - - PG_AVAILABILITY + - OSD_UPGRADE_FINISHED tasks: - install: branch: quincy diff --git a/qa/suites/upgrade/quincy-x/stress-split/2-first-half-tasks/rbd-cls.yaml b/qa/suites/upgrade/quincy-x/stress-split/2-first-half-tasks/rbd-cls.yaml index b722f187361..a55dddf46f7 100644 --- a/qa/suites/upgrade/quincy-x/stress-split/2-first-half-tasks/rbd-cls.yaml +++ b/qa/suites/upgrade/quincy-x/stress-split/2-first-half-tasks/rbd-cls.yaml @@ -7,4 +7,6 @@ first-half-tasks: clients: client.0: - cls/test_cls_rbd.sh + env: + CLS_RBD_GTEST_FILTER: '-TestClsRbd.group_snap_set:TestClsRbd.group_snap_remove' - print: "**** done cls/test_cls_rbd.sh 5-workload" diff --git a/qa/suites/upgrade/quincy-x/stress-split/3-stress-tasks/rbd-cls.yaml b/qa/suites/upgrade/quincy-x/stress-split/3-stress-tasks/rbd-cls.yaml index 649b024a476..d54ba8039d0 100644 --- a/qa/suites/upgrade/quincy-x/stress-split/3-stress-tasks/rbd-cls.yaml +++ b/qa/suites/upgrade/quincy-x/stress-split/3-stress-tasks/rbd-cls.yaml @@ -7,4 +7,6 @@ stress-tasks: clients: client.0: - cls/test_cls_rbd.sh + env: + CLS_RBD_GTEST_FILTER: '-TestClsRbd.group_snap_set:TestClsRbd.group_snap_remove' - print: "**** done cls/test_cls_rbd.sh 5-workload" diff --git a/qa/suites/upgrade/reef-x/parallel/0-start.yaml b/qa/suites/upgrade/reef-x/parallel/0-start.yaml index 146bd57960d..62fb6427f72 100644 --- a/qa/suites/upgrade/reef-x/parallel/0-start.yaml +++ b/qa/suites/upgrade/reef-x/parallel/0-start.yaml @@ -32,4 +32,22 @@ overrides: osd: osd shutdown pgref assert: true log-ignorelist: - - PG_DEGRADED + - do not have an application enabled + - application not enabled + - or freeform for custom applications + - POOL_APP_NOT_ENABLED + - is down + - OSD_DOWN + - mons down + - mon down + - MON_DOWN + - out of quorum + - PG_AVAILABILITY + - PG_DEGRADED + - Reduced data availability + - Degraded data redundancy + - pg .* is stuck inactive + - pg .* is .*degraded + - FS_DEGRADED + - OSDMAP_FLAGS + - OSD_UPGRADE_FINISHED diff --git a/qa/suites/upgrade/reef-x/parallel/1-tasks.yaml b/qa/suites/upgrade/reef-x/parallel/1-tasks.yaml index ce4e0cc228b..b5160c2dd00 100644 --- a/qa/suites/upgrade/reef-x/parallel/1-tasks.yaml +++ b/qa/suites/upgrade/reef-x/parallel/1-tasks.yaml @@ -1,12 +1,8 @@ overrides: ceph: log-ignorelist: - - mons down - - mon down - - MON_DOWN - - out of quorum - - PG_AVAILABILITY - - PG_DEGRADED + - Telemetry requires re-opt-in + - telemetry module includes new collections tasks: - install: branch: reef diff --git a/qa/suites/upgrade/reef-x/parallel/overrides/ignorelist_health.yaml b/qa/suites/upgrade/reef-x/parallel/overrides/ignorelist_health.yaml new file mode 100644 index 00000000000..fa93b2f2ece --- /dev/null +++ b/qa/suites/upgrade/reef-x/parallel/overrides/ignorelist_health.yaml @@ -0,0 +1,19 @@ +overrides: + ceph: + log-ignorelist: + - 
MDS_ALL_DOWN + - MDS_UP_LESS_THAN_MAX + - OSD_SLOW_PING_TIME + - reached quota + - running out of quota + - overall HEALTH_ + - CACHE_POOL_NO_HIT_SET + - pool\(s\) full + - POOL_FULL + - SMALLER_PGP_NUM + - SLOW_OPS + - CACHE_POOL_NEAR_FULL + - OBJECT_MISPLACED + - slow request + - noscrub + - nodeep-scrub diff --git a/qa/suites/upgrade/reef-x/parallel/workload/rados_api.yaml b/qa/suites/upgrade/reef-x/parallel/workload/rados_api.yaml index a46e34db5dd..79cf1a96601 100644 --- a/qa/suites/upgrade/reef-x/parallel/workload/rados_api.yaml +++ b/qa/suites/upgrade/reef-x/parallel/workload/rados_api.yaml @@ -9,4 +9,6 @@ workload: clients: client.0: - cls + env: + CLS_RBD_GTEST_FILTER: '-TestClsRbd.group_snap_set:TestClsRbd.group_snap_remove' - print: "**** done end rados_api.yaml" diff --git a/qa/suites/upgrade/reef-x/stress-split/1-start.yaml b/qa/suites/upgrade/reef-x/stress-split/1-start.yaml index 992f9e1bc36..59ccfe2cd02 100644 --- a/qa/suites/upgrade/reef-x/stress-split/1-start.yaml +++ b/qa/suites/upgrade/reef-x/stress-split/1-start.yaml @@ -1,11 +1,25 @@ overrides: ceph: log-ignorelist: + - do not have an application enabled + - application not enabled + - or freeform for custom applications + - POOL_APP_NOT_ENABLED + - is down + - OSD_DOWN - mons down - mon down - MON_DOWN - out of quorum - PG_AVAILABILITY + - PG_DEGRADED + - Reduced data availability + - Degraded data redundancy + - pg .* is stuck inactive + - pg .* is .*degraded + - FS_DEGRADED + - OSDMAP_FLAGS + - OSD_UPGRADE_FINISHED tasks: - install: branch: reef diff --git a/qa/suites/upgrade/reef-x/stress-split/2-first-half-tasks/rbd-cls.yaml b/qa/suites/upgrade/reef-x/stress-split/2-first-half-tasks/rbd-cls.yaml index f092096f444..79ad2af8ea1 100644 --- a/qa/suites/upgrade/reef-x/stress-split/2-first-half-tasks/rbd-cls.yaml +++ b/qa/suites/upgrade/reef-x/stress-split/2-first-half-tasks/rbd-cls.yaml @@ -7,4 +7,6 @@ first-half-tasks: clients: client.0: - cls/test_cls_rbd.sh + env: + CLS_RBD_GTEST_FILTER: '-TestClsRbd.group_snap_set:TestClsRbd.group_snap_remove' - print: "**** done cls/test_cls_rbd.sh 5-workload" diff --git a/qa/suites/upgrade/reef-x/stress-split/3-stress-tasks/rbd-cls.yaml b/qa/suites/upgrade/reef-x/stress-split/3-stress-tasks/rbd-cls.yaml index 05bb672b3ac..166327a58f9 100644 --- a/qa/suites/upgrade/reef-x/stress-split/3-stress-tasks/rbd-cls.yaml +++ b/qa/suites/upgrade/reef-x/stress-split/3-stress-tasks/rbd-cls.yaml @@ -7,4 +7,6 @@ stress-tasks: clients: client.0: - cls/test_cls_rbd.sh + env: + CLS_RBD_GTEST_FILTER: '-TestClsRbd.group_snap_set:TestClsRbd.group_snap_remove' - print: "**** done cls/test_cls_rbd.sh 5-workload" diff --git a/qa/suites/upgrade/reef-x/stress-split/overrides/ignorelist_health.yaml b/qa/suites/upgrade/reef-x/stress-split/overrides/ignorelist_health.yaml new file mode 100644 index 00000000000..fa93b2f2ece --- /dev/null +++ b/qa/suites/upgrade/reef-x/stress-split/overrides/ignorelist_health.yaml @@ -0,0 +1,19 @@ +overrides: + ceph: + log-ignorelist: + - MDS_ALL_DOWN + - MDS_UP_LESS_THAN_MAX + - OSD_SLOW_PING_TIME + - reached quota + - running out of quota + - overall HEALTH_ + - CACHE_POOL_NO_HIT_SET + - pool\(s\) full + - POOL_FULL + - SMALLER_PGP_NUM + - SLOW_OPS + - CACHE_POOL_NEAR_FULL + - OBJECT_MISPLACED + - slow request + - noscrub + - nodeep-scrub diff --git a/qa/tasks/ceph.py b/qa/tasks/ceph.py index 9b04e3dc675..8f666d2fa9b 100644 --- a/qa/tasks/ceph.py +++ b/qa/tasks/ceph.py @@ -1206,8 +1206,18 @@ def cluster(ctx, config): args.extend([ run.Raw('|'), 'head', '-n', '1', ]) - 
stdout = mon0_remote.sh(args) - return stdout or None + r = mon0_remote.run( + stdout=BytesIO(), + args=args, + stderr=StringIO(), + ) + stdout = r.stdout.getvalue().decode() + if stdout: + return stdout + stderr = r.stderr.getvalue() + if stderr: + return stderr + return None if first_in_ceph_log('\[ERR\]|\[WRN\]|\[SEC\]', config['log_ignorelist']) is not None: diff --git a/qa/tasks/ceph_manager.py b/qa/tasks/ceph_manager.py index 7005c8db0ff..57d22f3b5e6 100644 --- a/qa/tasks/ceph_manager.py +++ b/qa/tasks/ceph_manager.py @@ -2796,6 +2796,59 @@ class CephManager: num += 1 return num + def _print_not_active_clean_pg(self, pgs): + """ + Print the PGs that are not active+clean. + """ + for pg in pgs: + if not (pg['state'].count('active') and + pg['state'].count('clean') and + not pg['state'].count('stale')): + log.debug( + "PG %s is not active+clean, but %s", + pg['pgid'], pg['state'] + ) + + def pg_all_active_clean(self): + """ + Check if all pgs are active+clean + return: True if all pgs are active+clean else False + """ + pgs = self.get_pg_stats() + result = self._get_num_active_clean(pgs) == len(pgs) + if result: + log.debug("All PGs are active+clean") + else: + log.debug("Not all PGs are active+clean") + self._print_not_active_clean_pg(pgs) + return result + + def _print_not_active_pg(self, pgs): + """ + Print the PGs that are not active. + """ + for pg in pgs: + if not (pg['state'].count('active') + and not pg['state'].count('stale')): + log.debug( + "PG %s is not active, but %s", + pg['pgid'], pg['state'] + ) + + def pg_all_active(self): + """ + Check if all pgs are active + return: True if all pgs are active else False + """ + pgs = self.get_pg_stats() + result = self._get_num_active(pgs) == len(pgs) + if result: + log.debug("All PGs are active") + else: + log.debug("Not all PGs are active") + self._print_not_active_pg(pgs) + return result + def is_clean(self): """ True if all pgs are clean @@ -3237,6 +3290,26 @@ class CephManager: self.make_admin_daemon_dir(remote) self.ctx.daemons.get_daemon('mgr', mgr, self.cluster).restart() + def get_crush_rule_id(self, crush_rule_name): + """ + Get crush rule id by name + :returns: int -- crush rule id + """ + out = self.raw_cluster_cmd('osd', 'crush', 'rule', 'dump', '--format=json') + j = json.loads('\n'.join(out.split('\n')[1:])) + for rule in j: + if rule['rule_name'] == crush_rule_name: + return rule['rule_id'] + assert False, 'rule %s not found' % crush_rule_name + + def get_mon_dump_json(self): + """ + mon dump --format=json converted to a python object + :returns: the python object + """ + out = self.raw_cluster_cmd('mon', 'dump', '--format=json') + return json.loads('\n'.join(out.split('\n')[1:])) + def get_mon_status(self, mon): """ Extract all the monitor status information from the cluster @@ -3340,6 +3413,23 @@ class CephManager: self.log(task_status) return task_status + # Stretch mode related functions + def is_degraded_stretch_mode(self): + """ + Return whether the cluster is in degraded stretch mode + """ + try: + osdmap = self.get_osd_dump_json() + stretch_mode = osdmap.get('stretch_mode', {}) + degraded_stretch_mode = stretch_mode.get('degraded_stretch_mode', 0) + self.log("is_degraded_stretch_mode: {0}".format(degraded_stretch_mode)) + return degraded_stretch_mode == 1 + except (TypeError, AttributeError) as e: + # Log the error or handle it as needed + self.log("Error accessing degraded_stretch_mode: {0}".format(e)) + return False + + def utility_task(name): """ Generate ceph_manager subtask corresponding to ceph_manager 
diff --git a/qa/tasks/cephadm.py b/qa/tasks/cephadm.py index dab61c2c700..0cde6050718 100644 --- a/qa/tasks/cephadm.py +++ b/qa/tasks/cephadm.py @@ -475,12 +475,16 @@ def ceph_log(ctx, config): run.Raw('|'), 'head', '-n', '1', ]) r = ctx.ceph[cluster_name].bootstrap_remote.run( - stdout=StringIO(), + stdout=BytesIO(), args=args, + stderr=StringIO(), ) - stdout = r.stdout.getvalue() - if stdout != '': + stdout = r.stdout.getvalue().decode() + if stdout: return stdout + stderr = r.stderr.getvalue() + if stderr: + return stderr return None # NOTE: technically the first and third arg to first_in_ceph_log diff --git a/qa/tasks/cephfs/cephfs_test_case.py b/qa/tasks/cephfs/cephfs_test_case.py index c1312ec5efc..21b96d2b22b 100644 --- a/qa/tasks/cephfs/cephfs_test_case.py +++ b/qa/tasks/cephfs/cephfs_test_case.py @@ -252,8 +252,8 @@ class CephFSTestCase(CephTestCase): def get_session_data(self, client_id): return self._session_by_id(client_id) - def _session_list(self): - ls_data = self.fs.mds_asok(['session', 'ls']) + def _session_list(self, rank=None, status=None): + ls_data = self.fs.rank_asok(['session', 'ls'], rank=rank, status=status) ls_data = [s for s in ls_data if s['state'] not in ['stale', 'closed']] return ls_data @@ -269,9 +269,9 @@ class CephFSTestCase(CephTestCase): def perf_dump(self, rank=None, status=None): return self.fs.rank_asok(['perf', 'dump'], rank=rank, status=status) - def wait_until_evicted(self, client_id, timeout=30): + def wait_until_evicted(self, client_id, rank=None, timeout=30): def is_client_evicted(): - ls = self._session_list() + ls = self._session_list(rank=rank) for s in ls: if s['id'] == client_id: return False diff --git a/qa/tasks/cephfs/filesystem.py b/qa/tasks/cephfs/filesystem.py index 2b7fd2ee569..3846ef23f97 100644 --- a/qa/tasks/cephfs/filesystem.py +++ b/qa/tasks/cephfs/filesystem.py @@ -649,6 +649,8 @@ class FilesystemBase(MDSClusterBase): def set_session_timeout(self, timeout): self.set_var("session_timeout", "%d" % timeout) + def set_session_autoclose(self, autoclose_time): + self.set_var("session_autoclose", "%d" % autoclose_time) def set_allow_standby_replay(self, yes): self.set_var("allow_standby_replay", yes) diff --git a/qa/tasks/cephfs/test_admin.py b/qa/tasks/cephfs/test_admin.py index 00a68dd0183..beb41019e6d 100644 --- a/qa/tasks/cephfs/test_admin.py +++ b/qa/tasks/cephfs/test_admin.py @@ -2740,3 +2740,184 @@ class TestFSSetMaxMDS(TestAdminCommands): ''' self.fs.set_max_mds(2, confirm=True) self.assertEqual(self.fs.get_var('max_mds'), 2) + + +class TestToggleVolumes(CephFSTestCase): + ''' + Contains code for enabling/disabling mgr/volumes plugin. + ''' + + VOL_MOD_NAME = 'volumes' + CONFIRM = '--yes-i-really-mean-it' + + def tearDown(self): + ''' + Ensure that the volumes plugin is enabled after the test has finished + running since not doing so might affect tearDown() of CephFSTestCase or + other superclasses. + ''' + json_output = self.get_ceph_cmd_stdout('mgr module ls --format json') + json_output = json.loads(json_output) + + if 'volumes' in json_output['force_disabled_modules']: + self.run_ceph_cmd(f'mgr module enable {self.VOL_MOD_NAME}') + + super(TestToggleVolumes, self).tearDown() + + def test_force_disable_with_confirmation(self): + ''' + Test that running "ceph mgr module force disable volumes + --yes-i-really-mean-it" successfully disables volumes plugin. + + Also test "ceph mgr module ls" output after this. 
+ ''' + self.run_ceph_cmd(f'mgr module force disable {self.VOL_MOD_NAME} ' + f'{self.CONFIRM}') + + json_output = self.get_ceph_cmd_stdout('mgr module ls --format json') + json_output = json.loads(json_output) + + self.assertIn(self.VOL_MOD_NAME, json_output['always_on_modules']) + self.assertIn(self.VOL_MOD_NAME, json_output['force_disabled_modules']) + + self.assertNotIn(self.VOL_MOD_NAME, json_output['enabled_modules']) + self.assertNotIn(self.VOL_MOD_NAME, json_output['disabled_modules']) + + def test_force_disable_fails_without_confirmation(self): + ''' + Test that running "ceph mgr module force disable volumes" fails with + EPERM when confirmation flag is not passed along. + + Also test that output of this command suggests user to pass + --yes-i-really-mean-it. + ''' + proc = self.run_ceph_cmd( + f'mgr module force disable {self.VOL_MOD_NAME}', + stderr=StringIO(), check_status=False) + + self.assertEqual(proc.returncode, errno.EPERM) + + proc_stderr = proc.stderr.getvalue() + self.assertIn('EPERM', proc_stderr) + # ensure that the confirmation flag was recommended + self.assertIn(self.CONFIRM, proc_stderr) + + def test_force_disable_idempotency(self): + ''' + Test that running "ceph mgr module force disable volumes" passes when + volumes plugin was already force disabled. + ''' + self.run_ceph_cmd(f'mgr module force disable {self.VOL_MOD_NAME} ' + f'{self.CONFIRM}') + sleep(5) + + json_output = self.get_ceph_cmd_stdout('mgr module ls --format ' + 'json-pretty') + json_output = json.loads(json_output) + + self.assertIn(self.VOL_MOD_NAME, json_output['always_on_modules']) + self.assertIn(self.VOL_MOD_NAME, json_output['force_disabled_modules']) + + self.assertNotIn(self.VOL_MOD_NAME, json_output['enabled_modules']) + self.assertNotIn(self.VOL_MOD_NAME, json_output['disabled_modules']) + + # XXX: this this test, running this command 2nd time should pass. + self.run_ceph_cmd(f'mgr module force disable {self.VOL_MOD_NAME}') + + def test_force_disable_nonexistent_mod(self): + ''' + Test that passing non-existent name to "ceph mgr module force disable" + command leads to an error. + ''' + proc = self.run_ceph_cmd( + f'mgr module force disable abcd {self.CONFIRM}', + check_status=False, stderr=StringIO()) + self.assertEqual(proc.returncode, errno.EINVAL) + self.assertIn('EINVAL', proc.stderr.getvalue()) + + def test_force_disable_non_alwayson_mod(self): + ''' + Test that passing non-existent name to "ceph mgr module force disable" + command leads to an error. + ''' + json_output = self.get_ceph_cmd_stdout( + 'mgr module ls --format json-pretty', check_status=False, + stderr=StringIO()) + output_dict = json.loads(json_output) + some_non_alwayson_mod = output_dict['enabled_modules'][0] + + proc = self.run_ceph_cmd( + f'mgr module force disable {some_non_alwayson_mod} {self.CONFIRM}', + check_status=False, stderr=StringIO()) + self.assertEqual(proc.returncode, errno.EINVAL) + self.assertIn('EINVAL', proc.stderr.getvalue()) + + def test_enabled_by_default(self): + ''' + Test that volumes plugin is enabled by default and is also reported as + "always on". 
+ ''' + json_output = self.get_ceph_cmd_stdout('mgr module ls --format json') + json_output = json.loads(json_output) + + self.assertIn(self.VOL_MOD_NAME, json_output['always_on_modules']) + + self.assertNotIn(self.VOL_MOD_NAME, json_output['enabled_modules']) + self.assertNotIn(self.VOL_MOD_NAME, json_output['disabled_modules']) + self.assertNotIn(self.VOL_MOD_NAME, json_output['force_disabled_modules']) + + def test_disable_fails(self): + ''' + Test that running "ceph mgr module disable volumes" fails with EPERM. + + This is expected since volumes is an always-on module and therefore + it can only be disabled using command "ceph mgr module force disable + volumes". + ''' + proc = self.run_ceph_cmd(f'mgr module disable {self.VOL_MOD_NAME}', + stderr=StringIO(), check_status=False) + self.assertEqual(proc.returncode, errno.EPERM) + + proc_stderr = proc.stderr.getvalue() + self.assertIn('EPERM', proc_stderr) + + def test_enable_idempotency(self): + ''' + Test that enabling volumes plugin when it is already enabled doesn't + exit with non-zero return value. + + Also test that it reports plugin as already enabled. + ''' + proc = self.run_ceph_cmd(f'mgr module enable {self.VOL_MOD_NAME}', + stderr=StringIO()) + self.assertEqual(proc.returncode, 0) + + proc_stderr = proc.stderr.getvalue() + self.assertIn('already enabled', proc_stderr) + self.assertIn('always-on', proc_stderr) + + def test_enable_post_disabling(self): + ''' + Test that enabling volumes plugin after (force-)disabling it works + successfully. + + Alo test "ceph mgr module ls" output for volumes plugin afterwards. + ''' + self.run_ceph_cmd(f'mgr module force disable {self.VOL_MOD_NAME} ' + f'{self.CONFIRM}') + # give bit of time for plugin to be disabled. + sleep(5) + + self.run_ceph_cmd(f'mgr module enable {self.VOL_MOD_NAME}') + # give bit of time for plugin to be functional again + sleep(5) + json_output = self.get_ceph_cmd_stdout('mgr module ls --format json') + json_output = json.loads(json_output) + self.assertIn(self.VOL_MOD_NAME, json_output['always_on_modules']) + self.assertNotIn(self.VOL_MOD_NAME, json_output['enabled_modules']) + self.assertNotIn(self.VOL_MOD_NAME, json_output['disabled_modules']) + self.assertNotIn(self.VOL_MOD_NAME, json_output['force_disabled_modules']) + + # plugin is reported properly by "ceph mgr module ls" command, check if + # it is also working fine. + self.run_ceph_cmd('fs volume ls') diff --git a/qa/tasks/cephfs/test_exports.py b/qa/tasks/cephfs/test_exports.py index 16de379f54f..468378fce3d 100644 --- a/qa/tasks/cephfs/test_exports.py +++ b/qa/tasks/cephfs/test_exports.py @@ -4,6 +4,7 @@ import time from tasks.cephfs.fuse_mount import FuseMount from tasks.cephfs.cephfs_test_case import CephFSTestCase from teuthology.exceptions import CommandFailedError +from teuthology.contextutil import safe_while, MaxWhileTries log = logging.getLogger(__name__) @@ -152,6 +153,8 @@ class TestExportPin(CephFSTestCase): # vstart.sh sets mds_debug_subtrees to True. That causes a ESubtreeMap # to be written out every event. Yuck! 
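# A minimal sketch of how the TestToggleVolumes assertions above classify the
# volumes plugin from `ceph mgr module ls --format json`; `module_ls` is
# assumed to be the already-parsed JSON dict and the helper name is
# hypothetical.
def volumes_plugin_state(module_ls):
    if 'volumes' in module_ls['force_disabled_modules']:
        return 'force-disabled'
    if 'volumes' in module_ls['always_on_modules']:
        return 'always-on'   # the normal case: enabled, yet never listed in enabled_modules
    return 'unexpected'      # volumes should not appear in enabled/disabled_modules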
self.config_set('mds', 'mds_debug_subtrees', False) + # make sure ESubtreeMap is written frequently enough: + self.config_set('mds', 'mds_log_minor_segments_per_major_segment', '4') self.config_rm('mds', 'mds bal split size') # don't split /top self.mount_a.run_shell_payload("rm -rf 1") @@ -628,3 +631,186 @@ done log.info("{0} migrations have occured due to the cluster resizing".format(count)) # rebalancing from 3 -> 2 may cause half of rank 0/1 to move and all of rank 2 self.assertLessEqual((count/len(subtrees_old)), (1.0/3.0/2.0 + 1.0/3.0/2.0 + 1.0/3.0)*1.25) # aka .66 with 25% overbudget + +class TestDumpExportStates(CephFSTestCase): + MDSS_REQUIRED = 2 + CLIENTS_REQUIRED = 1 + + EXPORT_STATES = ['locking', 'discovering', 'freezing', 'prepping', 'warning', 'exporting'] + + def setUp(self): + super().setUp() + + self.fs.set_max_mds(self.MDSS_REQUIRED) + self.status = self.fs.wait_for_daemons() + + self.mount_a.run_shell_payload('mkdir -p test/export') + + def tearDown(self): + super().tearDown() + + def _wait_for_export_target(self, source, target, sleep=2, timeout=10): + try: + with safe_while(sleep=sleep, tries=timeout//sleep) as proceed: + while proceed(): + info = self.fs.getinfo().get_rank(self.fs.id, source) + log.info(f'waiting for rank {target} to be added to the export target') + if target in info['export_targets']: + return + except MaxWhileTries as e: + raise RuntimeError(f'rank {target} has not been added to export target after {timeout}s') from e + + def _dump_export_state(self, rank): + states = self.fs.rank_asok(['dump_export_states'], rank=rank, status=self.status) + self.assertTrue(type(states) is list) + self.assertEqual(len(states), 1) + return states[0] + + def _test_base(self, path, source, target, state_index, kill): + self.fs.rank_asok(['config', 'set', 'mds_kill_import_at', str(kill)], rank=target, status=self.status) + + self.fs.rank_asok(['export', 'dir', path, str(target)], rank=source, status=self.status) + self._wait_for_export_target(source, target) + + target_rank = self.fs.get_rank(rank=target, status=self.status) + self.delete_mds_coredump(target_rank['name']) + + state = self._dump_export_state(source) + + self.assertTrue(type(state['tid']) is int) + self.assertEqual(state['path'], path) + self.assertEqual(state['state'], self.EXPORT_STATES[state_index]) + self.assertEqual(state['peer'], target) + + return state + + def _test_state_history(self, state): + history = state['state_history'] + self.assertTrue(type(history) is dict) + size = 0 + for name in self.EXPORT_STATES: + self.assertTrue(type(history[name]) is dict) + size += 1 + if name == state['state']: + break + self.assertEqual(len(history), size) + + def _test_freeze_tree(self, state, waiters): + self.assertTrue(type(state['freeze_tree_time']) is float) + self.assertEqual(state['unfreeze_tree_waiters'], waiters) + + def test_discovering(self): + state = self._test_base('/test', 0, 1, 1, 1) + + self._test_state_history(state) + self._test_freeze_tree(state, 0) + + self.assertEqual(state['last_cum_auth_pins'], 0) + self.assertEqual(state['num_remote_waiters'], 0) + + def test_prepping(self): + client_id = self.mount_a.get_global_id() + + state = self._test_base('/test', 0, 1, 3, 3) + + self._test_state_history(state) + self._test_freeze_tree(state, 0) + + self.assertEqual(state['flushed_clients'], [client_id]) + self.assertTrue(type(state['warning_ack_waiting']) is list) + + def test_exporting(self): + state = self._test_base('/test', 0, 1, 5, 5) + + self._test_state_history(state) + 
self._test_freeze_tree(state, 0) + + self.assertTrue(type(state['notify_ack_waiting']) is list) + +class TestKillExports(CephFSTestCase): + MDSS_REQUIRED = 2 + CLIENTS_REQUIRED = 1 + + def setUp(self): + CephFSTestCase.setUp(self) + + self.fs.set_max_mds(self.MDSS_REQUIRED) + self.status = self.fs.wait_for_daemons() + + self.mount_a.run_shell_payload('mkdir -p test/export') + + def tearDown(self): + super().tearDown() + + def _kill_export_as(self, rank, kill): + self.fs.rank_asok(['config', 'set', 'mds_kill_export_at', str(kill)], rank=rank, status=self.status) + + def _export_dir(self, path, source, target): + self.fs.rank_asok(['export', 'dir', path, str(target)], rank=source, status=self.status) + + def _wait_failover(self): + self.wait_until_true(lambda: self.fs.status().hadfailover(self.status), timeout=self.fs.beacon_timeout) + + def _clear_coredump(self, rank): + crash_rank = self.fs.get_rank(rank=rank, status=self.status) + self.delete_mds_coredump(crash_rank['name']) + + def _run_kill_export(self, kill_at, exporter_rank=0, importer_rank=1, restart=True): + self._kill_export_as(exporter_rank, kill_at) + self._export_dir("/test", exporter_rank, importer_rank) + self._wait_failover() + self._clear_coredump(exporter_rank) + + if restart: + self.fs.rank_restart(rank=exporter_rank, status=self.status) + self.status = self.fs.wait_for_daemons() + + def test_session_cleanup(self): + """ + Test importer's session cleanup after an export subtree task is interrupted. + Set 'mds_kill_export_at' to 9 or 10 so that the importer will wait for the exporter + to restart while the state is 'acking'. + + See https://tracker.ceph.com/issues/61459 + """ + + kill_export_at = [9, 10] + + exporter_rank = 0 + importer_rank = 1 + + for kill in kill_export_at: + log.info(f"kill_export_at: {kill}") + self._run_kill_export(kill, exporter_rank, importer_rank) + + if len(self._session_list(importer_rank, self.status)) > 0: + client_id = self.mount_a.get_global_id() + self.fs.rank_asok(['session', 'evict', "%s" % client_id], rank=importer_rank, status=self.status) + + # timeout if buggy + self.wait_until_evicted(client_id, importer_rank) + + # for multiple tests + self.mount_a.remount() + + def test_client_eviction(self): + # modify the timeout so that we don't have to wait too long + timeout = 30 + self.fs.set_session_timeout(timeout) + self.fs.set_session_autoclose(timeout + 5) + + kill_export_at = [9, 10] + + exporter_rank = 0 + importer_rank = 1 + + for kill in kill_export_at: + log.info(f"kill_export_at: {kill}") + self._run_kill_export(kill, exporter_rank, importer_rank) + + client_id = self.mount_a.get_global_id() + self.wait_until_evicted(client_id, importer_rank, timeout + 10) + time.sleep(1) + + # failed if buggy + self.mount_a.ls() diff --git a/qa/tasks/cephfs/test_failover.py b/qa/tasks/cephfs/test_failover.py index 29af1e76a4f..46139163ddd 100644 --- a/qa/tasks/cephfs/test_failover.py +++ b/qa/tasks/cephfs/test_failover.py @@ -1,3 +1,4 @@ +import re import time import signal import logging @@ -342,6 +343,60 @@ class TestClusterResize(CephFSTestCase): self.fs.wait_for_daemons(timeout=90) +class TestFailoverBeaconHealth(CephFSTestCase): + CLIENTS_REQUIRED = 1 + MDSS_REQUIRED = 1 + + def initiate_journal_replay(self, num_files=100): + """ Initiate journal replay by creating files and restarting mds server.""" + + self.config_set("mds", "mds_delay_journal_replay_for_testing", "5000") + self.mounts[0].test_files = [str(x) for x in range(num_files)] + self.mounts[0].create_files() + self.fs.fail() + 
self.fs.set_joinable() + + def test_replay_beacon_estimated_time(self): + """ + That beacon emits warning message with estimated time to complete replay + """ + self.initiate_journal_replay() + self.wait_for_health("MDS_ESTIMATED_REPLAY_TIME", 60) + # remove the config so that replay finishes and the cluster + # is HEALTH_OK + self.config_rm("mds", "mds_delay_journal_replay_for_testing") + self.wait_for_health_clear(timeout=60) + + def test_replay_estimated_time_accuracy(self): + self.initiate_journal_replay(250) + def replay_complete(): + health = self.ceph_cluster.mon_manager.get_mon_health(debug=False, detail=True) + codes = [s for s in health['checks']] + return 'MDS_ESTIMATED_REPLAY_TIME' not in codes + + def get_estimated_time(): + completion_percentage = 0.0 + time_duration = pending_duration = 0 + with safe_while(sleep=5, tries=360) as proceed: + while proceed(): + health = self.ceph_cluster.mon_manager.get_mon_health(debug=False, detail=True) + codes = [s for s in health['checks']] + if 'MDS_ESTIMATED_REPLAY_TIME' in codes: + message = health['checks']['MDS_ESTIMATED_REPLAY_TIME']['detail'][0]['message'] + ### sample warning string: "mds.a(mds.0): replay: 50.0446% complete - elapsed time: 582s, estimated time remaining: 581s" + m = re.match(".* replay: (\d+(\.\d+)?)% complete - elapsed time: (\d+)s, estimated time remaining: (\d+)s", message) + if not m: + continue + completion_percentage = float(m.group(1)) + time_duration = int(m.group(3)) + pending_duration = int(m.group(4)) + log.debug(f"MDS_ESTIMATED_REPLAY_TIME is present in health: {message}, duration: {time_duration}, completion_percentage: {completion_percentage}") + if completion_percentage >= 50: + return (completion_percentage, time_duration, pending_duration) + _, _, pending_duration = get_estimated_time() + # wait for 25% more time to avoid false negative failures + self.wait_until_true(replay_complete, timeout=pending_duration * 1.25) + class TestFailover(CephFSTestCase): CLIENTS_REQUIRED = 1 MDSS_REQUIRED = 2 diff --git a/qa/tasks/cephfs/test_nfs.py b/qa/tasks/cephfs/test_nfs.py index 19076ea44b3..0a1c07dce04 100644 --- a/qa/tasks/cephfs/test_nfs.py +++ b/qa/tasks/cephfs/test_nfs.py @@ -55,7 +55,7 @@ class TestNFS(MgrTestCase): "squash": "none", "security_label": True, "protocols": [ - 4 + 3, 4 ], "transports": [ "TCP" @@ -369,6 +369,45 @@ class TestNFS(MgrTestCase): except CommandFailedError as e: self.fail(f"expected read/write of a file to be successful but failed with {e.exitstatus}") + def _mnt_nfs(self, pseudo_path, port, ip): + ''' + Mount created export + :param pseudo_path: It is the pseudo root name + :param port: Port of deployed nfs cluster + :param ip: IP of deployed nfs cluster + ''' + tries = 3 + while True: + try: + self.ctx.cluster.run( + args=['sudo', 'mount', '-t', 'nfs', '-o', f'port={port}', + f'{ip}:{pseudo_path}', '/mnt']) + break + except CommandFailedError: + if tries: + tries -= 1 + time.sleep(2) + continue + raise + + self.ctx.cluster.run(args=['sudo', 'chmod', '1777', '/mnt']) + + def _test_fio(self, pseudo_path, port, ip): + ''' + run fio with libaio on /mnt/fio + :param mnt_path: nfs mount point + ''' + try: + self._mnt_nfs(pseudo_path, port, ip) + self.ctx.cluster.run(args=['mkdir', '/mnt/fio']) + fio_cmd=['sudo', 'fio', '--ioengine=libaio', '-directory=/mnt/fio', '--filename=fio.randrw.test', '--name=job', '--bs=16k', '--direct=1', '--group_reporting', '--iodepth=128', '--randrepeat=0', '--norandommap=1', '--thread=2', '--ramp_time=20s', '--offset_increment=5%', '--size=5G', 
'--time_based', '--runtime=300', '--ramp_time=1s', '--percentage_random=0', '--rw=randrw', '--rwmixread=50'] + self.ctx.cluster.run(args=fio_cmd) + except CommandFailedError as e: + self.fail(f"expected fio to be successful but failed with {e.exitstatus}") + finally: + self.ctx.cluster.run(args=['sudo', 'rm', '-rf', '/mnt/fio']) + self.ctx.cluster.run(args=['sudo', 'umount', '/mnt']) + def _write_to_read_only_export(self, pseudo_path, port, ip): ''' Check if write to read only export fails @@ -627,6 +666,18 @@ class TestNFS(MgrTestCase): self._test_data_read_write(self.pseudo_path, port, ip) self._test_delete_cluster() + def test_async_io_fio(self): + ''' + Test async io using fio. Expect completion without hang or crash + ''' + self._test_create_cluster() + self._create_export(export_id='1', create_fs=True, + extra_cmd=['--pseudo-path', self.pseudo_path]) + port, ip = self._get_port_ip_info() + self._check_nfs_cluster_status('running', 'NFS Ganesha cluster restart failed') + self._test_fio(self.pseudo_path, port, ip) + self._test_delete_cluster() + def test_cluster_info(self): ''' Test cluster info outputs correct ip and hostname diff --git a/qa/tasks/cephfs/test_quota.py b/qa/tasks/cephfs/test_quota.py index b5691c83852..ae1c1f2056c 100644 --- a/qa/tasks/cephfs/test_quota.py +++ b/qa/tasks/cephfs/test_quota.py @@ -115,9 +115,11 @@ class TestQuota(CephFSTestCase): readable_values = {"10K": "10240", "100Ki": "102400", + "100KiB": "102400", "10M": "10485760", "100Mi": "104857600", "2G": "2147483648", + "2GB": "2147483648", "4Gi": "4294967296", "1T": "1099511627776", "2Ti": "2199023255552"} @@ -135,7 +137,8 @@ class TestQuota(CephFSTestCase): self.mount_a.run_shell(["mkdir", "subdir"]) - invalid_values = ["10A", "1y00Ki", "af00", "G", "", " ", "-1t", "-1"] + invalid_values = ["10A", "1y00Ki", "af00", "G", "", " ", "-1t", "-1", + "1GT", "2MM", "5Di", "8Bi", "i", "7iB"] for invalid_value in invalid_values: with self.assertRaises(CommandFailedError): self.mount_a.setfattr("./subdir", "ceph.quota.max_bytes", diff --git a/qa/tasks/check_counter.py b/qa/tasks/check_counter.py index 40818f3f475..1f63b6a0bd4 100644 --- a/qa/tasks/check_counter.py +++ b/qa/tasks/check_counter.py @@ -1,11 +1,14 @@ import logging import json +import errno from teuthology.task import Task from teuthology import misc from tasks import ceph_manager +from tasks.cephfs.filesystem import MDSCluster +from teuthology.exceptions import CommandFailedError log = logging.getLogger(__name__) @@ -61,6 +64,9 @@ class CheckCounter(Task): mon_manager = ceph_manager.CephManager(self.admin_remote, ctx=self.ctx, logger=log.getChild('ceph_manager')) active_mgr = json.loads(mon_manager.raw_cluster_cmd("mgr", "dump", "--format=json-pretty"))["active_name"] + mds_cluster = MDSCluster(self.ctx) + status = mds_cluster.status() + for daemon_type, counters in targets.items(): # List of 'a', 'b', 'c'... 
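# A rough sketch of the size-suffix arithmetic implied by the readable_values
# table in test_quota.py above (all suffixes are powers of 1024, with optional
# "i" and "B"); the real parsing happens inside Ceph when the vxattr is set,
# so this helper and its name are purely illustrative.
import re

_UNIT = {'K': 1024, 'M': 1024 ** 2, 'G': 1024 ** 3, 'T': 1024 ** 4}

def human_quota_to_bytes(value):
    m = re.fullmatch(r'(\d+)([KMGT])i?B?', value)
    if not m:
        raise ValueError('invalid quota value: %r' % value)
    return int(m.group(1)) * _UNIT[m.group(2)]

assert human_quota_to_bytes('100KiB') == 102400
assert human_quota_to_bytes('2GB') == 2147483648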
daemon_ids = list(misc.all_roles_of_type(self.ctx.cluster, daemon_type)) @@ -80,13 +86,31 @@ class CheckCounter(Task): else: log.debug("Getting stats from {0}".format(daemon_id)) - manager = self.ctx.managers[cluster_name] - proc = manager.admin_socket(daemon_type, daemon_id, ["perf", "dump"]) - response_data = proc.stdout.getvalue().strip() + if daemon_type == 'mds': + mds_info = status.get_mds(daemon_id) + if not mds_info: + continue + mds = f"mds.{mds_info['gid']}" + if mds_info['state'] != "up:active": + log.debug(f"skipping {mds}") + continue + log.debug(f"Getting stats from {mds}") + try: + proc = mon_manager.raw_cluster_cmd("tell", mds, "perf", "dump", + "--format=json-pretty") + response_data = proc.strip() + except CommandFailedError as e: + if e.exitstatus == errno.ENOENT: + log.debug(f"Failed to do 'perf dump' on {mds}") + continue + else: + manager = self.ctx.managers[cluster_name] + proc = manager.admin_socket(daemon_type, daemon_id, ["perf", "dump"]) + response_data = proc.stdout.getvalue().strip() if response_data: perf_dump = json.loads(response_data) else: - log.warning("No admin socket response from {0}, skipping".format(daemon_id)) + log.warning("No response from {0}, skipping".format(daemon_id)) continue minval = '' diff --git a/qa/tasks/fwd_scrub.py b/qa/tasks/fwd_scrub.py index 2ac92439de6..d955d232c2c 100644 --- a/qa/tasks/fwd_scrub.py +++ b/qa/tasks/fwd_scrub.py @@ -33,6 +33,8 @@ class ForwardScrubber(ThrasherGreenlet): def _run(self): try: self.do_scrub() + except ThrasherGreenlet.Stopped: + pass except Exception as e: self.set_thrasher_exception(e) self.logger.exception("exception:") diff --git a/qa/tasks/kafka.py b/qa/tasks/kafka.py index 5e6c208ca30..833f03babf6 100644 --- a/qa/tasks/kafka.py +++ b/qa/tasks/kafka.py @@ -4,6 +4,7 @@ Deploy and configure Kafka for Teuthology import contextlib import logging import time +import os from teuthology import misc as teuthology from teuthology import contextutil @@ -33,6 +34,13 @@ def install_kafka(ctx, config): assert isinstance(config, dict) log.info('Installing Kafka...') + # programmatically find a nearby mirror so as not to hammer archive.apache.org + apache_mirror_cmd="curl 'https://www.apache.org/dyn/closer.cgi' 2>/dev/null | " \ + "grep -o '<strong>[^<]*</strong>' | sed 's/<[^>]*>//g' | head -n 1" + log.info("determining apache mirror by running: " + apache_mirror_cmd) + apache_mirror_url_front = os.popen(apache_mirror_cmd).read().rstrip() # note: includes trailing slash (/) + log.info("chosen apache mirror is " + apache_mirror_url_front) + for (client, _) in config.items(): (remote,) = ctx.cluster.only(client).remotes.keys() test_dir=teuthology.get_testdir(ctx) @@ -40,7 +48,8 @@ def install_kafka(ctx, config): kafka_file = kafka_prefix + current_version + '.tgz' - link1 = 'https://archive.apache.org/dist/kafka/' + current_version + '/' + kafka_file + link1 = '{apache_mirror_url_front}/kafka/'.format(apache_mirror_url_front=apache_mirror_url_front) + \ + current_version + '/' + kafka_file ctx.cluster.only(client).run( args=['cd', '{tdir}'.format(tdir=test_dir), run.Raw('&&'), 'wget', link1], ) diff --git a/qa/tasks/kafka_failover.py b/qa/tasks/kafka_failover.py new file mode 100644 index 00000000000..3ca60ab84fc --- /dev/null +++ b/qa/tasks/kafka_failover.py @@ -0,0 +1,244 @@ +""" +Deploy and configure Kafka for Teuthology +""" +import contextlib +import logging +import time +import os + +from teuthology import misc as teuthology +from teuthology import contextutil +from teuthology.orchestra import run + +log = 
logging.getLogger(__name__) + +def get_kafka_version(config): + for client, client_config in config.items(): + if 'kafka_version' in client_config: + kafka_version = client_config.get('kafka_version') + return kafka_version + +kafka_prefix = 'kafka_2.13-' + +def get_kafka_dir(ctx, config): + kafka_version = get_kafka_version(config) + current_version = kafka_prefix + kafka_version + return '{tdir}/{ver}'.format(tdir=teuthology.get_testdir(ctx),ver=current_version) + + +@contextlib.contextmanager +def install_kafka(ctx, config): + """ + Downloading the kafka tar file. + """ + assert isinstance(config, dict) + log.info('Installing Kafka...') + + # programmatically find a nearby mirror so as not to hammer archive.apache.org + apache_mirror_cmd="curl 'https://www.apache.org/dyn/closer.cgi' 2>/dev/null | " \ + "grep -o '<strong>[^<]*</strong>' | sed 's/<[^>]*>//g' | head -n 1" + log.info("determining apache mirror by running: " + apache_mirror_cmd) + apache_mirror_url_front = os.popen(apache_mirror_cmd).read().rstrip() # note: includes trailing slash (/) + log.info("chosen apache mirror is " + apache_mirror_url_front) + + for (client, _) in config.items(): + (remote,) = ctx.cluster.only(client).remotes.keys() + test_dir=teuthology.get_testdir(ctx) + current_version = get_kafka_version(config) + + kafka_file = kafka_prefix + current_version + '.tgz' + + link1 = '{apache_mirror_url_front}/kafka/'.format(apache_mirror_url_front=apache_mirror_url_front) + \ + current_version + '/' + kafka_file + ctx.cluster.only(client).run( + args=['cd', '{tdir}'.format(tdir=test_dir), run.Raw('&&'), 'wget', link1], + ) + + ctx.cluster.only(client).run( + args=['cd', '{tdir}'.format(tdir=test_dir), run.Raw('&&'), 'tar', '-xvzf', kafka_file], + ) + + kafka_dir = get_kafka_dir(ctx, config) + # create config for second broker + second_broker_config_name = "server2.properties" + second_broker_data = "{tdir}/data/broker02".format(tdir=kafka_dir) + second_broker_data_logs_escaped = "{}/logs".format(second_broker_data).replace("/", "\/") + + ctx.cluster.only(client).run( + args=['cd', '{tdir}'.format(tdir=kafka_dir), run.Raw('&&'), + 'cp', '{tdir}/config/server.properties'.format(tdir=kafka_dir), '{tdir}/config/{second_broker_config_name}'.format(tdir=kafka_dir, second_broker_config_name=second_broker_config_name), run.Raw('&&'), + 'mkdir', '-p', '{tdir}/data'.format(tdir=kafka_dir) + ], + ) + + # edit config + ctx.cluster.only(client).run( + args=['sed', '-i', 's/broker.id=0/broker.id=1/g', '{tdir}/config/{second_broker_config_name}'.format(tdir=kafka_dir, second_broker_config_name=second_broker_config_name), run.Raw('&&'), + 'sed', '-i', 's/#listeners=PLAINTEXT:\/\/:9092/listeners=PLAINTEXT:\/\/localhost:19092/g', '{tdir}/config/{second_broker_config_name}'.format(tdir=kafka_dir, second_broker_config_name=second_broker_config_name), run.Raw('&&'), + 'sed', '-i', 's/#advertised.listeners=PLAINTEXT:\/\/your.host.name:9092/advertised.listeners=PLAINTEXT:\/\/localhost:19092/g', '{tdir}/config/{second_broker_config_name}'.format(tdir=kafka_dir, second_broker_config_name=second_broker_config_name), run.Raw('&&'), + 'sed', '-i', 's/log.dirs=\/tmp\/kafka-logs/log.dirs={}/g'.format(second_broker_data_logs_escaped), '{tdir}/config/{second_broker_config_name}'.format(tdir=kafka_dir, second_broker_config_name=second_broker_config_name), run.Raw('&&'), + 'cat', '{tdir}/config/{second_broker_config_name}'.format(tdir=kafka_dir, second_broker_config_name=second_broker_config_name) + ] + ) + + try: + yield + finally: + 
log.info('Removing packaged dependencies of Kafka...') + test_dir=get_kafka_dir(ctx, config) + current_version = get_kafka_version(config) + for (client,_) in config.items(): + ctx.cluster.only(client).run( + args=['rm', '-rf', '{tdir}/logs'.format(tdir=test_dir)], + ) + + ctx.cluster.only(client).run( + args=['rm', '-rf', test_dir], + ) + + ctx.cluster.only(client).run( + args=['rm', '-rf', '{tdir}/{doc}'.format(tdir=teuthology.get_testdir(ctx),doc=kafka_file)], + ) + + +@contextlib.contextmanager +def run_kafka(ctx,config): + """ + This includes two parts: + 1. Starting Zookeeper service + 2. Starting Kafka service + """ + assert isinstance(config, dict) + log.info('Bringing up Zookeeper and Kafka services...') + for (client,_) in config.items(): + (remote,) = ctx.cluster.only(client).remotes.keys() + kafka_dir = get_kafka_dir(ctx, config) + + second_broker_data = "{tdir}/data/broker02".format(tdir=kafka_dir) + second_broker_java_log_dir = "{}/java_logs".format(second_broker_data) + + ctx.cluster.only(client).run( + args=['cd', '{tdir}/bin'.format(tdir=kafka_dir), run.Raw('&&'), + './zookeeper-server-start.sh', + '{tir}/config/zookeeper.properties'.format(tir=kafka_dir), + run.Raw('&'), 'exit' + ], + ) + + ctx.cluster.only(client).run( + args=['cd', '{tdir}/bin'.format(tdir=kafka_dir), run.Raw('&&'), + './kafka-server-start.sh', + '{tir}/config/server.properties'.format(tir=get_kafka_dir(ctx, config)), + run.Raw('&'), 'exit' + ], + ) + + ctx.cluster.only(client).run( + args=['cd', '{tdir}/bin'.format(tdir=kafka_dir), run.Raw('&&'), + run.Raw('LOG_DIR={second_broker_java_log_dir}'.format(second_broker_java_log_dir=second_broker_java_log_dir)), + './kafka-server-start.sh', '{tdir}/config/server2.properties'.format(tdir=kafka_dir), + run.Raw('&'), 'exit' + ], + ) + + try: + yield + finally: + log.info('Stopping Zookeeper and Kafka Services...') + + for (client, _) in config.items(): + (remote,) = ctx.cluster.only(client).remotes.keys() + + ctx.cluster.only(client).run( + args=['cd', '{tdir}/bin'.format(tdir=get_kafka_dir(ctx, config)), run.Raw('&&'), + './kafka-server-stop.sh', + '{tir}/config/kafka.properties'.format(tir=get_kafka_dir(ctx, config)), + ], + ) + + time.sleep(5) + + ctx.cluster.only(client).run( + args=['cd', '{tdir}/bin'.format(tdir=get_kafka_dir(ctx, config)), run.Raw('&&'), + './zookeeper-server-stop.sh', + '{tir}/config/zookeeper.properties'.format(tir=get_kafka_dir(ctx, config)), + ], + ) + + time.sleep(5) + + ctx.cluster.only(client).run(args=['killall', '-9', 'java']) + + +@contextlib.contextmanager +def run_admin_cmds(ctx,config): + """ + Running Kafka Admin commands in order to check the working of producer anf consumer and creation of topic. 
+ """ + assert isinstance(config, dict) + log.info('Checking kafka server through producer/consumer commands...') + for (client,_) in config.items(): + (remote,) = ctx.cluster.only(client).remotes.keys() + + ctx.cluster.only(client).run( + args=[ + 'cd', '{tdir}/bin'.format(tdir=get_kafka_dir(ctx, config)), run.Raw('&&'), + './kafka-topics.sh', '--create', '--topic', 'quickstart-events', + '--bootstrap-server', 'localhost:9092' + ], + ) + + ctx.cluster.only(client).run( + args=[ + 'cd', '{tdir}/bin'.format(tdir=get_kafka_dir(ctx, config)), run.Raw('&&'), + 'echo', "First", run.Raw('|'), + './kafka-console-producer.sh', '--topic', 'quickstart-events', + '--bootstrap-server', 'localhost:9092' + ], + ) + + ctx.cluster.only(client).run( + args=[ + 'cd', '{tdir}/bin'.format(tdir=get_kafka_dir(ctx, config)), run.Raw('&&'), + './kafka-console-consumer.sh', '--topic', 'quickstart-events', + '--from-beginning', + '--bootstrap-server', 'localhost:9092', + run.Raw('&'), 'exit' + ], + ) + + try: + yield + finally: + pass + + +@contextlib.contextmanager +def task(ctx,config): + """ + Following is the way how to run kafka:: + tasks: + - kafka: + client.0: + kafka_version: 2.6.0 + """ + assert config is None or isinstance(config, list) \ + or isinstance(config, dict), \ + "task kafka only supports a list or dictionary for configuration" + + all_clients = ['client.{id}'.format(id=id_) + for id_ in teuthology.all_roles_of_type(ctx.cluster, 'client')] + if config is None: + config = all_clients + if isinstance(config, list): + config = dict.fromkeys(config) + + log.debug('Kafka config is %s', config) + + with contextutil.nested( + lambda: install_kafka(ctx=ctx, config=config), + lambda: run_kafka(ctx=ctx, config=config), + lambda: run_admin_cmds(ctx=ctx, config=config), + ): + yield + diff --git a/qa/tasks/mgr/dashboard/helper.py b/qa/tasks/mgr/dashboard/helper.py index e6a7c35a23d..55355048a36 100644 --- a/qa/tasks/mgr/dashboard/helper.py +++ b/qa/tasks/mgr/dashboard/helper.py @@ -220,13 +220,11 @@ class DashboardTestCase(MgrTestCase): # To avoid any issues with e.g. unlink bugs, we destroy and recreate # the filesystem rather than just doing a rm -rf of files - cls.mds_cluster.mds_stop() - cls.mds_cluster.mds_fail() cls.mds_cluster.delete_all_filesystems() + cls.mds_cluster.mds_restart() # to reset any run-time configs, etc. cls.fs = None # is now invalid! cls.fs = cls.mds_cluster.newfs(create=True) - cls.fs.mds_restart() # In case some test messed with auth caps, reset them # pylint: disable=not-an-iterable diff --git a/qa/tasks/mgr/dashboard/test_mgr_module.py b/qa/tasks/mgr/dashboard/test_mgr_module.py index d6a368905b6..1dbdef23d34 100644 --- a/qa/tasks/mgr/dashboard/test_mgr_module.py +++ b/qa/tasks/mgr/dashboard/test_mgr_module.py @@ -4,6 +4,7 @@ from __future__ import absolute_import import logging import requests +from urllib3.exceptions import MaxRetryError from .helper import (DashboardTestCase, JLeaf, JList, JObj, module_options_object_schema, module_options_schema, @@ -24,10 +25,11 @@ class MgrModuleTestCase(DashboardTestCase): def _check_connection(): try: # Try reaching an API endpoint successfully. 
+ logger.info('Trying to reach the REST API endpoint') self._get('/api/mgr/module') if self._resp.status_code == 200: return True - except requests.ConnectionError: + except (MaxRetryError, requests.ConnectionError): pass return False diff --git a/qa/tasks/mgr/dashboard/test_rbd.py b/qa/tasks/mgr/dashboard/test_rbd.py index a872645e33e..83b3bf520c2 100644 --- a/qa/tasks/mgr/dashboard/test_rbd.py +++ b/qa/tasks/mgr/dashboard/test_rbd.py @@ -869,7 +869,19 @@ class RbdTest(DashboardTestCase): self.assertEqual(clone_format_version, 2) self.assertStatus(200) + # if empty list is sent, then the config will remain as it is value = [] + res = [{'section': "global", 'value': "2"}] + self._post('/api/cluster_conf', { + 'name': config_name, + 'value': value + }) + self.wait_until_equal( + lambda: _get_config_by_name(config_name), + res, + timeout=60) + + value = [{'section': "global", 'value': ""}] self._post('/api/cluster_conf', { 'name': config_name, 'value': value diff --git a/qa/tasks/mgr/dashboard/test_rgw.py b/qa/tasks/mgr/dashboard/test_rgw.py index 5c7b0329675..a9071bc2a3a 100644 --- a/qa/tasks/mgr/dashboard/test_rgw.py +++ b/qa/tasks/mgr/dashboard/test_rgw.py @@ -785,7 +785,7 @@ class RgwUserSubuserTest(RgwTestCase): 'access': 'readwrite', 'key_type': 'swift' }) - self.assertStatus(200) + self.assertStatus(201) data = self.jsonBody() subuser = self.find_object_in_list('id', 'teuth-test-user:tux', data) self.assertIsInstance(subuser, object) @@ -808,7 +808,7 @@ class RgwUserSubuserTest(RgwTestCase): 'access_key': 'yyy', 'secret_key': 'xxx' }) - self.assertStatus(200) + self.assertStatus(201) data = self.jsonBody() subuser = self.find_object_in_list('id', 'teuth-test-user:hugo', data) self.assertIsInstance(subuser, object) diff --git a/qa/tasks/mgr/mgr_test_case.py b/qa/tasks/mgr/mgr_test_case.py index 74b1e9d850c..4a5506391f2 100644 --- a/qa/tasks/mgr/mgr_test_case.py +++ b/qa/tasks/mgr/mgr_test_case.py @@ -1,5 +1,6 @@ import json import logging +import socket from unittest import SkipTest @@ -108,7 +109,7 @@ class MgrTestCase(CephTestCase): # Unload all non-default plugins loaded = json.loads(cls.mgr_cluster.mon_manager.raw_cluster_cmd( "mgr", "module", "ls", "--format=json-pretty"))['enabled_modules'] - unload_modules = set(loaded) - {"cephadm", "restful"} + unload_modules = set(loaded) - {"cephadm"} for m in unload_modules: cls.mgr_cluster.mon_manager.raw_cluster_cmd( @@ -137,7 +138,7 @@ class MgrTestCase(CephTestCase): raise SkipTest( "Only have {0} manager daemons, {1} are required".format( len(cls.mgr_cluster.mgr_ids), cls.MGRS_REQUIRED)) - + # We expect laggy OSDs in this testing environment so turn off this warning. # See https://tracker.ceph.com/issues/61907 cls.mgr_cluster.mon_manager.raw_cluster_cmd('config', 'set', 'mds', @@ -229,15 +230,22 @@ class MgrTestCase(CephTestCase): """ # Start handing out ports well above Ceph's range. 
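# Standalone sketch of the port probing the MgrTestCase port-assignment loop
# gains above: connect_ex() returns 0 when something already listens on
# (ip, port), so the scan keeps walking until a free port is found. The helper
# name is illustrative.
import socket

def next_free_port(ip_addr, start_port):
    port = start_port
    while True:
        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
            if s.connect_ex((ip_addr, port)) != 0:
                return port
        port += 1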
assign_port = min_port + ip_addr = cls.mgr_cluster.get_mgr_map()['active_addr'].split(':')[0] for mgr_id in cls.mgr_cluster.mgr_ids: cls.mgr_cluster.mgr_stop(mgr_id) cls.mgr_cluster.mgr_fail(mgr_id) + for mgr_id in cls.mgr_cluster.mgr_ids: - log.debug("Using port {0} for {1} on mgr.{2}".format( - assign_port, module_name, mgr_id - )) + # Find a port that isn't in use + while True: + if not cls.is_port_in_use(ip_addr, assign_port): + break + log.debug(f"Port {assign_port} in use, trying next") + assign_port += 1 + + log.debug(f"Using port {assign_port} for {module_name} on mgr.{mgr_id}") cls.mgr_cluster.set_module_localized_conf(module_name, mgr_id, config_name, str(assign_port), @@ -255,3 +263,8 @@ class MgrTestCase(CephTestCase): mgr_map['active_name'], mgr_map['active_gid'])) return done cls.wait_until_true(is_available, timeout=30) + + @classmethod + def is_port_in_use(cls, ip_addr: str, port: int) -> bool: + with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s: + return s.connect_ex((ip_addr, port)) == 0 diff --git a/qa/tasks/mgr/test_module_selftest.py b/qa/tasks/mgr/test_module_selftest.py index 7ac2960371c..c41a95c71f7 100644 --- a/qa/tasks/mgr/test_module_selftest.py +++ b/qa/tasks/mgr/test_module_selftest.py @@ -36,13 +36,6 @@ class TestModuleSelftest(MgrTestCase): self.mgr_cluster.mon_manager.raw_cluster_cmd( "mgr", "self-test", "module", module_name) - def test_zabbix(self): - # Set these mandatory config fields so that the zabbix module - # won't trigger health/log errors on load/serve. - self.mgr_cluster.set_module_conf("zabbix", "zabbix_host", "localhost") - self.mgr_cluster.set_module_conf("zabbix", "identifier", "foo") - self._selftest_plugin("zabbix") - def test_prometheus(self): self._assign_ports("prometheus", "server_port", min_port=8100) self._selftest_plugin("prometheus") diff --git a/qa/tasks/notification_tests.py b/qa/tasks/notification_tests.py index b4697a6f797..f1eae3c89c4 100644 --- a/qa/tasks/notification_tests.py +++ b/qa/tasks/notification_tests.py @@ -220,7 +220,7 @@ def run_tests(ctx, config): for client, client_config in config.items(): (remote,) = ctx.cluster.only(client).remotes.keys() - attr = ["!kafka_test", "!data_path_v2_kafka_test", "!amqp_test", "!amqp_ssl_test", "!kafka_security_test", "!modification_required", "!manual_test", "!http_test"] + attr = ["!kafka_test", "!data_path_v2_kafka_test", "!kafka_failover", "!amqp_test", "!amqp_ssl_test", "!kafka_security_test", "!modification_required", "!manual_test", "!http_test"] if 'extra_attr' in client_config: attr = client_config.get('extra_attr') diff --git a/qa/tasks/nvme_loop.py b/qa/tasks/nvme_loop.py index fef270ea085..fdec467a16d 100644 --- a/qa/tasks/nvme_loop.py +++ b/qa/tasks/nvme_loop.py @@ -70,7 +70,7 @@ def task(ctx, config): remote.run(args=['lsblk'], stdout=StringIO()) p = remote.run(args=['sudo', 'nvme', 'list', '-o', 'json'], stdout=StringIO()) new_devs = [] - # `nvme list -o json` will return the following output: + # `nvme list -o json` will return one of the following output: '''{ "Devices" : [ { @@ -91,13 +91,112 @@ def task(ctx, config): } ] }''' + '''{ + "Devices":[ + { + "HostNQN":"nqn.2014-08.org.nvmexpress:uuid:00000000-0000-0000-0000-0cc47ada6ba4", + "HostID":"898a0e10-da2d-4a42-8017-d9c445089d0c", + "Subsystems":[ + { + "Subsystem":"nvme-subsys0", + "SubsystemNQN":"nqn.2014.08.org.nvmexpress:80868086CVFT623300LN400BGN INTEL SSDPEDMD400G4", + "Controllers":[ + { + "Controller":"nvme0", + "Cntlid":"0", + "SerialNumber":"CVFT623300LN400BGN", + "ModelNumber":"INTEL 
SSDPEDMD400G4", + "Firmware":"8DV101H0", + "Transport":"pcie", + "Address":"0000:02:00.0", + "Slot":"2", + "Namespaces":[ + { + "NameSpace":"nvme0n1", + "Generic":"ng0n1", + "NSID":1, + "UsedBytes":400088457216, + "MaximumLBA":781422768, + "PhysicalSize":400088457216, + "SectorSize":512 + } + ], + "Paths":[ + ] + } + ], + "Namespaces":[ + ] + } + ] + } + ] + } + ''' + '''{ + "Devices":[ + { + "HostNQN":"nqn.2014-08.org.nvmexpress:uuid:00000000-0000-0000-0000-0cc47ada6ba4", + "HostID":"898a0e10-da2d-4a42-8017-d9c445089d0c", + "Subsystems":[ + { + "Subsystem":"nvme-subsys0", + "SubsystemNQN":"nqn.2014.08.org.nvmexpress:80868086CVFT534400C2400BGN INTEL SSDPEDMD400G4", + "Controllers":[ + { + "Controller":"nvme0", + "Cntlid":"0", + "SerialNumber":"CVFT534400C2400BGN", + "ModelNumber":"INTEL SSDPEDMD400G4", + "Firmware":"8DV101H0", + "Transport":"pcie", + "Address":"0000:02:00.0", + "Slot":"2", + "Namespaces":[ + { + "NameSpace":"nvme0n1", + "Generic":"ng0n1", + "NSID":1, + "UsedBytes":400088457216, + "MaximumLBA":781422768, + "PhysicalSize":400088457216, + "SectorSize":512 + } + ], + "Paths":[ + ] + } + ], + "Namespaces":[ + ] + } + ] + } + ] + } + ''' nvme_list = json.loads(p.stdout.getvalue()) for device in nvme_list['Devices']: - dev = device['DevicePath'] - vendor = device['ModelNumber'] - if dev.startswith('/dev/') and vendor == 'Linux': - new_devs.append(dev) - bluestore_zap(remote, dev) + try: + # first try format 1 / older format + dev = device['DevicePath'] + vendor = device['ModelNumber'] + if dev.startswith('/dev/') and vendor == 'Linux': + new_devs.append(dev) + bluestore_zap(remote, dev) + except KeyError: + for subsystem in device['Subsystems']: + # format 2 + if 'Namespaces' in subsystem and subsystem['Namespaces']: + dev = '/dev/' + subsystem['Namespaces'][0]['NameSpace'] + # try format 3 last + else: + dev = '/dev/' + subsystem['Controllers'][0]['Namespaces'][0]['NameSpace'] + # vendor is the same for format 2 and 3 + vendor = subsystem['Controllers'][0]['ModelNumber'] + if vendor == 'Linux': + new_devs.append(dev) + bluestore_zap(remote, dev) log.info(f'new_devs {new_devs}') assert len(new_devs) <= len(devs) if len(new_devs) == len(devs): diff --git a/qa/tasks/nvmeof.py b/qa/tasks/nvmeof.py index b89f123c97e..691a6f7dd86 100644 --- a/qa/tasks/nvmeof.py +++ b/qa/tasks/nvmeof.py @@ -32,6 +32,7 @@ class Nvmeof(Task): gateway_config: namespaces_count: 10 cli_version: latest + create_mtls_secrets: False """ @@ -69,6 +70,7 @@ class Nvmeof(Task): self.serial = gateway_config.get('serial', 'SPDK00000000000001') self.port = gateway_config.get('port', '4420') self.srport = gateway_config.get('srport', '5500') + self.create_mtls_secrets = gateway_config.get('create_mtls_secrets', False) def deploy_nvmeof(self): """ @@ -126,12 +128,11 @@ class Nvmeof(Task): total_images = int(self.namespaces_count) * int(self.subsystems_count) log.info(f'[nvmeof]: creating {total_images} images') + rbd_create_cmd = [] for i in range(1, total_images + 1): imagename = self.image_name_prefix + str(i) - log.info(f'[nvmeof]: rbd create {poolname}/{imagename} --size {self.rbd_size}') - _shell(self.ctx, self.cluster_name, self.remote, [ - 'rbd', 'create', f'{poolname}/{imagename}', '--size', f'{self.rbd_size}' - ]) + rbd_create_cmd += ['rbd', 'create', f'{poolname}/{imagename}', '--size', f'{self.rbd_size}', run.Raw(';')] + _shell(self.ctx, self.cluster_name, self.remote, rbd_create_cmd) for role, i in daemons.items(): remote, id_ = i @@ -147,7 +148,38 @@ class Nvmeof(Task): started=True, ) log.info("[nvmeof]: 
executed deploy_nvmeof successfully!") - + + def write_mtls_config(self, gateway_ips): + log.info("[nvmeof]: writing mtls config...") + allowed_ips = "" + for ip in gateway_ips: + allowed_ips += ("IP:" + ip + ",") + self.remote.run( + args=[ + "sudo", "openssl", "req", "-x509", "-newkey", "rsa:4096", "-nodes", "-keyout", "/etc/ceph/server.key", + "-out", "/etc/ceph/server.crt", "-days", "3650", "-subj", "/CN=my.server", "-addext", f"subjectAltName={allowed_ips[:-1]}" + ] + ) + self.remote.run( + args=[ + "sudo", "openssl", "req", "-x509", "-newkey", "rsa:4096", "-nodes", "-keyout", "/etc/ceph/client.key", + "-out", "/etc/ceph/client.crt", "-days", "3650", "-subj", "/CN=client1" + ] + ) + secrets_files = {"/etc/ceph/server.key": None, + "/etc/ceph/server.crt": None, + "/etc/ceph/client.key": None, + "/etc/ceph/client.crt": None, + } + for file in secrets_files.keys(): + secrets_files[file] = self.remote.read_file(path=file, sudo=True) + + for remote in self.ctx.cluster.remotes.keys(): + for remote_file in secrets_files.keys(): + data = secrets_files[remote_file] + remote.sudo_write_file(path=remote_file, data=data, mode='0644') + log.info("[nvmeof]: written mtls config!") + def set_gateway_cfg(self): log.info('[nvmeof]: running set_gateway_cfg...') ip_address = self.remote.ip_address @@ -174,6 +206,8 @@ class Nvmeof(Task): data=conf_data, sudo=True ) + if self.create_mtls_secrets: + self.write_mtls_config(gateway_ips) log.info("[nvmeof]: executed set_gateway_cfg successfully!") @@ -216,9 +250,9 @@ class NvmeofThrasher(Thrasher, Greenlet): daemon_max_thrash_times: For now, NVMeoF daemons have limitation that each daemon can - be thrashed only 3 times in span of 30 mins. This option + be thrashed only 5 times in span of 30 mins. This option allows to set the amount of times it could be thrashed in a period - of time. (default: 3) + of time. (default: 5) daemon_max_thrash_period: This option goes with the above option. 
It sets the period of time over which each daemons can be thrashed for daemon_max_thrash_times @@ -271,17 +305,17 @@ class NvmeofThrasher(Thrasher, Greenlet): self.max_thrash_daemons = int(self.config.get('max_thrash', len(self.daemons) - 1)) # Limits on thrashing each daemon - self.daemon_max_thrash_times = int(self.config.get('daemon_max_thrash_times', 3)) + self.daemon_max_thrash_times = int(self.config.get('daemon_max_thrash_times', 5)) self.daemon_max_thrash_period = int(self.config.get('daemon_max_thrash_period', 30 * 60)) # seconds self.min_thrash_delay = int(self.config.get('min_thrash_delay', 60)) self.max_thrash_delay = int(self.config.get('max_thrash_delay', self.min_thrash_delay + 30)) - self.min_revive_delay = int(self.config.get('min_revive_delay', 100)) + self.min_revive_delay = int(self.config.get('min_revive_delay', 60)) self.max_revive_delay = int(self.config.get('max_revive_delay', self.min_revive_delay + 30)) def _get_devices(self, remote): GET_DEVICE_CMD = "sudo nvme list --output-format=json | " \ - "jq -r '.Devices | sort_by(.NameSpace) | .[] | select(.ModelNumber == \"Ceph bdev Controller\") | .DevicePath'" + "jq -r '.Devices[].Subsystems[] | select(.Controllers | all(.ModelNumber == \"Ceph bdev Controller\")) | .Namespaces | sort_by(.NSID) | .[] | .NameSpace'" devices = remote.sh(GET_DEVICE_CMD).split() return devices @@ -312,6 +346,7 @@ class NvmeofThrasher(Thrasher, Greenlet): run.Raw('&&'), 'ceph', 'orch', 'ps', '--daemon-type', 'nvmeof', run.Raw('&&'), 'ceph', 'health', 'detail', run.Raw('&&'), 'ceph', '-s', + run.Raw('&&'), 'sudo', 'nvme', 'list', ] for dev in self.devices: check_cmd += [ @@ -386,13 +421,11 @@ class NvmeofThrasher(Thrasher, Greenlet): while not self.stopping.is_set(): killed_daemons = defaultdict(list) - weight = 1.0 / len(self.daemons) - count = 0 + thrash_daemon_num = self.rng.randint(1, self.max_thrash_daemons) + selected_daemons = self.rng.sample(self.daemons, thrash_daemon_num) for daemon in self.daemons: - skip = self.rng.uniform(0.0, 1.0) - if weight <= skip: - self.log('skipping daemon {label} with skip ({skip}) > weight ({weight})'.format( - label=daemon.id_, skip=skip, weight=weight)) + if daemon not in selected_daemons: + self.log(f'skipping daemon {daemon.id_} ...') continue # For now, nvmeof daemons can only be thrashed 3 times in last 30mins. 
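# Minimal sketch of the selection scheme the thrasher switches to above:
# instead of skipping each daemon independently with probability 1 - 1/N,
# it picks how many daemons to thrash this round and samples them without
# replacement. The function name is illustrative.
import random

def pick_daemons_to_thrash(daemons, max_thrash, rng=random):
    count = rng.randint(1, max_thrash)   # at least one, at most max_thrash
    return rng.sample(daemons, count)    # distinct daemons for this round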
@@ -410,17 +443,11 @@ class NvmeofThrasher(Thrasher, Greenlet): continue self.log('kill {label}'.format(label=daemon.id_)) - # daemon.stop() kill_method = self.kill_daemon(daemon) killed_daemons[kill_method].append(daemon) daemons_thrash_history[daemon.id_] += [datetime.now()] - # only thrash max_thrash_daemons amount of daemons - count += 1 - if count >= self.max_thrash_daemons: - break - if killed_daemons: iteration_summary = "thrashed- " for kill_method in killed_daemons: @@ -433,7 +460,7 @@ class NvmeofThrasher(Thrasher, Greenlet): self.log(f'waiting for {revive_delay} secs before reviving') time.sleep(revive_delay) # blocking wait - self.log('done waiting before reviving') + self.log(f'done waiting before reviving - iteration #{len(summary)}: {iteration_summary}') self.do_checks() self.switch_task() @@ -452,7 +479,7 @@ class NvmeofThrasher(Thrasher, Greenlet): if thrash_delay > 0.0: self.log(f'waiting for {thrash_delay} secs before thrashing') time.sleep(thrash_delay) # blocking - self.log('done waiting before thrashing') + self.log('done waiting before thrashing - everything should be up now') self.do_checks() self.switch_task() diff --git a/qa/tasks/rados.py b/qa/tasks/rados.py index d8eac5d886f..96bcc770511 100644 --- a/qa/tasks/rados.py +++ b/qa/tasks/rados.py @@ -36,6 +36,8 @@ def task(ctx, config): write_fadvise_dontneed: write behavior like with LIBRADOS_OP_FLAG_FADVISE_DONTNEED. This mean data don't access in the near future. Let osd backend don't keep data in cache. + pct_update_delay: delay before primary propogates pct on write pause, + defaults to 5s if balance_reads is set For example:: @@ -139,6 +141,7 @@ def task(ctx, config): object_size = int(config.get('object_size', 4000000)) op_weights = config.get('op_weights', {}) testdir = teuthology.get_testdir(ctx) + pct_update_delay = None args = [ 'adjust-ulimits', 'ceph-coverage', @@ -166,6 +169,7 @@ def task(ctx, config): args.extend(['--pool-snaps']) if config.get('balance_reads', False): args.extend(['--balance-reads']) + pct_update_delay = config.get('pct_update_delay', 5); if config.get('localize_reads', False): args.extend(['--localize-reads']) if config.get('max_attr_len', None): @@ -274,6 +278,10 @@ def task(ctx, config): if config.get('fast_read', False): manager.raw_cluster_cmd( 'osd', 'pool', 'set', pool, 'fast_read', 'true') + if pct_update_delay: + manager.raw_cluster_cmd( + 'osd', 'pool', 'set', pool, + 'pct_update_delay', str(pct_update_delay)); min_size = config.get('min_size', None); if min_size is not None: manager.raw_cluster_cmd( diff --git a/qa/tasks/radosgw_admin.py b/qa/tasks/radosgw_admin.py index 3b98702acca..fb82378761b 100644 --- a/qa/tasks/radosgw_admin.py +++ b/qa/tasks/radosgw_admin.py @@ -16,6 +16,7 @@ import logging import time import datetime import sys +import errno from io import StringIO from queue import Queue @@ -725,6 +726,40 @@ def task(ctx, config): (err, out) = rgwadmin(ctx, client, ['user', 'rm', '--tenant', tenant_name, '--uid', 'tenanteduser'], check_status=True) + account_id = 'RGW12312312312312312' + account_name = 'testacct' + rgwadmin(ctx, client, [ + 'account', 'create', + '--account-id', account_id, + '--account-name', account_name, + ], check_status=True) + rgwadmin(ctx, client, [ + 'user', 'create', + '--account-id', account_id, + '--uid', 'testacctuser', + '--display-name', 'accountuser', + '--gen-access-key', + '--gen-secret', + ], check_status=True) + + # TESTCASE 'bucket link', 'bucket', 'account user', 'fails' + (err, out) = rgwadmin(ctx, client, ['bucket', 'link', 
'--bucket', bucket_name, '--uid', 'testacctuser']) + assert err == errno.EINVAL + + rgwadmin(ctx, client, ['user', 'rm', '--uid', 'testacctuser'], check_status=True) + + # TESTCASE 'bucket link', 'bucket', 'account', 'succeeds' + rgwadmin(ctx, client, + ['bucket', 'link', '--bucket', bucket_name, '--account-id', account_id], + check_status=True) + + # relink the bucket to the first user and delete the account + rgwadmin(ctx, client, + ['bucket', 'link', '--bucket', bucket_name, '--uid', user1], + check_status=True) + rgwadmin(ctx, client, ['account', 'rm', '--account-id', account_id], + check_status=True) + # TESTCASE 'object-rm', 'object', 'rm', 'remove object', 'succeeds, object is removed' # upload an object diff --git a/qa/tasks/rgw_multisite.py b/qa/tasks/rgw_multisite.py index e83a54efc2b..f93ca017fa2 100644 --- a/qa/tasks/rgw_multisite.py +++ b/qa/tasks/rgw_multisite.py @@ -361,6 +361,8 @@ def create_zonegroup(cluster, gateways, period, config): if endpoints: # replace client names with their gateway endpoints config['endpoints'] = extract_gateway_endpoints(gateways, endpoints) + if not config.get('api_name'): # otherwise it will be set to an empty string + config['api_name'] = config['name'] zonegroup = multisite.ZoneGroup(config['name'], period) # `zonegroup set` needs --default on command line, and 'is_master' in json args = is_default_arg(config) diff --git a/qa/tasks/rook.py b/qa/tasks/rook.py index 6cb75173966..fae5ef3bf00 100644 --- a/qa/tasks/rook.py +++ b/qa/tasks/rook.py @@ -8,7 +8,7 @@ import json import logging import os import yaml -from io import BytesIO +from io import BytesIO, StringIO from tarfile import ReadError from tasks.ceph_manager import CephManager @@ -235,10 +235,14 @@ def ceph_log(ctx, config): r = ctx.rook[cluster_name].remote.run( stdout=BytesIO(), args=args, + stderr=StringIO(), ) stdout = r.stdout.getvalue().decode() if stdout: return stdout + stderr = r.stderr.getvalue() + if stderr: + return stderr return None if first_in_ceph_log('\[ERR\]|\[WRN\]|\[SEC\]', diff --git a/qa/tasks/s3a_hadoop.py b/qa/tasks/s3a_hadoop.py index 7b77359fcf2..4518a6f397c 100644 --- a/qa/tasks/s3a_hadoop.py +++ b/qa/tasks/s3a_hadoop.py @@ -1,5 +1,6 @@ import contextlib import logging +import os from teuthology import misc from teuthology.orchestra import run @@ -40,7 +41,7 @@ def task(ctx, config): # get versions maven_major = config.get('maven-major', 'maven-3') - maven_version = config.get('maven-version', '3.6.3') + maven_version = config.get('maven-version', '3.9.9') hadoop_ver = config.get('hadoop-version', '2.9.2') bucket_name = config.get('bucket-name', 's3atest') access_key = config.get('access-key', 'EGAQRD2ULOIFKFSKCT4F') @@ -48,11 +49,19 @@ def task(ctx, config): 'secret-key', 'zi816w1vZKfaSM85Cl0BxXTwSLyN7zB4RbTswrGb') + # programmatically find a nearby mirror so as not to hammer archive.apache.org + apache_mirror_cmd="curl 'https://www.apache.org/dyn/closer.cgi' 2>/dev/null | " \ + "grep -o '<strong>[^<]*</strong>' | sed 's/<[^>]*>//g' | head -n 1" + log.info("determining apache mirror by running: " + apache_mirror_cmd) + apache_mirror_url_front = os.popen(apache_mirror_cmd).read().rstrip() # note: includes trailing slash (/) + log.info("chosen apache mirror is " + apache_mirror_url_front) + # set versions for cloning the repo apache_maven = 'apache-maven-{maven_version}-bin.tar.gz'.format( maven_version=maven_version) - maven_link = 'http://archive.apache.org/dist/maven/' + \ - '{maven_major}/{maven_version}/binaries/'.format(maven_major=maven_major, 
maven_version=maven_version) + apache_maven + maven_link = '{apache_mirror_url_front}/maven/'.format(apache_mirror_url_front=apache_mirror_url_front) + \ + '{maven_major}/{maven_version}/binaries/'.format(maven_major=maven_major, maven_version=maven_version) + \ + apache_maven hadoop_git = 'https://github.com/apache/hadoop' hadoop_rel = 'hadoop-{ver} rel/release-{ver}'.format(ver=hadoop_ver) if hadoop_ver == 'trunk': @@ -204,6 +213,7 @@ def run_s3atest(client, maven_version, testdir, test_options): run.Raw('&&'), run.Raw(rm_test), run.Raw('&&'), + run.Raw('JAVA_HOME=$(alternatives --list | grep jre_1.8.0 | head -n 1 | awk \'{print $3}\')'), run.Raw(run_test), run.Raw(test_options) ] diff --git a/qa/tasks/s3tests.py b/qa/tasks/s3tests.py index 6d7b39d5892..85ab97d23cd 100644 --- a/qa/tasks/s3tests.py +++ b/qa/tasks/s3tests.py @@ -57,6 +57,17 @@ def download(ctx, config): 'git', 'reset', '--hard', sha1, ], ) + if client_config.get('boto3_extensions'): + ctx.cluster.only(client).run( + args=['mkdir', + '-p', + '/home/ubuntu/.aws/models/s3/2006-03-01/'] + ) + (remote,) = ctx.cluster.only(client).remotes.keys() + remote_file = '/home/ubuntu/.aws/models/s3/2006-03-01/service-2.sdk-extras.json' + local_file = '{qadir}/../examples/rgw/boto3/service-2.sdk-extras.json'.format(qadir=ctx.config.get('suite_path')) + remote.put_file(local_file, remote_file) + try: yield finally: @@ -70,6 +81,17 @@ def download(ctx, config): '{tdir}/s3-tests-{client}'.format(tdir=testdir, client=client), ], ) + if client_config.get('boto3_extensions'): + ctx.cluster.only(client).run( + args=[ + 'rm', '-rf', '/home/ubuntu/.aws/models/s3/2006-03-01/service-2.sdk-extras.json', + ], + ) + ctx.cluster.only(client).run( + args=[ + 'cd', '/home/ubuntu/', run.Raw('&&'), 'rmdir', '-p', '.aws/models/s3/2006-03-01/', + ], + ) def _config_user(s3tests_conf, section, user, email): @@ -444,8 +466,10 @@ def run_tests(ctx, config): attrs += ['not fails_with_subdomain'] if not client_config.get('with-sse-s3'): attrs += ['not sse_s3'] - + attrs += client_config.get('extra_attrs', []) + if 'bucket_logging' not in attrs: + attrs += ['not bucket_logging'] if 'unit_test_scan' in client_config and client_config['unit_test_scan']: xmlfile_id = datetime.datetime.now().strftime("%Y-%m-%d-%H:%M:%S--") + str(uuid.uuid4()) xmlpath= f'{testdir}/archive/s3test-{xmlfile_id}.xml' diff --git a/qa/tasks/s3tests_java.py b/qa/tasks/s3tests_java.py index 3e20e10d06c..a58aa6cf0b4 100644 --- a/qa/tasks/s3tests_java.py +++ b/qa/tasks/s3tests_java.py @@ -284,6 +284,7 @@ class S3tests_java(Task): args = ['cd', '{tdir}/s3-tests-java'.format(tdir=testdir), run.Raw('&&'), + run.Raw('JAVA_HOME=$(alternatives --list | grep jre_1.8.0 | head -n 1 | awk \'{print $3}\')'), '/opt/gradle/gradle/bin/gradle', 'clean', 'test', '--rerun-tasks', '--no-build-cache', ] diff --git a/qa/tasks/stretch_mode_disable_enable.py b/qa/tasks/stretch_mode_disable_enable.py new file mode 100644 index 00000000000..a84a85bb307 --- /dev/null +++ b/qa/tasks/stretch_mode_disable_enable.py @@ -0,0 +1,547 @@ +import logging +from tasks.mgr.mgr_test_case import MgrTestCase + +log = logging.getLogger(__name__) + +class TestStretchMode(MgrTestCase): + """ + Test the stretch mode feature of Ceph + """ + POOL = 'stretch_pool' + CLUSTER = "ceph" + WRITE_PERIOD = 10 + RECOVERY_PERIOD = WRITE_PERIOD * 6 + SUCCESS_HOLD_TIME = 7 + STRETCH_CRUSH_RULE = 'stretch_rule' + STRETCH_CRUSH_RULE_ID = None + STRETCH_BUCKET_TYPE = 'datacenter' + TIEBREAKER_MON_NAME = 'e' + DEFAULT_POOL_TYPE = 'replicated' + 
DEFAULT_POOL_CRUSH_RULE = 'replicated_rule' + DEFAULT_POOL_SIZE = 3 + DEFAULT_POOL_MIN_SIZE = 2 + DEFAULT_POOL_CRUSH_RULE_ID = None + # This dictionary maps the datacenter to the osd ids and hosts + DC_OSDS = { + 'dc1': { + "host01": [0, 1], + "host02": [2, 3], + }, + 'dc2': { + "host03": [4, 5], + "host04": [6, 7], + }, + } + DC_MONS = { + 'dc1': { + "host01": ['a'], + "host02": ['b'], + }, + 'dc2': { + "host03": ['c'], + "host04": ['d'], + }, + 'dc3': { + "host05": ['e'], + } + } + def _osd_count(self): + """ + Get the number of OSDs in the cluster. + """ + osd_map = self.mgr_cluster.mon_manager.get_osd_dump_json() + return len(osd_map['osds']) + + def setUp(self): + """ + Setup the cluster and + ensure we have a clean condition before the test. + """ + # Ensure we have at least 6 OSDs + super(TestStretchMode, self).setUp() + self.DEFAULT_POOL_CRUSH_RULE_ID = self.mgr_cluster.mon_manager.get_crush_rule_id(self.DEFAULT_POOL_CRUSH_RULE) + self.STRETCH_CRUSH_RULE_ID = self.mgr_cluster.mon_manager.get_crush_rule_id(self.STRETCH_CRUSH_RULE) + if self._osd_count() < 4: + self.skipTest("Not enough OSDS!") + + # Remove any filesystems so that we can remove their pools + if self.mds_cluster: + self.mds_cluster.mds_stop() + self.mds_cluster.mds_fail() + self.mds_cluster.delete_all_filesystems() + + # Remove all other pools + for pool in self.mgr_cluster.mon_manager.get_osd_dump_json()['pools']: + try: + self.mgr_cluster.mon_manager.remove_pool(pool['pool_name']) + except: + self.mgr_cluster.mon_manager.raw_cluster_cmd( + 'osd', 'pool', 'delete', + pool['pool_name'], + pool['pool_name'], + '--yes-i-really-really-mean-it') + + def _setup_pool( + self, + pool_name=POOL, + pg_num=16, + pool_type=DEFAULT_POOL_TYPE, + crush_rule=DEFAULT_POOL_CRUSH_RULE, + size=None, + min_size=None + ): + """ + Create a pool, set its size and pool if specified. + """ + self.mgr_cluster.mon_manager.raw_cluster_cmd( + 'osd', 'pool', 'create', pool_name, str(pg_num), pool_type, crush_rule) + + if size is not None: + self.mgr_cluster.mon_manager.raw_cluster_cmd( + 'osd', 'pool', 'set', pool_name, 'size', str(size)) + + if min_size is not None: + self.mgr_cluster.mon_manager.raw_cluster_cmd( + 'osd', 'pool', 'set', pool_name, 'min_size', str(min_size)) + + def _write_some_data(self, t): + """ + Write some data to the pool to simulate a workload. + """ + args = [ + "rados", "-p", self.POOL, "bench", str(t), "write", "-t", "16"] + self.mgr_cluster.admin_remote.run(args=args, wait=True) + + def _get_all_mons_from_all_dc(self): + """ + Get all mons from all datacenters. + """ + return [mon for dc in self.DC_MONS.values() for mons in dc.values() for mon in mons] + + def _bring_back_mon(self, mon): + """ + Bring back the mon. + """ + try: + self.ctx.daemons.get_daemon('mon', mon, self.CLUSTER).restart() + except Exception: + log.error("Failed to bring back mon.{}".format(str(mon))) + pass + + def _get_host(self, osd): + """ + Get the host of the osd. + """ + for dc, nodes in self.DC_OSDS.items(): + for node, osds in nodes.items(): + if osd in osds: + return node + return None + + def _move_osd_back_to_host(self, osd): + """ + Move the osd back to the host. + """ + host = self._get_host(osd) + assert host is not None, "The host of osd {} is not found.".format(osd) + log.debug("Moving osd.%d back to %s", osd, host) + self.mgr_cluster.mon_manager.raw_cluster_cmd( + 'osd', 'crush', 'move', 'osd.{}'.format(str(osd)), + 'host={}'.format(host) + ) + + def tearDown(self): + """ + Clean up the cluster after the test. 
+ """ + # Remove the pool + if self.POOL in self.mgr_cluster.mon_manager.pools: + self.mgr_cluster.mon_manager.remove_pool(self.POOL) + + osd_map = self.mgr_cluster.mon_manager.get_osd_dump_json() + for osd in osd_map['osds']: + # mark all the osds in + if osd['weight'] == 0.0: + self.mgr_cluster.mon_manager.raw_cluster_cmd( + 'osd', 'in', str(osd['osd'])) + # Bring back all the osds and move it back to the host. + if osd['up'] == 0: + self.mgr_cluster.mon_manager.revive_osd(osd['osd']) + self._move_osd_back_to_host(osd['osd']) + + # Bring back all the mons + mons = self._get_all_mons_from_all_dc() + for mon in mons: + self._bring_back_mon(mon) + super(TestStretchMode, self).tearDown() + + def _kill_osd(self, osd): + """ + Kill the osd. + """ + try: + self.ctx.daemons.get_daemon('osd', osd, self.CLUSTER).stop() + except Exception: + log.error("Failed to stop osd.{}".format(str(osd))) + pass + + def _get_osds_data(self, want_osds): + """ + Get the osd data + """ + all_osds_data = \ + self.mgr_cluster.mon_manager.get_osd_dump_json()['osds'] + return [ + osd_data for osd_data in all_osds_data + if int(osd_data['osd']) in want_osds + ] + + def _get_osds_by_dc(self, dc): + """ + Get osds by datacenter. + """ + ret = [] + for host, osds in self.DC_OSDS[dc].items(): + ret.extend(osds) + return ret + + def _fail_over_all_osds_in_dc(self, dc): + """ + Fail over all osds in specified <datacenter> + """ + if not isinstance(dc, str): + raise ValueError("dc must be a string") + if dc not in self.DC_OSDS: + raise ValueError( + "dc must be one of the following: %s" % self.DC_OSDS.keys() + ) + log.debug("Failing over all osds in %s", dc) + osds = self._get_osds_by_dc(dc) + # fail over all the OSDs in the DC + log.debug("OSDs to failed over: %s", osds) + for osd_id in osds: + self._kill_osd(osd_id) + # wait until all the osds are down + self.wait_until_true( + lambda: all([int(osd['up']) == 0 + for osd in self._get_osds_data(osds)]), + timeout=self.RECOVERY_PERIOD + ) + + def _check_mons_out_of_quorum(self, want_mons): + """ + Check if the mons are not in quorum. + """ + quorum_names = self.mgr_cluster.mon_manager.get_mon_quorum_names() + return all([mon not in quorum_names for mon in want_mons]) + + def _kill_mon(self, mon): + """ + Kill the mon. + """ + try: + self.ctx.daemons.get_daemon('mon', mon, self.CLUSTER).stop() + except Exception: + log.error("Failed to stop mon.{}".format(str(mon))) + pass + + def _get_mons_by_dc(self, dc): + """ + Get mons by datacenter. + """ + ret = [] + for host, mons in self.DC_MONS[dc].items(): + ret.extend(mons) + return ret + + def _fail_over_all_mons_in_dc(self, dc): + """ + Fail over all mons in the specified <datacenter> + """ + if not isinstance(dc, str): + raise ValueError("dc must be a string") + if dc not in self.DC_MONS: + raise ValueError("dc must be one of the following: %s" % + ", ".join(self.DC_MONS.keys())) + log.debug("Failing over all mons %s", dc) + mons = self._get_mons_by_dc(dc) + log.debug("Mons to be failed over: %s", mons) + for mon in mons: + self._kill_mon(mon) + # wait until all the mons are out of quorum + self.wait_until_true( + lambda: self._check_mons_out_of_quorum(mons), + timeout=self.RECOVERY_PERIOD + ) + + def _stretch_mode_enabled_correctly(self): + """ + Evaluate whether the stretch mode is enabled correctly. + by checking the OSDMap and MonMap. 
+ """ + # Checking the OSDMap + osdmap = self.mgr_cluster.mon_manager.get_osd_dump_json() + for pool in osdmap['pools']: + # expects crush_rule to be stretch_rule + self.assertEqual( + self.STRETCH_CRUSH_RULE_ID, + pool['crush_rule'] + ) + # expects pool size to be 4 + self.assertEqual( + 4, + pool['size'] + ) + # expects pool min_size to be 2 + self.assertEqual( + 2, + pool['min_size'] + ) + # expects pool is_stretch_pool flag to be true + self.assertEqual( + True, + pool['is_stretch_pool'] + ) + # expects peering_crush_bucket_count = 2 (always this value for stretch mode) + self.assertEqual( + 2, + pool['peering_crush_bucket_count'] + ) + # expects peering_crush_bucket_target = 2 (always this value for stretch mode) + self.assertEqual( + 2, + pool['peering_crush_bucket_target'] + ) + # expects peering_crush_bucket_barrier = 8 (crush type of datacenter is 8) + self.assertEqual( + 8, + pool['peering_crush_bucket_barrier'] + ) + # expects stretch_mode_enabled to be True + self.assertEqual( + True, + osdmap['stretch_mode']['stretch_mode_enabled'] + ) + # expects stretch_mode_bucket_count to be 2 + self.assertEqual( + 2, + osdmap['stretch_mode']['stretch_bucket_count'] + ) + # expects degraded_stretch_mode to be 0 + self.assertEqual( + 0, + osdmap['stretch_mode']['degraded_stretch_mode'] + ) + # expects recovering_stretch_mode to be 0 + self.assertEqual( + 0, + osdmap['stretch_mode']['recovering_stretch_mode'] + ) + # expects stretch_mode_bucket to be 8 (datacenter crush type = 8) + self.assertEqual( + 8, + osdmap['stretch_mode']['stretch_mode_bucket'] + ) + # Checking the MonMap + monmap = self.mgr_cluster.mon_manager.get_mon_dump_json() + # expects stretch_mode to be True + self.assertEqual( + True, + monmap['stretch_mode'] + ) + # expects disallowed_leaders to be tiebreaker_mon + self.assertEqual( + self.TIEBREAKER_MON_NAME, + monmap['disallowed_leaders'] + ) + # expects tiebreaker_mon to be tiebreaker_mon + self.assertEqual( + self.TIEBREAKER_MON_NAME, + monmap['tiebreaker_mon'] + ) + + def _stretch_mode_disabled_correctly(self): + """ + Evaluate whether the stretch mode is disabled correctly. + by checking the OSDMap and MonMap. 
+ """ + # Checking the OSDMap + osdmap = self.mgr_cluster.mon_manager.get_osd_dump_json() + for pool in osdmap['pools']: + # expects crush_rule to be default + self.assertEqual( + self.DEFAULT_POOL_CRUSH_RULE_ID, + pool['crush_rule'] + ) + # expects pool size to be default + self.assertEqual( + self.DEFAULT_POOL_SIZE, + pool['size'] + ) + # expects pool min_size to be default + self.assertEqual( + self.DEFAULT_POOL_MIN_SIZE, + pool['min_size'] + ) + # expects pool is_stretch_pool flag to be false + self.assertEqual( + False, + pool['is_stretch_pool'] + ) + # expects peering_crush_bucket_count = 0 + self.assertEqual( + 0, + pool['peering_crush_bucket_count'] + ) + # expects peering_crush_bucket_target = 0 + self.assertEqual( + 0, + pool['peering_crush_bucket_target'] + ) + # expects peering_crush_bucket_barrier = 0 + self.assertEqual( + 0, + pool['peering_crush_bucket_barrier'] + ) + # expects stretch_mode_enabled to be False + self.assertEqual( + False, + osdmap['stretch_mode']['stretch_mode_enabled'] + ) + # expects stretch_mode_bucket to be 0 + self.assertEqual( + 0, + osdmap['stretch_mode']['stretch_bucket_count'] + ) + # expects degraded_stretch_mode to be 0 + self.assertEqual( + 0, + osdmap['stretch_mode']['degraded_stretch_mode'] + ) + # expects recovering_stretch_mode to be 0 + self.assertEqual( + 0, + osdmap['stretch_mode']['recovering_stretch_mode'] + ) + # expects stretch_mode_bucket to be 0 + self.assertEqual( + 0, + osdmap['stretch_mode']['stretch_mode_bucket'] + ) + # Checking the MonMap + monmap = self.mgr_cluster.mon_manager.get_mon_dump_json() + # expects stretch_mode to be False + self.assertEqual( + False, + monmap['stretch_mode'] + ) + # expects disallowed_leaders to be empty + self.assertEqual( + "", + monmap['disallowed_leaders'] + ) + # expects tiebreaker_mon to be empty + self.assertEqual( + "", + monmap['tiebreaker_mon'] + ) + + def test_disable_stretch_mode(self): + """ + Test disabling stretch mode with the following scenario: + 1. Healthy Stretch Mode + 2. 
Degraded Stretch Mode + """ + # Create a pool + self._setup_pool(self.POOL, 16, 'replicated', self.STRETCH_CRUSH_RULE, 4, 2) + # Write some data to the pool + self._write_some_data(self.WRITE_PERIOD) + # disable stretch mode without --yes-i-really-mean-it (expects -EPERM 1) + self.assertEqual( + 1, + self.mgr_cluster.mon_manager.raw_cluster_cmd_result( + 'mon', + 'disable_stretch_mode' + )) + # Disable stretch mode with non-existent crush rule (expects -EINVAL 22) + self.assertEqual( + 22, + self.mgr_cluster.mon_manager.raw_cluster_cmd_result( + 'mon', + 'disable_stretch_mode', + 'non_existent_rule', + '--yes-i-really-mean-it' + )) + # Disable stretch mode with the current stretch rule (expect -EINVAL 22) + self.assertEqual( + 22, + self.mgr_cluster.mon_manager.raw_cluster_cmd_result( + 'mon', + 'disable_stretch_mode', + self.STRETCH_CRUSH_RULE, + '--yes-i-really-mean-it', + + )) + # Disable stretch mode without crush rule (expect success 0) + self.assertEqual( + 0, + self.mgr_cluster.mon_manager.raw_cluster_cmd_result( + 'mon', + 'disable_stretch_mode', + '--yes-i-really-mean-it' + )) + # Check if stretch mode is disabled correctly + self._stretch_mode_disabled_correctly() + # all PGs are active + clean + self.wait_until_true_and_hold( + lambda: self.mgr_cluster.mon_manager.pg_all_active_clean(), + timeout=self.RECOVERY_PERIOD, + success_hold_time=self.SUCCESS_HOLD_TIME + ) + # write some data to the pool + self._write_some_data(self.WRITE_PERIOD) + # Enable stretch mode + self.assertEqual( + 0, + self.mgr_cluster.mon_manager.raw_cluster_cmd_result( + 'mon', + 'enable_stretch_mode', + self.TIEBREAKER_MON_NAME, + self.STRETCH_CRUSH_RULE, + self.STRETCH_BUCKET_TYPE + )) + self._stretch_mode_enabled_correctly() + # all PGs are active + clean + self.wait_until_true_and_hold( + lambda: self.mgr_cluster.mon_manager.pg_all_active_clean(), + timeout=self.RECOVERY_PERIOD, + success_hold_time=self.SUCCESS_HOLD_TIME + ) + # write some data to the pool + # self._write_some_data(self.WRITE_PERIOD) + # Bring down dc1 + self._fail_over_all_osds_in_dc('dc1') + self._fail_over_all_mons_in_dc('dc1') + # should be in degraded stretch mode + self.wait_until_true_and_hold( + lambda: self.mgr_cluster.mon_manager.is_degraded_stretch_mode(), + timeout=self.RECOVERY_PERIOD, + success_hold_time=self.SUCCESS_HOLD_TIME + ) + # Disable stretch mode with valid crush rule (expect success 0) + self.assertEqual( + 0, + self.mgr_cluster.mon_manager.raw_cluster_cmd_result( + 'mon', + 'disable_stretch_mode', + self.DEFAULT_POOL_CRUSH_RULE, + '--yes-i-really-mean-it' + )) + # Check if stretch mode is disabled correctly + self._stretch_mode_disabled_correctly() + # all PGs are active + self.wait_until_true_and_hold( + lambda: self.mgr_cluster.mon_manager.pg_all_active(), + timeout=self.RECOVERY_PERIOD, + success_hold_time=self.SUCCESS_HOLD_TIME + ) diff --git a/qa/tasks/thrashosds-health.yaml b/qa/tasks/thrashosds-health.yaml index b70583a75e1..dbde1ced0db 100644 --- a/qa/tasks/thrashosds-health.yaml +++ b/qa/tasks/thrashosds-health.yaml @@ -30,3 +30,4 @@ overrides: - out of quorum - noscrub - nodeep-scrub + - is down diff --git a/qa/tasks/vstart_runner.py b/qa/tasks/vstart_runner.py index ca929ba05b4..2ed21431330 100644 --- a/qa/tasks/vstart_runner.py +++ b/qa/tasks/vstart_runner.py @@ -233,6 +233,11 @@ class LocalRemoteProcess(object): else: self.stderr.write(err) + def _handle_subprocess_output(self, output, stream): + if isinstance(stream, StringIO): + return rm_nonascii_chars(output) + return output + def wait(self, 
timeout=None): # Null subproc.stdin so communicate() does not try flushing/closing it # again. @@ -250,7 +255,8 @@ class LocalRemoteProcess(object): return out, err = self.subproc.communicate(timeout=timeout) - out, err = rm_nonascii_chars(out), rm_nonascii_chars(err) + out = self._handle_subprocess_output(out, self.stdout) + err = self._handle_subprocess_output(err, self.stderr) self._write_stdout(out) self._write_stderr(err) diff --git a/qa/workunits/cephtool/test.sh b/qa/workunits/cephtool/test.sh index cdfff17d837..ad5950367e9 100755 --- a/qa/workunits/cephtool/test.sh +++ b/qa/workunits/cephtool/test.sh @@ -63,7 +63,7 @@ function retry_eagain() for count in $(seq 1 $max) ; do status=0 "$@" > $tmpfile 2>&1 || status=$? - if test $status = 0 || + if test $status = 0 || ! grep --quiet EAGAIN $tmpfile ; then break fi @@ -108,7 +108,7 @@ function check_response() exit 1 fi - if ! grep --quiet -- "$expected_string" $TMPFILE ; then + if ! grep --quiet -- "$expected_string" $TMPFILE ; then echo "Didn't find $expected_string in output" >&2 cat $TMPFILE >&2 exit 1 @@ -696,7 +696,7 @@ function test_auth_profiles() ceph -n client.xx-profile-rd -k client.xx.keyring auth del client.xx-profile-ro ceph -n client.xx-profile-rd -k client.xx.keyring auth del client.xx-profile-rw - + # add a new role-definer with the existing role-definer ceph -n client.xx-profile-rd -k client.xx.keyring \ auth add client.xx-profile-rd2 mon 'allow profile role-definer' @@ -730,7 +730,7 @@ function test_mon_caps() ceph-authtool -n client.bug --cap mon '' $TEMP_DIR/ceph.client.bug.keyring ceph auth add client.bug -i $TEMP_DIR/ceph.client.bug.keyring rados lspools --no-mon-config --keyring $TEMP_DIR/ceph.client.bug.keyring -n client.bug >& $TMPFILE || true - check_response "Permission denied" + check_response "Permission denied" } function test_mon_misc() @@ -780,7 +780,6 @@ function test_mon_misc() ceph mgr dump ceph mgr dump | jq -e '.active_clients[0].name' ceph mgr module ls - ceph mgr module enable restful expect_false ceph mgr module enable foodne ceph mgr module enable foodne --force ceph mgr module disable foodne @@ -1650,7 +1649,7 @@ function test_mon_osd() dump_json=$(ceph osd dump --format=json | \ jq -cM '.osds[] | select(.osd == 0)') [[ "${info_json}" == "${dump_json}" ]] - + info_plain="$(ceph osd info)" dump_plain="$(ceph osd dump | grep '^osd')" [[ "${info_plain}" == "${dump_plain}" ]] @@ -2244,7 +2243,7 @@ function test_mon_pg() # tell osd version # ceph tell osd.0 version - expect_false ceph tell osd.9999 version + expect_false ceph tell osd.9999 version expect_false ceph tell osd.foo version # back to pg stuff @@ -2336,7 +2335,7 @@ function test_mon_osd_pool_set() ceph osd pool get $TEST_POOL_GETSET deep_scrub_interval | expect_false grep '.' ceph osd pool get $TEST_POOL_GETSET recovery_priority | expect_false grep '.' - ceph osd pool set $TEST_POOL_GETSET recovery_priority 5 + ceph osd pool set $TEST_POOL_GETSET recovery_priority 5 ceph osd pool get $TEST_POOL_GETSET recovery_priority | grep 'recovery_priority: 5' ceph osd pool set $TEST_POOL_GETSET recovery_priority -5 ceph osd pool get $TEST_POOL_GETSET recovery_priority | grep 'recovery_priority: -5' @@ -2346,13 +2345,13 @@ function test_mon_osd_pool_set() expect_false ceph osd pool set $TEST_POOL_GETSET recovery_priority 11 ceph osd pool get $TEST_POOL_GETSET recovery_op_priority | expect_false grep '.' 
- ceph osd pool set $TEST_POOL_GETSET recovery_op_priority 5 + ceph osd pool set $TEST_POOL_GETSET recovery_op_priority 5 ceph osd pool get $TEST_POOL_GETSET recovery_op_priority | grep 'recovery_op_priority: 5' ceph osd pool set $TEST_POOL_GETSET recovery_op_priority 0 ceph osd pool get $TEST_POOL_GETSET recovery_op_priority | expect_false grep '.' ceph osd pool get $TEST_POOL_GETSET scrub_priority | expect_false grep '.' - ceph osd pool set $TEST_POOL_GETSET scrub_priority 5 + ceph osd pool set $TEST_POOL_GETSET scrub_priority 5 ceph osd pool get $TEST_POOL_GETSET scrub_priority | grep 'scrub_priority: 5' ceph osd pool set $TEST_POOL_GETSET scrub_priority 0 ceph osd pool get $TEST_POOL_GETSET scrub_priority | expect_false grep '.' @@ -2386,10 +2385,10 @@ function test_mon_osd_pool_set() ceph osd pool set $TEST_POOL_GETSET size 2 wait_for_clean ceph osd pool set $TEST_POOL_GETSET min_size 2 - + expect_false ceph osd pool set $TEST_POOL_GETSET hashpspool 0 ceph osd pool set $TEST_POOL_GETSET hashpspool 0 --yes-i-really-mean-it - + expect_false ceph osd pool set $TEST_POOL_GETSET hashpspool 1 ceph osd pool set $TEST_POOL_GETSET hashpspool 1 --yes-i-really-mean-it @@ -2587,7 +2586,7 @@ function test_mon_osd_misc() ceph osd map 2>$TMPFILE; check_response 'pool' $? 22 # expect error about unused argument foo - ceph osd ls foo 2>$TMPFILE; check_response 'unused' $? 22 + ceph osd ls foo 2>$TMPFILE; check_response 'unused' $? 22 # expect "not in range" for invalid overload percentage ceph osd reweight-by-utilization 80 2>$TMPFILE; check_response 'higher than 100' $? 22 diff --git a/qa/workunits/erasure-code/bench.sh b/qa/workunits/erasure-code/bench.sh index fc75830dfd0..87e997c3500 100755 --- a/qa/workunits/erasure-code/bench.sh +++ b/qa/workunits/erasure-code/bench.sh @@ -17,7 +17,8 @@ # # Test that it works from sources with: # -# CEPH_ERASURE_CODE_BENCHMARK=src/ceph_erasure_code_benchmark \ +# TOTAL_SIZE=$((4 * 1024 * 1024)) SIZE=4096 \ +# CEPH_ERASURE_CODE_BENCHMARK=build/bin/ceph_erasure_code_benchmark \ # PLUGIN_DIRECTORY=build/lib \ # qa/workunits/erasure-code/bench.sh fplot jerasure | # tee qa/workunits/erasure-code/bench.js @@ -34,10 +35,14 @@ # firefox qa/workunits/erasure-code/bench.html # # Once it is confirmed to work, it can be run with a more significant -# volume of data so that the measures are more reliable: +# volume of data so that the measures are more reliable. Ideally the size +# of the buffers (SIZE) should be larger than the L3 cache to avoid cache hits. +# The following example uses an 80MB (80 * 1024 * 1024) buffer. 
+# A larger buffer with fewer iterations (iterations = TOTAL SIZE / SIZE) should result in +# more time spent encoding/decoding and less time allocating/aligning buffers: # -# TOTAL_SIZE=$((4 * 1024 * 1024 * 1024)) \ -# CEPH_ERASURE_CODE_BENCHMARK=src/ceph_erasure_code_benchmark \ +# TOTAL_SIZE=$((100 * 80 * 1024 * 1024)) SIZE=$((80 * 1024 * 1024)) \ +# CEPH_ERASURE_CODE_BENCHMARK=build/bin/ceph_erasure_code_benchmark \ # PLUGIN_DIRECTORY=build/lib \ # qa/workunits/erasure-code/bench.sh fplot jerasure | # tee qa/workunits/erasure-code/bench.js @@ -51,8 +56,8 @@ export PATH=/sbin:$PATH : ${PLUGIN_DIRECTORY:=/usr/lib/ceph/erasure-code} : ${PLUGINS:=isa jerasure} : ${TECHNIQUES:=vandermonde cauchy liberation reed_sol_r6_op blaum_roth liber8tion} -: ${TOTAL_SIZE:=$((1024 * 1024))} -: ${SIZE:=4096} +: ${TOTAL_SIZE:=$((100 * 80 * 1024 * 1024))} #TOTAL_SIZE / SIZE = number of encode or decode iterations to run +: ${SIZE:=$((80 * 1024 * 1024))} #size of buffer to encode/decode : ${PARAMETERS:=--parameter jerasure-per-chunk-alignment=true} declare -rA isa_techniques=( diff --git a/qa/workunits/fs/misc/fallocate.sh b/qa/workunits/fs/misc/fallocate.sh new file mode 100755 index 00000000000..253e6cb7a37 --- /dev/null +++ b/qa/workunits/fs/misc/fallocate.sh @@ -0,0 +1,17 @@ +#!/bin/sh -x + +# fallocate with mode 0 should fail with EOPNOTSUPP +set -e +mkdir -p testdir +cd testdir + +expect_failure() { + if "$@"; then return 1; else return 0; fi +} + +expect_failure fallocate -l 1M preallocated.txt +rm -f preallocated.txt + +cd .. +rmdir testdir +echo OK diff --git a/qa/workunits/fs/snaps/snaptest-git-ceph.sh b/qa/workunits/fs/snaps/snaptest-git-ceph.sh index 2b38720c9a5..6079ba8945b 100755 --- a/qa/workunits/fs/snaps/snaptest-git-ceph.sh +++ b/qa/workunits/fs/snaps/snaptest-git-ceph.sh @@ -4,7 +4,14 @@ set -e # increase the cache size sudo git config --global http.sslVerify false -sudo git config --global http.postBuffer 1048576000 +sudo git config --global http.postBuffer 1024MB # default is 1MB +sudo git config --global http.maxRequestBuffer 100M # default is 10MB +sudo git config --global core.compression 0 + +# enable the debug logs for git clone +export GIT_TRACE_PACKET=1 +export GIT_TRACE=1 +export GIT_CURL_VERBOSE=1 # try it again if the clone is slow and the second time retried=false @@ -19,6 +26,11 @@ timeout 1800 git clone https://git.ceph.com/ceph.git trap - EXIT cd ceph +# disable the debug logs for git clone +export GIT_TRACE_PACKET=0 +export GIT_TRACE=0 +export GIT_CURL_VERBOSE=0 + versions=`seq 1 90` for v in $versions diff --git a/qa/workunits/mon/mon-stretch-mode-5-mons-8-osds.sh b/qa/workunits/mon/mon-stretch-mode-5-mons-8-osds.sh new file mode 100755 index 00000000000..827fb0a0b13 --- /dev/null +++ b/qa/workunits/mon/mon-stretch-mode-5-mons-8-osds.sh @@ -0,0 +1,72 @@ +#!/bin/bash -ex + +# A bash script for setting up stretch mode with 5 monitors and 8 OSDs. 
+ +NUM_OSDS_UP=$(ceph osd df | grep "up" | wc -l) + +if [ $NUM_OSDS_UP -lt 8 ]; then + echo "test requires at least 8 OSDs up and running" + exit 1 +fi + +# ensure election strategy is set to "connectivity" +# See https://tracker.ceph.com/issues/69107 +ceph mon set election_strategy connectivity + +for dc in dc1 dc2 + do + ceph osd crush add-bucket $dc datacenter + ceph osd crush move $dc root=default + done + +ceph osd crush add-bucket host01 host +ceph osd crush add-bucket host02 host +ceph osd crush add-bucket host03 host +ceph osd crush add-bucket host04 host + +ceph osd crush move host01 datacenter=dc1 +ceph osd crush move host02 datacenter=dc1 +ceph osd crush move host03 datacenter=dc2 +ceph osd crush move host04 datacenter=dc2 + +ceph osd crush move osd.0 host=host01 +ceph osd crush move osd.1 host=host01 +ceph osd crush move osd.2 host=host02 +ceph osd crush move osd.3 host=host02 +ceph osd crush move osd.4 host=host03 +ceph osd crush move osd.5 host=host03 +ceph osd crush move osd.6 host=host04 +ceph osd crush move osd.7 host=host04 + +# set location for monitors +ceph mon set_location a datacenter=dc1 host=host01 +ceph mon set_location b datacenter=dc1 host=host02 +ceph mon set_location c datacenter=dc2 host=host03 +ceph mon set_location d datacenter=dc2 host=host04 + +# set location for tiebreaker monitor +ceph mon set_location e datacenter=dc3 host=host05 + +# remove the current host from crush map +hostname=$(hostname -s) +ceph osd crush remove $hostname +# create a new crush rule with stretch rule +ceph osd getcrushmap > crushmap +crushtool --decompile crushmap > crushmap.txt +sed 's/^# end crush map$//' crushmap.txt > crushmap_modified.txt +cat >> crushmap_modified.txt << EOF +rule stretch_rule { + id 2 + type replicated + step take default + step choose firstn 2 type datacenter + step chooseleaf firstn 2 type host + step emit +} +# end crush map +EOF + +crushtool --compile crushmap_modified.txt -o crushmap.bin +ceph osd setcrushmap -i crushmap.bin + +ceph mon enable_stretch_mode e stretch_rule datacenter diff --git a/qa/workunits/nvmeof/basic_tests.sh b/qa/workunits/nvmeof/basic_tests.sh index dc6fd1669da..9e7a1f5134e 100755 --- a/qa/workunits/nvmeof/basic_tests.sh +++ b/qa/workunits/nvmeof/basic_tests.sh @@ -38,8 +38,10 @@ disconnect_all() { connect_all() { sudo nvme connect-all --traddr=$NVMEOF_DEFAULT_GATEWAY_IP_ADDRESS --transport=tcp -l 3600 sleep 5 - output=$(sudo nvme list --output-format=json) - if ! echo "$output" | grep -q "$SPDK_CONTROLLER"; then + expected_devices_count=$1 + actual_devices=$(sudo nvme list --output-format=json | jq -r ".Devices[].Subsystems[] | select(.Controllers | all(.ModelNumber == \"$SPDK_CONTROLLER\")) | .Namespaces[].NameSpace" | wc -l) + if [ "$actual_devices" -ne "$expected_devices_count" ]; then + sudo nvme list --output-format=json return 1 fi } @@ -72,11 +74,13 @@ test_run connect test_run list_subsys 1 test_run disconnect_all test_run list_subsys 0 -test_run connect_all +devices_count=$(( $NVMEOF_NAMESPACES_COUNT * $NVMEOF_SUBSYSTEMS_COUNT )) +test_run connect_all $devices_count gateways_count=$(( $(echo "$NVMEOF_GATEWAY_IP_ADDRESSES" | tr -cd ',' | wc -c) + 1 )) multipath_count=$(( $gateways_count * $NVMEOF_SUBSYSTEMS_COUNT)) test_run list_subsys $multipath_count + echo "-------------Test Summary-------------" echo "[nvmeof] All nvmeof basic tests passed!" 
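For context on the connect_all() change above: the test no longer just greps the SPDK controller string, it derives the expected namespace count from the suite environment and compares it against what the initiator actually sees. Below is a minimal standalone sketch of that same check, assuming /etc/ceph/nvmeof.env exports NVMEOF_SUBSYSTEMS_COUNT, NVMEOF_NAMESPACES_COUNT and SPDK_CONTROLLER as the suite does; the jq filter is the one used in the patch.

    #!/bin/bash
    # Sketch: verify every expected namespace is visible on the initiator.
    source /etc/ceph/nvmeof.env
    expected=$(( NVMEOF_NAMESPACES_COUNT * NVMEOF_SUBSYSTEMS_COUNT ))
    # Count namespaces whose controllers all report the SPDK model string.
    actual=$(sudo nvme list --output-format=json |
        jq -r ".Devices[].Subsystems[]
               | select(.Controllers | all(.ModelNumber == \"$SPDK_CONTROLLER\"))
               | .Namespaces[].NameSpace" | wc -l)
    if [ "$actual" -ne "$expected" ]; then
        echo "expected $expected namespaces, found $actual" >&2
        exit 1
    fi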
diff --git a/qa/workunits/nvmeof/fio_test.sh b/qa/workunits/nvmeof/fio_test.sh index 57d355a6318..f7f783afc67 100755 --- a/qa/workunits/nvmeof/fio_test.sh +++ b/qa/workunits/nvmeof/fio_test.sh @@ -5,6 +5,7 @@ sudo yum -y install sysstat namespace_range_start= namespace_range_end= +random_devices_count= rbd_iostat=false while [[ $# -gt 0 ]]; do @@ -17,6 +18,10 @@ while [[ $# -gt 0 ]]; do namespace_range_end=$2 shift 2 ;; + --random_devices) + random_devices_count=$2 + shift 2 + ;; --rbd_iostat) rbd_iostat=true shift @@ -29,7 +34,7 @@ done fio_file=$(mktemp -t nvmeof-fio-XXXX) all_drives_list=$(sudo nvme list --output-format=json | - jq -r '.Devices | sort_by(.NameSpace) | .[] | select(.ModelNumber == "Ceph bdev Controller") | .DevicePath') + jq -r '.Devices[].Subsystems[] | select(.Controllers | all(.ModelNumber == "Ceph bdev Controller")) | .Namespaces | sort_by(.NSID) | .[] | .NameSpace') # When the script is passed --start_ns and --end_ns (example: `nvmeof_fio_test.sh --start_ns 1 --end_ns 3`), # then fio runs on namespaces only in the defined range (which is 1 to 3 here). @@ -37,6 +42,8 @@ all_drives_list=$(sudo nvme list --output-format=json | # run on first 3 namespaces here. if [ "$namespace_range_start" ] || [ "$namespace_range_end" ]; then selected_drives=$(echo "${all_drives_list[@]}" | sed -n "${namespace_range_start},${namespace_range_end}p") +elif [ "$random_devices_count" ]; then + selected_drives=$(echo "${all_drives_list[@]}" | shuf -n $random_devices_count) else selected_drives="${all_drives_list[@]}" fi diff --git a/qa/workunits/nvmeof/mtls_test.sh b/qa/workunits/nvmeof/mtls_test.sh new file mode 100755 index 00000000000..e13ca530e8d --- /dev/null +++ b/qa/workunits/nvmeof/mtls_test.sh @@ -0,0 +1,76 @@ +#!/bin/bash + +set -ex +source /etc/ceph/nvmeof.env + +# install yq +wget https://github.com/mikefarah/yq/releases/latest/download/yq_linux_amd64 -O /tmp/yq && chmod +x /tmp/yq + +subjectAltName=$(echo "$NVMEOF_GATEWAY_IP_ADDRESSES" | sed 's/,/,IP:/g') + +# create mtls spec files +ceph orch ls nvmeof --export > /tmp/gw-conf-original.yaml +sudo /tmp/yq ".spec.enable_auth=true | \ + .spec.root_ca_cert=\"mountcert\" | \ + .spec.client_cert = load_str(\"/etc/ceph/client.crt\") | \ + .spec.client_key = load_str(\"/etc/ceph/client.key\") | \ + .spec.server_cert = load_str(\"/etc/ceph/server.crt\") | \ + .spec.server_key = load_str(\"/etc/ceph/server.key\")" /tmp/gw-conf-original.yaml > /tmp/gw-conf-with-mtls.yaml +cp /tmp/gw-conf-original.yaml /tmp/gw-conf-without-mtls.yaml +sudo /tmp/yq '.spec.enable_auth=false' -i /tmp/gw-conf-without-mtls.yaml + +wait_for_service() { + MAX_RETRIES=30 + for ((RETRY_COUNT=1; RETRY_COUNT<=MAX_RETRIES; RETRY_COUNT++)); do + + if ceph orch ls --refresh | grep -q "nvmeof"; then + echo "Found nvmeof in the output!" + break + fi + if [ $RETRY_COUNT -eq $MAX_RETRIES ]; then + echo "Reached maximum retries ($MAX_RETRIES). Exiting." 
+ break + fi + sleep 5 + done + ceph orch ps + ceph orch ls --refresh +} + +# deploy mtls +cat /tmp/gw-conf-with-mtls.yaml +ceph orch apply -i /tmp/gw-conf-with-mtls.yaml +ceph orch redeploy nvmeof.mypool.mygroup0 +sleep 100 +wait_for_service + + +# test +IFS=',' read -ra gateway_ips <<< "$NVMEOF_GATEWAY_IP_ADDRESSES" +for i in "${!gateway_ips[@]}" +do + ip="${gateway_ips[i]}" + sudo podman run -v /etc/ceph/server.crt:/server.crt:z -v /etc/ceph/client.crt:/client.crt:z \ + -v /etc/ceph/client.key:/client.key:z \ + -it $NVMEOF_CLI_IMAGE --server-address $ip --server-port $NVMEOF_SRPORT \ + --client-key /client.key --client-cert /client.crt --server-cert /server.crt --format json subsystem list +done + + +# remove mtls +cat /tmp/gw-conf-without-mtls.yaml +ceph orch apply -i /tmp/gw-conf-without-mtls.yaml +ceph orch redeploy nvmeof.mypool.mygroup0 +sleep 100 +wait_for_service + + +# test +IFS=',' read -ra gateway_ips <<< "$NVMEOF_GATEWAY_IP_ADDRESSES" +for i in "${!gateway_ips[@]}" +do + ip="${gateway_ips[i]}" + sudo podman run -it $NVMEOF_CLI_IMAGE --server-address $ip --server-port $NVMEOF_SRPORT \ + --format json subsystem list +done + diff --git a/qa/workunits/nvmeof/scalability_test.sh b/qa/workunits/nvmeof/scalability_test.sh index 5a26b6284f7..8ede4b7eda2 100755 --- a/qa/workunits/nvmeof/scalability_test.sh +++ b/qa/workunits/nvmeof/scalability_test.sh @@ -3,37 +3,64 @@ GATEWAYS=$1 # exmaple "nvmeof.a,nvmeof.b" DELAY="${SCALING_DELAYS:-50}" +POOL="${RBD_POOL:-mypool}" +GROUP="${NVMEOF_GROUP:-mygroup0}" +source /etc/ceph/nvmeof.env if [ -z "$GATEWAYS" ]; then echo "At least one gateway needs to be defined for scalability test" exit 1 fi -pip3 install yq - status_checks() { - ceph nvme-gw show mypool '' - ceph orch ls - ceph orch ps - ceph -s + expected_count=$1 + + output=$(ceph nvme-gw show $POOL $GROUP) + nvme_show=$(echo $output | grep -o '"AVAILABLE"' | wc -l) + if [ "$nvme_show" -ne "$expected_count" ]; then + return 1 + fi + + orch_ls=$(ceph orch ls) + if ! echo "$orch_ls" | grep -q "$expected_count/$expected_count"; then + return 1 + fi + + output=$(ceph orch ps --service-name nvmeof.$POOL.$GROUP) + orch_ps=$(echo $output | grep -o 'running' | wc -l) + if [ "$orch_ps" -ne "$expected_count" ]; then + return 1 + fi + + ceph_status=$(ceph -s) + if ! echo "$ceph_status" | grep -q "HEALTH_OK"; then + return 1 + fi } +total_gateways_count=$(( $(echo "$NVMEOF_GATEWAY_IP_ADDRESSES" | tr -cd ',' | wc -c) + 1 )) +scaled_down_gateways_count=$(( total_gateways_count - $(echo "$GATEWAYS" | tr -cd ',' | wc -c) - 1 )) + echo "[nvmeof.scale] Setting up config to remove gateways ${GATEWAYS}" +ceph orch ls --service-name nvmeof.$POOL.$GROUP --export > /tmp/nvmeof-gw.yaml ceph orch ls nvmeof --export > /tmp/nvmeof-gw.yaml cat /tmp/nvmeof-gw.yaml -yq "del(.placement.hosts[] | select(. 
| test(\".*($(echo $GATEWAYS | sed 's/,/|/g'))\")))" /tmp/nvmeof-gw.yaml > /tmp/nvmeof-gw-new.yaml + +pattern=$(echo $GATEWAYS | sed 's/,/\\|/g') +sed "/$pattern/d" /tmp/nvmeof-gw.yaml > /tmp/nvmeof-gw-new.yaml cat /tmp/nvmeof-gw-new.yaml echo "[nvmeof.scale] Starting scale testing by removing ${GATEWAYS}" -status_checks -ceph orch rm nvmeof.mypool && sleep 20 # temp workaround +status_checks $total_gateways_count ceph orch apply -i /tmp/nvmeof-gw-new.yaml # downscale +ceph orch redeploy nvmeof.$POOL.$GROUP sleep $DELAY -status_checks -ceph orch rm nvmeof.mypool && sleep 20 # temp workaround +status_checks $scaled_down_gateways_count +echo "[nvmeof.scale] Downscale complete - removed gateways (${GATEWAYS}); now scaling back up" ceph orch apply -i /tmp/nvmeof-gw.yaml #upscale +ceph orch redeploy nvmeof.$POOL.$GROUP sleep $DELAY -status_checks +status_checks $total_gateways_count echo "[nvmeof.scale] Scale testing passed for ${GATEWAYS}" diff --git a/qa/workunits/nvmeof/setup_subsystem.sh b/qa/workunits/nvmeof/setup_subsystem.sh index cc4024323eb..b573647b1e3 100755 --- a/qa/workunits/nvmeof/setup_subsystem.sh +++ b/qa/workunits/nvmeof/setup_subsystem.sh @@ -26,14 +26,21 @@ list_subsystems () { done } +list_namespaces () { + for i in $(seq 1 $NVMEOF_SUBSYSTEMS_COUNT); do + subsystem_nqn="${NVMEOF_SUBSYSTEMS_PREFIX}${i}" + sudo podman run -it $NVMEOF_CLI_IMAGE --server-address $NVMEOF_DEFAULT_GATEWAY_IP_ADDRESS --server-port $NVMEOF_SRPORT --format plain namespace list --subsystem $subsystem_nqn + done +} + +echo "[nvmeof] Starting subsystem setup..." + # add all subsystems for i in $(seq 1 $NVMEOF_SUBSYSTEMS_COUNT); do subsystem_nqn="${NVMEOF_SUBSYSTEMS_PREFIX}${i}" sudo podman run -it $NVMEOF_CLI_IMAGE --server-address $NVMEOF_DEFAULT_GATEWAY_IP_ADDRESS --server-port $NVMEOF_SRPORT subsystem add --subsystem $subsystem_nqn --no-group-append done -list_subsystems - # add all gateway listeners for i in "${!gateway_ips[@]}" do @@ -65,11 +72,5 @@ done list_subsystems -# list namespaces -for i in $(seq 1 $NVMEOF_SUBSYSTEMS_COUNT); do - subsystem_nqn="${NVMEOF_SUBSYSTEMS_PREFIX}${i}" - sudo podman run -it $NVMEOF_CLI_IMAGE --server-address $NVMEOF_DEFAULT_GATEWAY_IP_ADDRESS --server-port $NVMEOF_SRPORT --format plain namespace list --subsystem $subsystem_nqn -done - echo "[nvmeof] Subsystem setup done" diff --git a/qa/workunits/rbd/cli_generic.sh b/qa/workunits/rbd/cli_generic.sh index 2aa27d3d655..0ceb9ff54cf 100755 --- a/qa/workunits/rbd/cli_generic.sh +++ b/qa/workunits/rbd/cli_generic.sh @@ -914,6 +914,11 @@ test_namespace() { rbd group create rbd/test1/group1 rbd group image add rbd/test1/group1 rbd/test1/image1 + rbd group image add --group-pool rbd --group-namespace test1 --group group1 \ + --image-pool rbd --image-namespace test1 --image image2 + rbd group image rm --group-pool rbd --group-namespace test1 --group group1 \ + --image-pool rbd --image-namespace test1 --image image1 + rbd group image rm rbd/test1/group1 rbd/test1/image2 rbd group rm rbd/test1/group1 rbd trash move rbd/test1/image1 diff --git a/qa/workunits/rbd/cli_migration.sh b/qa/workunits/rbd/cli_migration.sh index b044e747cbb..3af19420957 100755 --- a/qa/workunits/rbd/cli_migration.sh +++ b/qa/workunits/rbd/cli_migration.sh @@ -5,12 +5,16 @@ TEMPDIR= IMAGE1=image1 IMAGE2=image2 IMAGE3=image3 -IMAGES="${IMAGE1} ${IMAGE2} ${IMAGE3}" +NAMESPACE1=namespace1 +NAMESPACE2=namespace2 +NAMESPACES="${NAMESPACE1} ${NAMESPACE2}" +IMAGES="${IMAGE1} ${IMAGE2} ${IMAGE3} rbd/${NAMESPACE1}/${IMAGE1} rbd/${NAMESPACE2}/${IMAGE2}" cleanup() { 
kill_nbd_server cleanup_tempdir remove_images + remove_namespaces } setup_tempdir() { @@ -42,8 +46,11 @@ create_base_image() { export_raw_image() { local image=$1 - rm -rf "${TEMPDIR}/${image}" - rbd export ${image} "${TEMPDIR}/${image}" + # Replace slashes (/) with underscores (_) for namespace images + local export_image="${image//\//_}" + + rm -rf "${TEMPDIR}/${export_image}" + rbd export "${image}" "${TEMPDIR}/${export_image}" } export_base_image() { @@ -69,6 +76,13 @@ remove_images() { done } +remove_namespaces() { + for namespace in ${NAMESPACES} + do + rbd namespace remove rbd/${namespace} || true + done +} + kill_nbd_server() { pkill -9 qemu-nbd || true } @@ -90,6 +104,11 @@ compare_images() { local ret=0 export_raw_image ${dst_image} + + # Replace slashes (/) with underscores (_) for namespace images + src_image="${src_image//\//_}" + dst_image="${dst_image//\//_}" + if ! cmp "${TEMPDIR}/${src_image}" "${TEMPDIR}/${dst_image}" then show_diff "${TEMPDIR}/${src_image}" "${TEMPDIR}/${dst_image}" @@ -99,18 +118,26 @@ compare_images() { } test_import_native_format() { - local base_image=$1 - local dest_image=$2 + local base_image_spec=$1 + local dest_image_spec=$2 + + # if base image is from namespace + local base_namespace="" + local base_image=${base_image_spec} + if [[ "${base_image_spec}" == rbd/*/* ]]; then + base_namespace=$(basename "$(dirname "${base_image_spec}")") + base_image=$(basename "${base_image_spec}") + fi - rbd migration prepare --import-only "rbd/${base_image}@2" ${dest_image} - rbd migration abort ${dest_image} + rbd migration prepare --import-only "${base_image_spec}@2" ${dest_image_spec} + rbd migration abort ${dest_image_spec} local pool_id=$(ceph osd pool ls detail --format xml | xmlstarlet sel -t -v "//pools/pool[pool_name='rbd']/pool_id") cat > ${TEMPDIR}/spec.json <<EOF { "type": "native", "pool_id": ${pool_id}, - "pool_namespace": "", + "pool_namespace": "${base_namespace}", "image_name": "${base_image}", "snap_name": "2" } @@ -118,85 +145,85 @@ EOF cat ${TEMPDIR}/spec.json rbd migration prepare --import-only \ - --source-spec-path ${TEMPDIR}/spec.json ${dest_image} + --source-spec-path ${TEMPDIR}/spec.json ${dest_image_spec} - compare_images "${base_image}@1" "${dest_image}@1" - compare_images "${base_image}@2" "${dest_image}@2" + compare_images "${base_image_spec}@1" "${dest_image_spec}@1" + compare_images "${base_image_spec}@2" "${dest_image_spec}@2" - rbd migration abort ${dest_image} + rbd migration abort ${dest_image_spec} rbd migration prepare --import-only \ - --source-spec-path ${TEMPDIR}/spec.json ${dest_image} - rbd migration execute ${dest_image} + --source-spec-path ${TEMPDIR}/spec.json ${dest_image_spec} + rbd migration execute ${dest_image_spec} - compare_images "${base_image}@1" "${dest_image}@1" - compare_images "${base_image}@2" "${dest_image}@2" + compare_images "${base_image_spec}@1" "${dest_image_spec}@1" + compare_images "${base_image_spec}@2" "${dest_image_spec}@2" - rbd migration abort ${dest_image} + rbd migration abort ${dest_image_spec} # no snap name or snap id expect_false rbd migration prepare --import-only \ - --source-spec "{\"type\": \"native\", \"pool_id\": ${pool_id}, \"image_name\": \"${base_image}\"}" \ - ${dest_image} + --source-spec "{\"type\": \"native\", \"pool_id\": ${pool_id}, \"pool_namespace\": \"${base_namespace}\", \"image_name\": \"${base_image}\"}" \ + ${dest_image_spec} # invalid source spec JSON expect_false rbd migration prepare --import-only \ - --source-spec "{\"type\": \"native\", \"pool_id\": 
${pool_id}, \"image_name\": \"${base_image}\", \"snap_name\": non-existing}" \ - ${dest_image} + --source-spec "{\"type\": \"native\", \"pool_id\": ${pool_id}, \"pool_namespace\": \"${base_namespace}\", \"image_name\": \"${base_image}\", \"snap_name\": non-existing}" \ + ${dest_image_spec} # non-existing snap name expect_false rbd migration prepare --import-only \ - --source-spec "{\"type\": \"native\", \"pool_id\": ${pool_id}, \"image_name\": \"${base_image}\", \"snap_name\": \"non-existing\"}" \ - ${dest_image} + --source-spec "{\"type\": \"native\", \"pool_id\": ${pool_id}, \"pool_namespace\": \"${base_namespace}\", \"image_name\": \"${base_image}\", \"snap_name\": \"non-existing\"}" \ + ${dest_image_spec} # invalid snap name expect_false rbd migration prepare --import-only \ - --source-spec "{\"type\": \"native\", \"pool_id\": ${pool_id}, \"image_name\": \"${base_image}\", \"snap_name\": 123456}" \ - ${dest_image} + --source-spec "{\"type\": \"native\", \"pool_id\": ${pool_id}, \"pool_namespace\": \"${base_namespace}\", \"image_name\": \"${base_image}\", \"snap_name\": 123456}" \ + ${dest_image_spec} # non-existing snap id passed as int expect_false rbd migration prepare --import-only \ - --source-spec "{\"type\": \"native\", \"pool_id\": ${pool_id}, \"image_name\": \"${base_image}\", \"snap_id\": 123456}" \ - ${dest_image} + --source-spec "{\"type\": \"native\", \"pool_id\": ${pool_id}, \"pool_namespace\": \"${base_namespace}\", \"image_name\": \"${base_image}\", \"snap_id\": 123456}" \ + ${dest_image_spec} # non-existing snap id passed as string expect_false rbd migration prepare --import-only \ - --source-spec "{\"type\": \"native\", \"pool_id\": ${pool_id}, \"image_name\": \"${base_image}\", \"snap_id\": \"123456\"}" \ - ${dest_image} + --source-spec "{\"type\": \"native\", \"pool_id\": ${pool_id}, \"pool_namespace\": \"${base_namespace}\", \"image_name\": \"${base_image}\", \"snap_id\": \"123456\"}" \ + ${dest_image_spec} # invalid snap id expect_false rbd migration prepare --import-only \ - --source-spec "{\"type\": \"native\", \"pool_id\": ${pool_id}, \"image_name\": \"${base_image}\", \"snap_id\": \"foobar\"}" \ - ${dest_image} + --source-spec "{\"type\": \"native\", \"pool_id\": ${pool_id}, \"pool_namespace\": \"${base_namespace}\", \"image_name\": \"${base_image}\", \"snap_id\": \"foobar\"}" \ + ${dest_image_spec} # snap id passed as int - local snap_id=$(rbd snap ls ${base_image} --format xml | xmlstarlet sel -t -v "//snapshots/snapshot[name='2']/id") + local snap_id=$(rbd snap ls ${base_image_spec} --format xml | xmlstarlet sel -t -v "//snapshots/snapshot[name='2']/id") rbd migration prepare --import-only \ - --source-spec "{\"type\": \"native\", \"pool_id\": ${pool_id}, \"image_name\": \"${base_image}\", \"snap_id\": ${snap_id}}" \ - ${dest_image} - rbd migration abort ${dest_image} + --source-spec "{\"type\": \"native\", \"pool_id\": ${pool_id}, \"pool_namespace\": \"${base_namespace}\", \"image_name\": \"${base_image}\", \"snap_id\": ${snap_id}}" \ + ${dest_image_spec} + rbd migration abort ${dest_image_spec} # snap id passed as string rbd migration prepare --import-only \ - --source-spec "{\"type\": \"native\", \"pool_id\": ${pool_id}, \"image_name\": \"${base_image}\", \"snap_id\": \"${snap_id}\"}" \ - ${dest_image} - rbd migration abort ${dest_image} + --source-spec "{\"type\": \"native\", \"pool_id\": ${pool_id}, \"pool_namespace\": \"${base_namespace}\", \"image_name\": \"${base_image}\", \"snap_id\": \"${snap_id}\"}" \ + ${dest_image_spec} + rbd migration abort 
${dest_image_spec} rbd migration prepare --import-only \ - --source-spec "{\"type\": \"native\", \"pool_id\": ${pool_id}, \"image_name\": \"${base_image}\", \"snap_name\": \"2\"}" \ - ${dest_image} - rbd migration abort ${dest_image} + --source-spec "{\"type\": \"native\", \"pool_id\": ${pool_id}, \"pool_namespace\": \"${base_namespace}\", \"image_name\": \"${base_image}\", \"snap_name\": \"2\"}" \ + ${dest_image_spec} + rbd migration abort ${dest_image_spec} rbd migration prepare --import-only \ - --source-spec "{\"type\": \"native\", \"pool_name\": \"rbd\", \"image_name\": \"${base_image}\", \"snap_name\": \"2\"}" \ - ${dest_image} - rbd migration execute ${dest_image} - rbd migration commit ${dest_image} + --source-spec "{\"type\": \"native\", \"pool_name\": \"rbd\", \"pool_namespace\": \"${base_namespace}\", \"image_name\": \"${base_image}\", \"snap_name\": \"2\"}" \ + ${dest_image_spec} + rbd migration execute ${dest_image_spec} + rbd migration commit ${dest_image_spec} - compare_images "${base_image}@1" "${dest_image}@1" - compare_images "${base_image}@2" "${dest_image}@2" + compare_images "${base_image_spec}@1" "${dest_image_spec}@1" + compare_images "${base_image_spec}@2" "${dest_image_spec}@2" - remove_image "${dest_image}" + remove_image "${dest_image_spec}" } test_import_qcow_format() { @@ -337,12 +364,12 @@ EOF cat ${TEMPDIR}/spec.json cat ${TEMPDIR}/spec.json | rbd migration prepare --import-only \ - --source-spec-path - ${dest_image} + --source-spec-path - ${dest_image} compare_images ${base_image} ${dest_image} rbd migration abort ${dest_image} rbd migration prepare --import-only \ - --source-spec-path ${TEMPDIR}/spec.json ${dest_image} + --source-spec-path ${TEMPDIR}/spec.json ${dest_image} rbd migration execute ${dest_image} rbd migration commit ${dest_image} @@ -587,4 +614,18 @@ test_import_nbd_stream_qcow2 ${IMAGE2} ${IMAGE3} test_import_raw_format ${IMAGE1} ${IMAGE2} test_import_nbd_stream_raw ${IMAGE1} ${IMAGE2} +rbd namespace create rbd/${NAMESPACE1} +rbd namespace create rbd/${NAMESPACE2} +create_base_image rbd/${NAMESPACE1}/${IMAGE1} +export_base_image rbd/${NAMESPACE1}/${IMAGE1} + +# Migration from namespace to namespace +test_import_native_format rbd/${NAMESPACE1}/${IMAGE1} rbd/${NAMESPACE2}/${IMAGE2} + +# Migration from namespace to non-namespace +test_import_native_format rbd/${NAMESPACE1}/${IMAGE1} ${IMAGE2} + +# Migration from non-namespace to namespace +test_import_native_format ${IMAGE1} rbd/${NAMESPACE2}/${IMAGE2} + echo OK diff --git a/qa/workunits/rest/test-restful.sh b/qa/workunits/rest/test-restful.sh deleted file mode 100755 index fde0d107a0b..00000000000 --- a/qa/workunits/rest/test-restful.sh +++ /dev/null @@ -1,10 +0,0 @@ -#!/bin/sh -ex - -mydir=`dirname $0` - -secret=`ceph config-key get mgr/restful/keys/admin` -url=$(ceph mgr dump|jq -r .services.restful|sed -e 's/\/$//') -echo "url $url secret $secret" -$mydir/test_mgr_rest_api.py $url $secret - -echo $0 OK |
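The cli_migration.sh changes above extend the native-format import test to RBD namespaces: the source-spec JSON now carries pool_namespace, and the test matrix covers namespace to namespace, namespace to non-namespace, and non-namespace to namespace migrations. A minimal sketch of that flow follows; the namespace and image names are placeholders, and a plain rbd create stands in for the script's create_base_image helper.

    #!/bin/bash
    # Sketch: namespace-aware native-format migration, mirroring the spec.json
    # written by test_import_native_format (pool_namespace names the source namespace).
    rbd namespace create rbd/namespace1
    rbd namespace create rbd/namespace2
    rbd create --size 1G rbd/namespace1/image1
    rbd snap create rbd/namespace1/image1@2
    rbd migration prepare --import-only \
        --source-spec '{"type": "native", "pool_name": "rbd", "pool_namespace": "namespace1", "image_name": "image1", "snap_name": "2"}' \
        rbd/namespace2/image2
    rbd migration execute rbd/namespace2/image2
    rbd migration commit rbd/namespace2/image2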