Diffstat (limited to 'qa')
-rw-r--r--  qa/Makefile | 2
-rw-r--r--  qa/btrfs/.gitignore | 3
-rw-r--r--  qa/btrfs/Makefile | 11
-rw-r--r--  qa/btrfs/clone_range.c | 35
-rw-r--r--  qa/btrfs/create_async_snap.c | 34
-rw-r--r--  qa/btrfs/test_async_snap.c | 83
-rw-r--r--  qa/btrfs/test_rmdir_async_snap.c | 62
-rw-r--r--  qa/cephfs/begin/3-kernel.yaml | 23
-rw-r--r--  qa/cephfs/mount/kclient/overrides/distro/testing/k-testing.yaml | 9
-rw-r--r--  qa/cephfs/overrides/ignorelist_health.yaml | 1
-rw-r--r--  qa/config/crimson_bluestore.yaml | 25
-rw-r--r--  qa/config/crimson_qa_overrides.yaml | 1
-rw-r--r--  qa/config/crimson_seastore.yaml (renamed from qa/config/seastore.yaml) | 6
-rw-r--r--  qa/crontab/teuthology-cronjobs | 2
-rw-r--r--  qa/rgw/s3tests-branch.yaml | 4
-rwxr-xr-x  qa/standalone/ceph-helpers.sh | 7
-rwxr-xr-x  qa/standalone/mon/mon-cluster-log.sh | 16
-rwxr-xr-x  qa/standalone/osd-backfill/osd-backfill-space.sh | 9
-rwxr-xr-x  qa/standalone/osd/osd-rep-recov-eio.sh | 14
-rwxr-xr-x  qa/standalone/scrub/osd-recovery-scrub.sh | 4
-rwxr-xr-x  qa/standalone/scrub/osd-scrub-repair.sh | 2
-rwxr-xr-x  qa/standalone/scrub/osd-scrub-test.sh | 238
-rw-r--r--  qa/standalone/scrub/scrub-helpers.sh | 108
-rw-r--r--  qa/suites/ceph-ansible/smoke/basic/2-ceph/ceph_ansible.yaml | 1
-rw-r--r--  qa/suites/ceph-ansible/smoke/basic/4-tasks/rest.yaml | 15
-rw-r--r--  qa/suites/cephmetrics/2-ceph/ceph_ansible.yaml | 1
l---------  qa/suites/crimson-rados-experimental/.qa | 2
l---------  qa/suites/crimson-rados-experimental/seastore/basic/centos_latest.yaml | 1
-rw-r--r--  qa/suites/crimson-rados-experimental/seastore/basic/clusters/fixed-1.yaml | 14
-rw-r--r--  qa/suites/crimson-rados-experimental/seastore/basic/deploy/ceph.yaml | 18
l---------  qa/suites/crimson-rados-experimental/seastore/basic/objectstore/seastore.yaml | 1
-rw-r--r--  qa/suites/crimson-rados-experimental/seastore/basic/tasks/rados_api_tests.yaml | 28
-rw-r--r--  qa/suites/crimson-rados-experimental/seastore/basic/tasks/readwrite.yaml | 18
-rw-r--r--  qa/suites/crimson-rados-experimental/thrash/% (renamed from qa/suites/crimson-rados-experimental/seastore/basic/%) | 0
l---------  qa/suites/crimson-rados-experimental/thrash/.qa (renamed from qa/suites/crimson-rados-experimental/seastore/.qa) | 0
l---------  qa/suites/crimson-rados-experimental/thrash/0-size-min-size-overrides/.qa (renamed from qa/suites/crimson-rados-experimental/seastore/basic/.qa) | 0
l---------  qa/suites/crimson-rados-experimental/thrash/0-size-min-size-overrides/2-size-2-min-size.yaml.disabled | 1
l---------  qa/suites/crimson-rados-experimental/thrash/0-size-min-size-overrides/3-size-2-min-size.yaml | 1
l---------  qa/suites/crimson-rados-experimental/thrash/1-pg-log-overrides/.qa (renamed from qa/suites/crimson-rados-experimental/seastore/basic/clusters/.qa) | 0
-rw-r--r--  qa/suites/crimson-rados-experimental/thrash/1-pg-log-overrides/normal_pg_log.yaml (renamed from qa/suites/fs/thrash/workloads/overrides/+) | 0
l---------  qa/suites/crimson-rados-experimental/thrash/1-pg-log-overrides/short_pg_log.yaml (renamed from qa/suites/crimson-rados/thrash/1-pg-log-overrides/short_pg_log.yaml.disabled) | 0
-rw-r--r--  qa/suites/crimson-rados-experimental/thrash/2-recovery-overrides/$ (renamed from qa/suites/rados/rest/%) | 0
l---------  qa/suites/crimson-rados-experimental/thrash/2-recovery-overrides/.qa (renamed from qa/suites/crimson-rados-experimental/seastore/basic/deploy/.qa) | 0
-rw-r--r--  qa/suites/crimson-rados-experimental/thrash/2-recovery-overrides/default.yaml | 0
l---------  qa/suites/crimson-rados-experimental/thrash/2-recovery-overrides/more-active-recovery.yaml.disabled | 1
-rw-r--r--  qa/suites/crimson-rados-experimental/thrash/2-recovery-overrides/more-async-partial-recovery.yaml.disabled | 6
-rw-r--r--  qa/suites/crimson-rados-experimental/thrash/2-recovery-overrides/more-async-recovery.yaml.disabled | 5
-rw-r--r--  qa/suites/crimson-rados-experimental/thrash/2-recovery-overrides/more-partial-recovery.yaml.disabled | 5
-rw-r--r--  qa/suites/crimson-rados-experimental/thrash/clusters/+ | 0
l---------  qa/suites/crimson-rados-experimental/thrash/clusters/.qa (renamed from qa/suites/crimson-rados-experimental/seastore/basic/objectstore/.qa) | 0
-rw-r--r--  qa/suites/crimson-rados-experimental/thrash/clusters/fixed-2.yaml (renamed from qa/suites/crimson-rados-experimental/seastore/basic/clusters/fixed-2.yaml) | 9
-rw-r--r--  qa/suites/crimson-rados-experimental/thrash/clusters/openstack.yaml.disabled | 4
l---------  qa/suites/crimson-rados-experimental/thrash/crimson-supported-all-distro | 1
l---------  qa/suites/crimson-rados-experimental/thrash/crimson_qa_overrides.yaml (renamed from qa/suites/crimson-rados-experimental/seastore/basic/crimson_qa_overrides.yaml) | 0
l---------  qa/suites/crimson-rados-experimental/thrash/deploy/.qa (renamed from qa/suites/crimson-rados-experimental/seastore/basic/tasks/.qa) | 0
-rw-r--r--  qa/suites/crimson-rados-experimental/thrash/deploy/ceph.yaml | 11
-rw-r--r--  qa/suites/crimson-rados-experimental/thrash/deploy/cephadm.yaml.disabled | 16
l---------  qa/suites/crimson-rados-experimental/thrash/objectstore/.qa (renamed from qa/suites/fs/thrash/workloads/overrides/prefetch_dirfrags/.qa) | 0
l---------  qa/suites/crimson-rados-experimental/thrash/objectstore/seastore.yaml | 1
l---------  qa/suites/crimson-rados-experimental/thrash/thrashers/.qa (renamed from qa/suites/rados/rest/.qa) | 0
-rw-r--r--  qa/suites/crimson-rados-experimental/thrash/thrashers/default.yaml | 34
l---------  qa/suites/crimson-rados-experimental/thrash/thrashosds-health.yaml | 1
l---------  qa/suites/crimson-rados-experimental/thrash/workloads/.qa | 1
-rw-r--r--  qa/suites/crimson-rados-experimental/thrash/workloads/admin_socket_objecter_requests.yaml | 13
-rw-r--r--  qa/suites/crimson-rados-experimental/thrash/workloads/pool-snaps-few-objects.yaml | 20
-rw-r--r--  qa/suites/crimson-rados-experimental/thrash/workloads/radosbench-high-concurrency.yaml | 49
-rw-r--r--  qa/suites/crimson-rados-experimental/thrash/workloads/radosbench.yaml | 24
-rw-r--r--  qa/suites/crimson-rados-experimental/thrash/workloads/small-objects-balanced.yaml | 24
-rw-r--r--  qa/suites/crimson-rados-experimental/thrash/workloads/small-objects-localized.yaml | 24
-rw-r--r--  qa/suites/crimson-rados-experimental/thrash/workloads/small-objects.yaml (renamed from qa/suites/rados/thrash-erasure-code/workloads/ec-small-objects-balanced.yaml) | 14
-rw-r--r--  qa/suites/crimson-rados-experimental/thrash/workloads/snaps-few-objects-balanced.yaml | 15
-rw-r--r--  qa/suites/crimson-rados-experimental/thrash/workloads/snaps-few-objects-localized.yaml | 15
-rw-r--r--  qa/suites/crimson-rados-experimental/thrash/workloads/snaps-few-objects.yaml | 14
-rw-r--r--  qa/suites/crimson-rados-experimental/thrash/workloads/write_fadvise_dontneed.yaml | 8
l---------  qa/suites/crimson-rados/basic/objectstore/bluestore.yaml | 2
l---------  qa/suites/crimson-rados/basic/objectstore/seastore.yaml | 2
-rw-r--r--  qa/suites/crimson-rados/basic/tasks/rados_python.yaml | 2
l---------  qa/suites/crimson-rados/perf/objectstore/bluestore.yaml | 2
l---------  qa/suites/crimson-rados/perf/objectstore/seastore.yaml | 2
l---------  qa/suites/crimson-rados/rbd/objectstore/bluestore.yaml | 2
l---------  qa/suites/crimson-rados/rbd/objectstore/seastore.yaml | 2
l---------  qa/suites/crimson-rados/singleton/objectstore | 1
l---------  qa/suites/crimson-rados/singleton/objectstore/.qa | 1
l---------  qa/suites/crimson-rados/singleton/objectstore/bluestore.yaml | 1
l---------  qa/suites/crimson-rados/singleton/objectstore/seastore.yaml | 1
l---------  qa/suites/crimson-rados/thrash/1-pg-log-overrides/short_pg_log.yaml (renamed from qa/suites/crimson-rados/thrash_simple/1-pg-log-overrides/short_pg_log.yaml.disabled) | 0
l---------  qa/suites/crimson-rados/thrash/objectstore/bluestore.yaml | 2
l---------  qa/suites/crimson-rados/thrash/objectstore/seastore.yaml.disabled | 1
l---------  qa/suites/crimson-rados/thrash_simple/1-pg-log-overrides/short_pg_log.yaml | 1
l---------  qa/suites/crimson-rados/thrash_simple/objectstore/bluestore.yaml | 2
l---------  qa/suites/crimson-rados/thrash_simple/objectstore/seastore.yaml | 2
-rw-r--r--  qa/suites/fs/multifs/tasks/failover.yaml | 1
-rw-r--r--  qa/suites/fs/nfs/tasks/nfs.yaml | 7
-rw-r--r--  qa/suites/fs/thrash/workloads/overrides/% | 0
l---------  qa/suites/fs/thrash/workloads/overrides/prefetch_oft_dirfrags/.qa | 1
-rw-r--r--  qa/suites/fs/thrash/workloads/overrides/prefetch_oft_dirfrags/no.yaml (renamed from qa/suites/fs/thrash/workloads/overrides/prefetch_dirfrags/no.yaml) | 0
-rw-r--r--  qa/suites/fs/thrash/workloads/overrides/prefetch_oft_dirfrags/yes.yaml (renamed from qa/suites/fs/thrash/workloads/overrides/prefetch_dirfrags/yes.yaml) | 0
l---------  qa/suites/fs/upgrade/featureful_client/old_client/kernel.yaml | 1
l---------  qa/suites/fs/upgrade/featureful_client/upgraded_client/kernel.yaml | 1
l---------  qa/suites/fs/upgrade/mds_upgrade_sequence/kernel.yaml | 1
l---------  qa/suites/fs/upgrade/nofs/kernel.yaml | 1
l---------  qa/suites/fs/upgrade/upgraded_client/kernel.yaml | 1
l---------  qa/suites/fs/workload/begin/3-kernel.yaml | 1
-rw-r--r--  qa/suites/fs/workload/tasks/6-workunit/kernel_untar_build.yaml | 1
-rw-r--r--  qa/suites/nvmeof/basic/clusters/4-gateways-2-initiator.yaml | 2
-rw-r--r--  qa/suites/nvmeof/basic/workloads/nvmeof_initiator.yaml | 3
-rw-r--r--  qa/suites/nvmeof/basic/workloads/nvmeof_mtls.yaml | 36
-rw-r--r--  qa/suites/nvmeof/basic/workloads/nvmeof_namespaces.yaml | 2
-rw-r--r--  qa/suites/nvmeof/basic/workloads/nvmeof_scalability.yaml | 5
-rw-r--r--  qa/suites/nvmeof/thrash/gateway-initiator-setup/10-subsys-90-namespace-no_huge_pages.yaml | 37
-rw-r--r--  qa/suites/nvmeof/thrash/gateway-initiator-setup/120-subsys-8-namespace.yaml (renamed from qa/suites/nvmeof/thrash/gateway-initiator-setup/3-subsys-60-namespace.yaml) | 4
-rw-r--r--  qa/suites/nvmeof/thrash/thrashers/nvmeof_mon_thrash.yaml | 4
-rw-r--r--  qa/suites/nvmeof/thrash/thrashers/nvmeof_thrash.yaml | 5
-rw-r--r--  qa/suites/nvmeof/thrash/workloads/fio.yaml | 6
-rw-r--r--  qa/suites/orch/cephadm/smb/tasks/deploy_smb_mgr_clustering_ips.yaml | 91
-rw-r--r--  qa/suites/rados/mgr/tasks/4-units/module_selftest.yaml | 1
-rw-r--r--  qa/suites/rados/monthrash/workloads/rados_mon_osdmap_prune.yaml | 1
-rw-r--r--  qa/suites/rados/objectstore/backends/ceph_test_bluefs.yaml | 8
-rw-r--r--  qa/suites/rados/rest/mgr-restful.yaml | 31
l---------  qa/suites/rados/rest/supported-random-distro$ | 1
-rw-r--r--  qa/suites/rados/singleton/all/stretch-mode-5-mons-8-osds.yaml | 57
-rw-r--r--  qa/suites/rados/thrash-old-clients/workloads/rbd_cls.yaml | 9
-rw-r--r--  qa/suites/rados/valgrind-leaks/1-start.yaml | 1
-rw-r--r--  qa/suites/rados/verify/validater/valgrind.yaml | 2
-rw-r--r--  qa/suites/rbd/migration/6-prepare/qcow2-https.yaml | 8
-rw-r--r--  qa/suites/rgw/bucket-logging/% | 0
l---------  qa/suites/rgw/bucket-logging/.qa | 1
-rw-r--r--  qa/suites/rgw/bucket-logging/0-install.yaml | 13
l---------  qa/suites/rgw/bucket-logging/beast.yaml | 1
l---------  qa/suites/rgw/bucket-logging/fixed-1.yaml | 1
l---------  qa/suites/rgw/bucket-logging/ignore-pg-availability.yaml | 1
-rw-r--r--  qa/suites/rgw/bucket-logging/overrides.yaml | 10
l---------  qa/suites/rgw/bucket-logging/s3tests-branch.yaml | 1
l---------  qa/suites/rgw/bucket-logging/supported-distros | 1
-rw-r--r--  qa/suites/rgw/bucket-logging/tasks/+ | 0
-rw-r--r--  qa/suites/rgw/bucket-logging/tasks/s3tests.yaml | 6
-rw-r--r--  qa/suites/rgw/crypt/2-kms/barbican.yaml | 4
-rw-r--r--  qa/suites/rgw/multisite/realms/two-zonegroup.yaml (renamed from qa/suites/rgw/multisite/realms/two-zonegroup.yaml.disabled) | 2
-rw-r--r--  qa/suites/rgw/notifications/tasks/kafka/test_kafka.yaml | 2
-rw-r--r--  qa/suites/rgw/notifications/tasks/kafka_failover/+ | 0
-rw-r--r--  qa/suites/rgw/notifications/tasks/kafka_failover/0-install.yaml | 20
l---------  qa/suites/rgw/notifications/tasks/kafka_failover/supported-distros | 1
-rw-r--r--  qa/suites/rgw/notifications/tasks/kafka_failover/test_kafka.yaml | 8
l---------  qa/suites/rgw/sts/auth-order/.qa | 1
-rw-r--r--  qa/suites/rgw/sts/auth-order/local-sts.yaml | 5
-rw-r--r--  qa/suites/rgw/sts/auth-order/sts-local.yaml | 5
-rw-r--r--  qa/suites/rgw/tempest/0-install.yaml | 2
-rw-r--r--  qa/suites/rgw/tempest/tasks/s3/% | 0
l---------  qa/suites/rgw/tempest/tasks/s3/.qa | 1
l---------  qa/suites/rgw/tempest/tasks/s3/auth-order/.qa | 1
-rw-r--r--  qa/suites/rgw/tempest/tasks/s3/auth-order/external-local.yaml | 5
-rw-r--r--  qa/suites/rgw/tempest/tasks/s3/auth-order/local-external.yaml | 5
-rw-r--r--  qa/suites/rgw/tempest/tasks/s3/s3tests.yaml (renamed from qa/suites/rgw/tempest/tasks/s3tests.yaml) | 0
-rw-r--r--  qa/suites/rgw/verify/tasks/zzz-s3tests-java.yaml (renamed from qa/suites/rgw/verify/tasks/s3tests-java.yaml) | 0
-rw-r--r--  qa/suites/upgrade/quincy-x/filestore-remove-check/0-cluster/start.yaml | 15
-rw-r--r--  qa/suites/upgrade/quincy-x/filestore-remove-check/1-ceph-install/quincy.yaml | 14
-rw-r--r--  qa/suites/upgrade/quincy-x/filestore-remove-check/2 - upgrade.yaml | 9
-rw-r--r--  qa/suites/upgrade/quincy-x/parallel/0-start.yaml | 11
-rw-r--r--  qa/suites/upgrade/quincy-x/parallel/1-tasks.yaml | 7
-rw-r--r--  qa/suites/upgrade/quincy-x/parallel/workload/rados_api.yaml | 2
-rw-r--r--  qa/suites/upgrade/quincy-x/stress-split/1-start.yaml | 12
-rw-r--r--  qa/suites/upgrade/quincy-x/stress-split/2-first-half-tasks/rbd-cls.yaml | 2
-rw-r--r--  qa/suites/upgrade/quincy-x/stress-split/3-stress-tasks/rbd-cls.yaml | 2
-rw-r--r--  qa/suites/upgrade/reef-x/parallel/0-start.yaml | 20
-rw-r--r--  qa/suites/upgrade/reef-x/parallel/1-tasks.yaml | 8
-rw-r--r--  qa/suites/upgrade/reef-x/parallel/overrides/ignorelist_health.yaml | 19
-rw-r--r--  qa/suites/upgrade/reef-x/parallel/workload/rados_api.yaml | 2
-rw-r--r--  qa/suites/upgrade/reef-x/stress-split/1-start.yaml | 14
-rw-r--r--  qa/suites/upgrade/reef-x/stress-split/2-first-half-tasks/rbd-cls.yaml | 2
-rw-r--r--  qa/suites/upgrade/reef-x/stress-split/3-stress-tasks/rbd-cls.yaml | 2
-rw-r--r--  qa/suites/upgrade/reef-x/stress-split/overrides/ignorelist_health.yaml | 19
-rw-r--r--  qa/tasks/ceph.py | 14
-rw-r--r--  qa/tasks/ceph_manager.py | 90
-rw-r--r--  qa/tasks/cephadm.py | 10
-rw-r--r--  qa/tasks/cephfs/cephfs_test_case.py | 8
-rw-r--r--  qa/tasks/cephfs/filesystem.py | 2
-rw-r--r--  qa/tasks/cephfs/test_admin.py | 181
-rw-r--r--  qa/tasks/cephfs/test_exports.py | 186
-rw-r--r--  qa/tasks/cephfs/test_failover.py | 55
-rw-r--r--  qa/tasks/cephfs/test_nfs.py | 53
-rw-r--r--  qa/tasks/cephfs/test_quota.py | 5
-rw-r--r--  qa/tasks/check_counter.py | 32
-rw-r--r--  qa/tasks/fwd_scrub.py | 2
-rw-r--r--  qa/tasks/kafka.py | 11
-rw-r--r--  qa/tasks/kafka_failover.py | 244
-rw-r--r--  qa/tasks/mgr/dashboard/helper.py | 4
-rw-r--r--  qa/tasks/mgr/dashboard/test_mgr_module.py | 4
-rw-r--r--  qa/tasks/mgr/dashboard/test_rbd.py | 12
-rw-r--r--  qa/tasks/mgr/dashboard/test_rgw.py | 4
-rw-r--r--  qa/tasks/mgr/mgr_test_case.py | 23
-rw-r--r--  qa/tasks/mgr/test_module_selftest.py | 7
-rw-r--r--  qa/tasks/notification_tests.py | 2
-rw-r--r--  qa/tasks/nvme_loop.py | 111
-rw-r--r--  qa/tasks/nvmeof.py | 75
-rw-r--r--  qa/tasks/rados.py | 8
-rw-r--r--  qa/tasks/radosgw_admin.py | 35
-rw-r--r--  qa/tasks/rgw_multisite.py | 2
-rw-r--r--  qa/tasks/rook.py | 6
-rw-r--r--  qa/tasks/s3a_hadoop.py | 16
-rw-r--r--  qa/tasks/s3tests.py | 26
-rw-r--r--  qa/tasks/s3tests_java.py | 1
-rw-r--r--  qa/tasks/stretch_mode_disable_enable.py | 547
-rw-r--r--  qa/tasks/thrashosds-health.yaml | 1
-rw-r--r--  qa/tasks/vstart_runner.py | 8
-rwxr-xr-x  qa/workunits/cephtool/test.sh | 25
-rwxr-xr-x  qa/workunits/erasure-code/bench.sh | 17
-rwxr-xr-x  qa/workunits/fs/misc/fallocate.sh | 17
-rwxr-xr-x  qa/workunits/fs/snaps/snaptest-git-ceph.sh | 14
-rwxr-xr-x  qa/workunits/mon/mon-stretch-mode-5-mons-8-osds.sh | 72
-rwxr-xr-x  qa/workunits/nvmeof/basic_tests.sh | 10
-rwxr-xr-x  qa/workunits/nvmeof/fio_test.sh | 9
-rwxr-xr-x  qa/workunits/nvmeof/mtls_test.sh | 76
-rwxr-xr-x  qa/workunits/nvmeof/scalability_test.sh | 51
-rwxr-xr-x  qa/workunits/nvmeof/setup_subsystem.sh | 17
-rwxr-xr-x  qa/workunits/rbd/cli_generic.sh | 5
-rwxr-xr-x  qa/workunits/rbd/cli_migration.sh | 141
-rwxr-xr-x  qa/workunits/rest/test-restful.sh | 10
217 files changed, 3348 insertions, 629 deletions
diff --git a/qa/Makefile b/qa/Makefile
index ad655b7e743..05dc834adbd 100644
--- a/qa/Makefile
+++ b/qa/Makefile
@@ -1,4 +1,4 @@
-DIRS= workunits btrfs
+DIRS= workunits
all:
for d in $(DIRS) ; do ( cd $$d ; $(MAKE) all ) ; done
diff --git a/qa/btrfs/.gitignore b/qa/btrfs/.gitignore
deleted file mode 100644
index 530c1b5b4ed..00000000000
--- a/qa/btrfs/.gitignore
+++ /dev/null
@@ -1,3 +0,0 @@
-/clone_range
-/test_async_snap
-/create_async_snap
diff --git a/qa/btrfs/Makefile b/qa/btrfs/Makefile
deleted file mode 100644
index be95ecfd3cd..00000000000
--- a/qa/btrfs/Makefile
+++ /dev/null
@@ -1,11 +0,0 @@
-CFLAGS = -Wall -Wextra -D_GNU_SOURCE
-
-TARGETS = clone_range test_async_snap create_async_snap
-
-.c:
- $(CC) $(CFLAGS) $@.c -o $@
-
-all: $(TARGETS)
-
-clean:
- rm $(TARGETS)
diff --git a/qa/btrfs/clone_range.c b/qa/btrfs/clone_range.c
deleted file mode 100644
index 0a88e160131..00000000000
--- a/qa/btrfs/clone_range.c
+++ /dev/null
@@ -1,35 +0,0 @@
-#include <fcntl.h>
-#include <stdlib.h>
-#include <sys/ioctl.h>
-#include <string.h>
-
-#include <linux/types.h>
-#include "../../src/os/btrfs_ioctl.h"
-#include <stdio.h>
-#include <errno.h>
-
-int main(int argc, char **argv)
-{
- struct btrfs_ioctl_clone_range_args ca;
- int dfd;
- int r;
-
- if (argc < 6) {
- printf("usage: %s <srcfn> <srcoffset> <srclen> <destfn> <destoffset>\n", argv[0]);
- exit(1);
- }
-
- ca.src_fd = open(argv[1], O_RDONLY);
- ca.src_offset = atoi(argv[2]);
- ca.src_length = atoi(argv[3]);
- dfd = open(argv[4], O_WRONLY|O_CREAT);
- ca.dest_offset = atoi(argv[5]);
-
- r = ioctl(dfd, BTRFS_IOC_CLONE_RANGE, &ca);
- printf("clone_range %s %lld %lld~%lld to %s %d %lld = %d %s\n",
- argv[1], ca.src_fd,
- ca.src_offset, ca.src_length,
- argv[4], dfd,
- ca.dest_offset, r, strerror(errno));
- return r;
-}
diff --git a/qa/btrfs/create_async_snap.c b/qa/btrfs/create_async_snap.c
deleted file mode 100644
index 2ef22af7b45..00000000000
--- a/qa/btrfs/create_async_snap.c
+++ /dev/null
@@ -1,34 +0,0 @@
-#include <stdlib.h>
-#include <unistd.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <fcntl.h>
-#include <errno.h>
-#include <stdio.h>
-#include <sys/ioctl.h>
-#include <string.h>
-
-#include <linux/ioctl.h>
-#include <linux/types.h>
-#include "../../src/os/btrfs_ioctl.h"
-
-struct btrfs_ioctl_vol_args_v2 va;
-
-int main(int argc, char **argv)
-{
- int fd;
- int r;
-
- if (argc != 3) {
- printf("usage: %s <source subvol> <name>\n", argv[0]);
- return 1;
- }
- printf("creating snap ./%s from %s\n", argv[2], argv[1]);
- fd = open(".", O_RDONLY);
- va.fd = open(argv[1], O_RDONLY);
- va.flags = BTRFS_SUBVOL_CREATE_ASYNC;
- strcpy(va.name, argv[2]);
- r = ioctl(fd, BTRFS_IOC_SNAP_CREATE_V2, (unsigned long long)&va);
- printf("result %d\n", r ? -errno:0);
- return r;
-}
diff --git a/qa/btrfs/test_async_snap.c b/qa/btrfs/test_async_snap.c
deleted file mode 100644
index 211be95a61c..00000000000
--- a/qa/btrfs/test_async_snap.c
+++ /dev/null
@@ -1,83 +0,0 @@
-#include <stdlib.h>
-#include <unistd.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <fcntl.h>
-#include <errno.h>
-#include <stdio.h>
-#include <sys/ioctl.h>
-#include <string.h>
-
-#include <linux/ioctl.h>
-#include <linux/types.h>
-#include "../../src/os/btrfs_ioctl.h"
-
-struct btrfs_ioctl_vol_args_v2 va;
-struct btrfs_ioctl_vol_args vold;
-int max = 4;
-
-void check_return(int r)
-{
- if (r < 0) {
- printf("********* failed with %d %s ********\n", errno, strerror(errno));
- exit(1);
- }
-}
-
-int main(int argc, char **argv)
-{
- int num = 1000;
-
- if (argc > 1)
- num = atoi(argv[1]);
- printf("will do %d iterations\n", num);
-
- int cwd = open(".", O_RDONLY);
- printf("cwd = %d\n", cwd);
- while (num-- > 0) {
- if (rand() % 10 == 0) {
- __u64 transid;
- int r;
- printf("sync starting\n");
- r = ioctl(cwd, BTRFS_IOC_START_SYNC, &transid);
- check_return(r);
- printf("sync started, transid %lld, waiting\n", transid);
- r = ioctl(cwd, BTRFS_IOC_WAIT_SYNC, &transid);
- check_return(r);
- printf("sync finished\n");
- }
-
- int i = rand() % max;
- struct stat st;
- va.fd = cwd;
- sprintf(va.name, "test.%d", i);
- va.transid = 0;
- int r = stat(va.name, &st);
- if (r < 0) {
- if (rand() % 3 == 0) {
- printf("snap create (sync) %s\n", va.name);
- va.flags = 0;
- r = ioctl(cwd, BTRFS_IOC_SNAP_CREATE_V2, &va);
- check_return(r);
- } else {
- printf("snap create (async) %s\n", va.name);
- va.flags = BTRFS_SUBVOL_CREATE_ASYNC;
- r = ioctl(cwd, BTRFS_IOC_SNAP_CREATE_V2, &va);
- check_return(r);
- printf("snap created, transid %lld\n", va.transid);
- if (rand() % 2 == 0) {
- printf("waiting for async snap create\n");
- r = ioctl(cwd, BTRFS_IOC_WAIT_SYNC, &va.transid);
- check_return(r);
- }
- }
- } else {
- printf("snap remove %s\n", va.name);
- vold.fd = va.fd;
- strcpy(vold.name, va.name);
- r = ioctl(cwd, BTRFS_IOC_SNAP_DESTROY, &vold);
- check_return(r);
- }
- }
- return 0;
-}
diff --git a/qa/btrfs/test_rmdir_async_snap.c b/qa/btrfs/test_rmdir_async_snap.c
deleted file mode 100644
index 5dafaacaaeb..00000000000
--- a/qa/btrfs/test_rmdir_async_snap.c
+++ /dev/null
@@ -1,62 +0,0 @@
-#include <stdlib.h>
-#include <unistd.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <fcntl.h>
-#include <errno.h>
-#include <stdio.h>
-#include <sys/ioctl.h>
-#include <string.h>
-
-#include <linux/ioctl.h>
-#include <linux/types.h>
-#include "../../src/os/btrfs_ioctl.h"
-
-struct btrfs_ioctl_vol_args_v2 va;
-struct btrfs_ioctl_vol_args vold;
-
-int main(int argc, char **argv)
-{
- int num = 1000;
- int i, r, fd;
- char buf[30];
-
- if (argc > 1)
- num = atoi(argv[1]);
- printf("will do %d iterations\n", num);
-
- fd = open(".", O_RDONLY);
- vold.fd = 0;
- strcpy(vold.name, "current");
- r = ioctl(fd, BTRFS_IOC_SUBVOL_CREATE, (unsigned long int)&vold);
- printf("create current ioctl got %d\n", r ? errno:0);
- if (r)
- return 1;
-
- for (i=0; i<num; i++) {
- sprintf(buf, "current/dir.%d", i);
- r = mkdir(buf, 0755);
- printf("mkdir got %d\n", r ? errno:0);
- if (r)
- return 1;
- }
-
- va.fd = open("current", O_RDONLY);
- va.flags = BTRFS_SUBVOL_CREATE_ASYNC;
- for (i=0; i<num; i++) {
- system("/bin/cp /boot/vmlinuz-3.2.0-ceph-00142-g9e98323 current/foo");
- sprintf(buf, "current/dir.%d", i);
- r = rmdir(buf);
- printf("rmdir got %d\n", r ? errno:0);
- if (r)
- return 1;
-
- if (i % 10) continue;
- sprintf(va.name, "snap.%d", i);
- r = ioctl(fd, BTRFS_IOC_SNAP_CREATE_V2, (unsigned long long)&va);
- printf("ioctl got %d\n", r ? errno:0);
- if (r)
- return 1;
- }
- return 0;
-}
diff --git a/qa/cephfs/begin/3-kernel.yaml b/qa/cephfs/begin/3-kernel.yaml
new file mode 100644
index 00000000000..e94a0d87dc8
--- /dev/null
+++ b/qa/cephfs/begin/3-kernel.yaml
@@ -0,0 +1,23 @@
+# When the --kernel option is given to teuthology-suite, the kernel is set for
+# all nodes (also, the kernel is "distro" when the --kernel option is not set).
+# We don't generally want to use a custom kernel for all tests, so unset it.
+# The k-testing.yaml will set it, if given, for only the client nodes.
+#
+# Allow overriding this by using a branch ending in "-all".
+
+teuthology:
+ postmerge:
+ - |
+ local branch = yaml.kernel.branch
+ if branch and not yaml.kernel.branch:find "-all$" then
+ log.debug("removing default kernel specification: %s", yaml.kernel)
+ py_attrgetter(yaml.kernel).pop('branch', nil)
+ py_attrgetter(yaml.kernel).pop('deb', nil)
+ py_attrgetter(yaml.kernel).pop('flavor', nil)
+ py_attrgetter(yaml.kernel).pop('kdb', nil)
+ py_attrgetter(yaml.kernel).pop('koji', nil)
+ py_attrgetter(yaml.kernel).pop('koji_task', nil)
+ py_attrgetter(yaml.kernel).pop('rpm', nil)
+ py_attrgetter(yaml.kernel).pop('sha1', nil)
+ py_attrgetter(yaml.kernel).pop('tag', nil)
+ end
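For illustration only (not part of the change): a sketch of how the override above behaves for two hypothetical invocations, using only teuthology-suite flags that already appear in the cron entries later in this diff; the "-all" branch name is made up to show the escape hatch.

    # kernel spec is removed for all nodes; k-testing.yaml may re-add it for client nodes only
    teuthology-suite --ceph main --suite fs --kernel testing ...
    # a branch name ending in "-all" (hypothetical) keeps the kernel spec for every node
    teuthology-suite --ceph main --suite fs --kernel testing-all ...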
diff --git a/qa/cephfs/mount/kclient/overrides/distro/testing/k-testing.yaml b/qa/cephfs/mount/kclient/overrides/distro/testing/k-testing.yaml
index 2ee219125e7..048cd5ce8b9 100644
--- a/qa/cephfs/mount/kclient/overrides/distro/testing/k-testing.yaml
+++ b/qa/cephfs/mount/kclient/overrides/distro/testing/k-testing.yaml
@@ -1,3 +1,12 @@
+teuthology:
+ premerge: |
+ log.debug("base kernel %s", base_config.kernel)
+ local kernel = base_config.kernel
+ if kernel.branch ~= "distro" then
+ log.debug("overriding testing kernel with %s", kernel)
+ yaml_fragment.kernel.client = kernel
+ end
+
kernel:
client:
branch: testing
diff --git a/qa/cephfs/overrides/ignorelist_health.yaml b/qa/cephfs/overrides/ignorelist_health.yaml
index 94b42579777..5ac25a8f790 100644
--- a/qa/cephfs/overrides/ignorelist_health.yaml
+++ b/qa/cephfs/overrides/ignorelist_health.yaml
@@ -24,3 +24,4 @@ overrides:
- BLUESTORE_SLOW_OP_ALERT
- slow operation indications in BlueStore
- experiencing slow operations in BlueStore
+ - MGR_MODULE_ERROR
diff --git a/qa/config/crimson_bluestore.yaml b/qa/config/crimson_bluestore.yaml
new file mode 100644
index 00000000000..d5ba487b9bf
--- /dev/null
+++ b/qa/config/crimson_bluestore.yaml
@@ -0,0 +1,25 @@
+overrides:
+ ceph:
+ fs: xfs
+ conf:
+ osd:
+ # crimson's osd objectstore option
+ crimson osd objectstore: bluestore
+ debug alienstore: 20
+ bluestore block size: 96636764160
+ debug bluestore: 20
+ debug bluefs: 20
+ debug rocksdb: 10
+ bluestore compression mode: aggressive
+ bluestore fsck on mount: true
+ bluestore compression algorithm: snappy
+ # lower the full ratios since we can fill up a 100gb osd so quickly
+ mon osd full ratio: .9
+ mon osd backfillfull_ratio: .85
+ mon osd nearfull ratio: .8
+ osd failsafe full ratio: .95
+ bluestore rocksdb cf: false
+ log to stderr: true
+ err to stderr: true
+ log flush on exit: true
+ log to file: false
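As a quick arithmetic check of the "100gb osd" remark above: the bluestore block size configured here is exactly 90 GiB, which is why the full ratios are lowered.

    echo $(( 96636764160 / 1024 / 1024 / 1024 ))   # prints 90 (GiB)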
diff --git a/qa/config/crimson_qa_overrides.yaml b/qa/config/crimson_qa_overrides.yaml
index fa8f49a4986..a10c59d77cc 100644
--- a/qa/config/crimson_qa_overrides.yaml
+++ b/qa/config/crimson_qa_overrides.yaml
@@ -9,6 +9,7 @@ overrides:
osd pool default crimson: true
osd:
crimson osd obc lru size: 10
+ debug ms: 20
flavor: crimson
workunit:
env:
diff --git a/qa/config/seastore.yaml b/qa/config/crimson_seastore.yaml
index 6158563eedf..d1919456ab1 100644
--- a/qa/config/seastore.yaml
+++ b/qa/config/crimson_seastore.yaml
@@ -1,13 +1,13 @@
overrides:
ceph:
- fs: xfs
conf:
osd:
- osd objectstore: seastore
+ # crimson's osd objectstore option
+ crimson osd objectstore: seastore
debug seastore: 20
debug seastore onode: 20
debug seastore odata: 20
- debug seastore ompap: 20
+ debug seastore omap: 20
debug seastore tm: 20
debug seastore t: 20
debug seastore cleaner: 20
diff --git a/qa/crontab/teuthology-cronjobs b/qa/crontab/teuthology-cronjobs
index c979e5b105f..c558a1382ef 100644
--- a/qa/crontab/teuthology-cronjobs
+++ b/qa/crontab/teuthology-cronjobs
@@ -52,7 +52,6 @@ TEUTHOLOGY_SUITE_ARGS="--non-interactive --newest=100 --ceph-repo=https://git.ce
00 05 * * 0,2,4 $CW $SS 1 --ceph main --suite smoke -p 100 --force-priority
08 05 * * 0 $CW $SS 1 --ceph squid --suite smoke -p 100 --force-priority
16 05 * * 0 $CW $SS 1 --ceph reef --suite smoke -p 100 --force-priority
-24 05 * * 0 $CW $SS 1 --ceph quincy --suite smoke -p 100 --force-priority
## ********** windows tests on main branch - weekly
# 00 03 * * 1 CEPH_BRANCH=main; MACHINE_NAME=smithi; $CW teuthology-suite -v -c $CEPH_BRANCH -n 100 -m $MACHINE_NAME -s windows -k distro -e $CEPH_QA_EMAIL
@@ -122,7 +121,6 @@ TEUTHOLOGY_SUITE_ARGS="--non-interactive --newest=100 --ceph-repo=https://git.ce
16 00 * * 1 $CW $SS 1 --ceph quincy --suite upgrade-clients/client-upgrade-pacific-quincy --suite-branch pacific -p 820
24 00 * * 1 $CW $SS 120000 --ceph quincy --suite upgrade:octopus-x -p 820
32 00 * * 1 $CW $SS 120000 --ceph quincy --suite upgrade:pacific-x -p 820
-40 00 * * 1 $CW $SS 1 --ceph quincy --suite upgrade/quincy-p2p -p 820
### upgrade runs for reef release
###### on smithi
diff --git a/qa/rgw/s3tests-branch.yaml b/qa/rgw/s3tests-branch.yaml
index ef6819c87e0..8710ce35893 100644
--- a/qa/rgw/s3tests-branch.yaml
+++ b/qa/rgw/s3tests-branch.yaml
@@ -1,4 +1,4 @@
overrides:
s3tests:
- force-branch: ceph-master
- # git_remote: https://github.com/ceph/
+ force-branch: ceph-master
+ # git_remote: https://github.com/ceph/
diff --git a/qa/standalone/ceph-helpers.sh b/qa/standalone/ceph-helpers.sh
index 82bf7391a7d..72d70ca7ad5 100755
--- a/qa/standalone/ceph-helpers.sh
+++ b/qa/standalone/ceph-helpers.sh
@@ -1888,7 +1888,6 @@ function repair() {
local last_scrub=$(get_last_scrub_stamp $pgid)
ceph pg repair $pgid
wait_for_scrub $pgid "$last_scrub"
- sleep 2
}
function test_repair() {
@@ -1902,7 +1901,7 @@ function test_repair() {
wait_for_clean || return 1
repair 1.0 || return 1
kill_daemons $dir KILL osd || return 1
- ! TIMEOUT=1 repair 1.0 || return 1
+ ! TIMEOUT=2 repair 1.0 || return 1
teardown $dir || return 1
}
#######################################################################
@@ -1949,7 +1948,7 @@ function test_pg_scrub() {
wait_for_clean || return 1
pg_scrub 1.0 || return 1
kill_daemons $dir KILL osd || return 1
- ! TIMEOUT=1 pg_scrub 1.0 || return 1
+ ! TIMEOUT=2 pg_scrub 1.0 || return 1
teardown $dir || return 1
}
@@ -2089,7 +2088,7 @@ function test_wait_for_scrub() {
wait_for_scrub $pgid "$last_scrub" || return 1
kill_daemons $dir KILL osd || return 1
last_scrub=$(get_last_scrub_stamp $pgid)
- ! TIMEOUT=1 wait_for_scrub $pgid "$last_scrub" || return 1
+ ! TIMEOUT=2 wait_for_scrub $pgid "$last_scrub" || return 1
teardown $dir || return 1
}
diff --git a/qa/standalone/mon/mon-cluster-log.sh b/qa/standalone/mon/mon-cluster-log.sh
index 863a97c7cab..7b9adda0af6 100755
--- a/qa/standalone/mon/mon-cluster-log.sh
+++ b/qa/standalone/mon/mon-cluster-log.sh
@@ -62,7 +62,7 @@ function TEST_cluster_log_level() {
ceph config set mon.a mon_cluster_log_level info
ceph osd down 0
TIMEOUT=20 wait_for_osd up 0 || return 1
- grep -q "cluster [[]INF[]] osd.0.*boot" $dir/log
+ TIMEOUT=60 wait_for_string $dir/log "cluster [[]INF[]] osd.0.*boot"
return_code=$?
if [ $return_code -ne 0 ]; then
echo "Failed : Could not find INF log in the cluster log file"
@@ -145,9 +145,17 @@ function TEST_journald_cluster_log_level() {
ceph osd down 0
TIMEOUT=20 wait_for_osd up 0 || return 1
search_str="osd.0.*boot"
- sudo journalctl _COMM=ceph-mon CEPH_CHANNEL=cluster PRIORITY=6 --output=json-pretty --since "60 seconds ago" |jq '.MESSAGE' > $dir/journal.log
- grep -q "$search_str" $dir/journal.log
- return_code=$?
+ return_code=1
+ RETRY_DURATION=60
+ for ((i=0; i < $RETRY_DURATION; i++)); do
+ sudo journalctl _COMM=ceph-mon CEPH_CHANNEL=cluster PRIORITY=6 --output=json-pretty --since "60 seconds ago" |jq '.MESSAGE' > $dir/journal.log
+ if ! grep "$search_str" $dir/journal.log; then
+ sleep 1
+ else
+ return_code=0
+ break
+ fi
+ done
if [ $return_code -ne 0 ]; then
echo "Failed : Could not find INF log in the journalctl log file"
ERRORS=$(($ERRORS + 1))
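wait_for_string used above comes from ceph-helpers.sh and its body is not part of this diff; a minimal sketch of the poll-until-match pattern that both it and the journalctl retry loop rely on (the helper name and defaults below are illustrative, not the actual implementation):

    poll_for_pattern() {
        local file=$1 pattern=$2
        local timeout=${TIMEOUT:-60}
        for ((i = 0; i < timeout; i++)); do
            grep -q "$pattern" "$file" && return 0
            sleep 1
        done
        return 1
    }
    # e.g.: TIMEOUT=60 poll_for_pattern "$dir/log" "cluster [[]INF[]] osd.0.*boot"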
diff --git a/qa/standalone/osd-backfill/osd-backfill-space.sh b/qa/standalone/osd-backfill/osd-backfill-space.sh
index 6a5c69412f4..84b9703bbfc 100755
--- a/qa/standalone/osd-backfill/osd-backfill-space.sh
+++ b/qa/standalone/osd-backfill/osd-backfill-space.sh
@@ -609,9 +609,16 @@ function TEST_backfill_grow() {
wait_for_clean || return 1
+ # Capture the timestamp once cleanup and recovery have completed
+ current_timestamp=$(date +"%Y-%m-%dT%H:%M:%S")
+
delete_pool $poolname
kill_daemons $dir || return 1
- ! grep -q "num_bytes mismatch" $dir/osd.*.log || return 1
+
+ # Ignore "num_bytes mismatch" messages logged before the captured timestamp
+ if ! awk -v ts="$current_timestamp" '$0 >= ts && /num_bytes mismatch/' $dir/osd.*.log > /dev/null; then
+ return 1
+ fi
}
# Create a 5 shard EC pool on 6 OSD cluster
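The awk filter above relies on each OSD log line starting with an ISO-8601 timestamp, so a plain string comparison against the captured timestamp keeps only lines written after cleanup; a standalone illustration of the same idea (the log file name is hypothetical):

    ts=$(date +"%Y-%m-%dT%H:%M:%S")
    # ... run the workload that may log the warning ...
    awk -v ts="$ts" '$0 >= ts && /num_bytes mismatch/' osd.0.log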
diff --git a/qa/standalone/osd/osd-rep-recov-eio.sh b/qa/standalone/osd/osd-rep-recov-eio.sh
index 6fea441b3a9..a34f4a47189 100755
--- a/qa/standalone/osd/osd-rep-recov-eio.sh
+++ b/qa/standalone/osd/osd-rep-recov-eio.sh
@@ -219,6 +219,18 @@ function TEST_rados_repair_warning() {
ceph health | $(! grep -q "Too many repaired reads on 1 OSDs") || return 1
set +o pipefail
+ ceph health unmute OSD_TOO_MANY_REPAIRS
+ ceph tell osd.$primary clear_shards_repaired
+ sleep 10
+
+ set -o pipefail
+ # Should clear this
+ ceph health | $(! grep -q "Too many repaired reads on 1 OSDs") || return 1
+ set +o pipefail
+
+ ceph tell osd.$primary clear_shards_repaired $OBJS
+ sleep 10
+
for i in $(seq 1 $OBJS)
do
inject_$inject rep data $poolname ${objbase}-$i $dir 0 || return 1
@@ -235,7 +247,7 @@ function TEST_rados_repair_warning() {
COUNT=$(ceph pg dump --format=json-pretty | jq ".pg_map.osd_stats_sum.num_shards_repaired")
test "$COUNT" = "$(expr $OBJS \* 3)" || return 1
- # Give mon a chance to notice additional OSD and unmute
+ # Give mon a chance to notice additional OSD and reset num_shards_repaired
# The default tick time is 5 seconds
CHECKTIME=10
LOOPS=0
diff --git a/qa/standalone/scrub/osd-recovery-scrub.sh b/qa/standalone/scrub/osd-recovery-scrub.sh
index 843e9b9901b..7b77a60f35b 100755
--- a/qa/standalone/scrub/osd-recovery-scrub.sh
+++ b/qa/standalone/scrub/osd-recovery-scrub.sh
@@ -163,7 +163,7 @@ function wait_for_scrub_mod() {
fi
sleep 1
# are we still the primary?
- local current_primary=`bin/ceph pg $pgid query | jq '.acting[0]' `
+ local current_primary=`./bin/ceph pg $pgid query | jq '.acting[0]' `
if [ $orig_primary != $current_primary ]; then
echo $orig_primary no longer primary for $pgid
return 0
@@ -194,7 +194,7 @@ function pg_scrub_mod() {
local last_scrub=$(get_last_scrub_stamp $pgid)
# locate the primary
- local my_primary=`bin/ceph pg $pgid query | jq '.acting[0]' `
+ local my_primary=`./bin/ceph pg $pgid query | jq '.acting[0]' `
local recovery=false
ceph pg scrub $pgid
#ceph --format json pg dump pgs | jq ".pg_stats | .[] | select(.pgid == \"$pgid\") | .state"
diff --git a/qa/standalone/scrub/osd-scrub-repair.sh b/qa/standalone/scrub/osd-scrub-repair.sh
index 491e46603f7..6dd5b10ae8f 100755
--- a/qa/standalone/scrub/osd-scrub-repair.sh
+++ b/qa/standalone/scrub/osd-scrub-repair.sh
@@ -5833,7 +5833,7 @@ function TEST_periodic_scrub_replicated() {
flush_pg_stats
# Request a regular scrub and it will be done
- pg_schedule_scrub $pg
+ pg_scrub $pg
grep -q "Regular scrub request, deep-scrub details will be lost" $dir/osd.${primary}.log || return 1
# deep-scrub error is no longer present
diff --git a/qa/standalone/scrub/osd-scrub-test.sh b/qa/standalone/scrub/osd-scrub-test.sh
index 8015e023bdd..385479258f2 100755
--- a/qa/standalone/scrub/osd-scrub-test.sh
+++ b/qa/standalone/scrub/osd-scrub-test.sh
@@ -544,6 +544,9 @@ function TEST_dump_scrub_schedule() {
--osd_op_queue=wpq \
--osd_stats_update_period_not_scrubbing=1 \
--osd_stats_update_period_scrubbing=1 \
+ --osd_scrub_retry_after_noscrub=1 \
+ --osd_scrub_retry_pg_state=2 \
+ --osd_scrub_retry_delay=2 \
--osd_scrub_sleep=0.2"
for osd in $(seq 0 $(expr $OSDS - 1))
@@ -600,17 +603,16 @@ function TEST_dump_scrub_schedule() {
declare -A expct_dmp_duration=( ['dmp_last_duration']="0" ['dmp_last_duration_neg']="not0" )
wait_any_cond $pgid 10 $saved_last_stamp expct_dmp_duration "WaitingAfterScrub_dmp " sched_data || return 1
- sleep 2
-
#
# step 2: set noscrub and request a "periodic scrub". Watch for the change in the 'is the scrub
# scheduled for the future' value
#
- ceph tell osd.* config set osd_shallow_scrub_chunk_max "3" || return 1
- ceph tell osd.* config set osd_scrub_sleep "2.0" || return 1
ceph osd set noscrub || return 1
sleep 2
+ ceph tell osd.* config set osd_shallow_scrub_chunk_max "3" || return 1
+ ceph tell osd.* config set osd_scrub_sleep "2.0" || return 1
+ sleep 8
saved_last_stamp=${sched_data['query_last_stamp']}
ceph tell $pgid schedule-scrub
@@ -683,6 +685,234 @@ function TEST_pg_dump_objects_scrubbed() {
teardown $dir || return 1
}
+function wait_initial_scrubs() {
+ local -n pg_to_prim_dict=$1
+ local extr_dbg=1 # note: 3 and above leave some temp files around
+
+ # set a long schedule for the periodic scrubs. Wait for the
+ # initial 'no previous scrub is known' scrubs to finish for all PGs.
+ ceph tell osd.* config set osd_scrub_min_interval 7200
+ ceph tell osd.* config set osd_deep_scrub_interval 14400
+ ceph tell osd.* config set osd_max_scrubs 32
+ ceph tell osd.* config set osd_scrub_sleep 0
+ ceph tell osd.* config set osd_shallow_scrub_chunk_max 10
+ ceph tell osd.* config set osd_scrub_chunk_max 10
+
+ for pg in "${!pg_to_prim_dict[@]}"; do
+ (( extr_dbg >= 1 )) && echo "Scheduling initial scrub for $pg"
+ ceph tell $pg scrub || return 1
+ done
+
+ sleep 1
+ (( extr_dbg >= 1 )) && ceph pg dump pgs --format=json-pretty | \
+ jq '.pg_stats | map(select(.last_scrub_duration == 0)) | map({pgid: .pgid, last_scrub_duration: .last_scrub_duration})'
+
+ tout=20
+ while [ $tout -gt 0 ] ; do
+ sleep 0.5
+ (( extr_dbg >= 2 )) && ceph pg dump pgs --format=json-pretty | \
+ jq '.pg_stats | map(select(.last_scrub_duration == 0)) | map({pgid: .pgid, last_scrub_duration: .last_scrub_duration})'
+ not_done=$(ceph pg dump pgs --format=json-pretty | \
+ jq '.pg_stats | map(select(.last_scrub_duration == 0)) | map({pgid: .pgid, last_scrub_duration: .last_scrub_duration})' | wc -l )
+ # note that we should ignore a header line
+ if [ "$not_done" -le 1 ]; then
+ break
+ fi
+ not_done=$(( (not_done - 2) / 4 ))
+ echo "Still waiting for $not_done PGs to finish initial scrubs (timeout $tout)"
+ tout=$((tout - 1))
+ done
+ (( tout == 0 )) && return 1
+ return 0
+}
+
+
+# Whenever a PG is being scrubbed at regular (periodic) urgency, and is queued
+# waiting for its replica reservations:
+# if the operator requests a scrub of the same PG, the operator's request
+# should trigger an abort of the ongoing scrub.
+#
+# The test process:
+# - a periodic scrub of a PG is initiated. That scrub is set to be a very slow one.
+# - a second PG, which shares some of its replicas, is instructed to be scrubbed. That one
+#   should be stuck in replica reservation. We will verify that.
+# - now - the operator requests that second PG to be scrubbed. The original (pending)
+#   scrub should be aborted. We check for:
+#   - the new, operator-requested scrub being scheduled
+#   - the replicas' reservations being released
+function TEST_abort_periodic_for_operator() {
+ local dir=$1
+ local -A cluster_conf=(
+ ['osds_num']="5"
+ ['pgs_in_pool']="16"
+ ['pool_name']="test"
+ )
+ local extr_dbg=1 # note: 3 and above leave some temp files around
+
+ standard_scrub_wpq_cluster "$dir" cluster_conf 3 || return 1
+ local poolid=${cluster_conf['pool_id']}
+ local poolname=${cluster_conf['pool_name']}
+ echo "Pool: $poolname : $poolid"
+
+ #turn off '-x' (but remember previous state)
+ local saved_echo_flag=${-//[^x]/}
+ set +x
+
+ # fill the pool with some data
+ TESTDATA="testdata.$$"
+ dd if=/dev/urandom of=$TESTDATA bs=320 count=1
+ for i in $( seq 1 256 )
+ do
+ rados -p "$poolname" put "obj${i}" $TESTDATA 2>/dev/null 1>/dev/null
+ done
+ rm -f $TESTDATA
+ if [[ -n "$saved_echo_flag" ]]; then set -x; fi
+
+ # create the dictionary of the PGs in the pool
+ declare -A pg_pr
+ declare -A pg_ac
+ declare -A pg_po
+ build_pg_dicts "$dir" pg_pr pg_ac pg_po "-"
+ (( extr_dbg >= 2 )) && echo "PGs table:"
+ for pg in "${!pg_pr[@]}"; do
+ (( extr_dbg >= 2 )) && echo "Got: $pg: ${pg_pr[$pg]} ( ${pg_ac[$pg]} ) ${pg_po[$pg]}"
+ done
+
+ wait_initial_scrubs pg_pr || return 1
+
+ # limit all OSDs to one scrub at a time
+ ceph tell osd.* config set osd_max_scrubs 1
+ ceph tell osd.* config set osd_stats_update_period_not_scrubbing 1
+
+ # configure for slow scrubs
+ ceph tell osd.* config set osd_scrub_sleep 3
+ ceph tell osd.* config set osd_shallow_scrub_chunk_max 2
+ ceph tell osd.* config set osd_scrub_chunk_max 2
+ (( extr_dbg >= 2 )) && ceph tell osd.2 dump_scrub_reservations --format=json-pretty
+
+ # the first PG to work with:
+ local pg1="1.0"
+ # and another one, that shares its primary, and at least one more active set member
+ local pg2=""
+ for pg in "${!pg_pr[@]}"; do
+ if [[ "${pg_pr[$pg]}" == "${pg_pr[$pg1]}" ]]; then
+ local -i common=0
+ count_common_active $pg $pg1 pg_ac common
+ if [[ $common -gt 1 ]]; then
+ pg2=$pg
+ break
+ fi
+ fi
+ done
+ if [[ -z "$pg2" ]]; then
+ # \todo handle the case when no such PG is found
+ echo "No PG found with the same primary as $pg1"
+ return 1
+ fi
+
+ # the common primary is allowed two concurrent scrubs
+ ceph tell osd."${pg_pr[$pg1]}" config set osd_max_scrubs 2
+ echo "The two PGs to manipulate are $pg1 and $pg2"
+
+ set_query_debug "$pg1"
+ # wait till the information published by pg1 is updated to show it as
+ # not being scrubbed
+ local is_act
+ for i in $( seq 1 3 )
+ do
+ is_act=$(ceph pg "$pg1" query | jq '.scrubber.active')
+ if [[ "$is_act" = "false" ]]; then
+ break
+ fi
+ echo "Still waiting for pg $pg1 to finish scrubbing"
+ sleep 0.7
+ done
+ ceph pg dump pgs
+ if [[ "$is_act" != "false" ]]; then
+ ceph pg "$pg1" query
+ echo "PG $pg1 appears to be still scrubbing"
+ return 1
+ fi
+ sleep 0.5
+
+ echo "Initiating a periodic scrub of $pg1"
+ (( extr_dbg >= 2 )) && ceph pg "$pg1" query -f json-pretty | jq '.scrubber'
+ ceph tell $pg1 schedule-deep-scrub || return 1
+ sleep 1
+ (( extr_dbg >= 2 )) && ceph pg "$pg1" query -f json-pretty | jq '.scrubber'
+
+ for i in $( seq 1 14 )
+ do
+ sleep 0.5
+ stt=$(ceph pg "$pg1" query | jq '.scrubber')
+ is_active=$(echo $stt | jq '.active')
+ is_reserving_replicas=$(echo $stt | jq '.is_reserving_replicas')
+ if [[ "$is_active" = "true" && "$is_reserving_replicas" = "false" ]]; then
+ break
+ fi
+ echo "Still waiting for pg $pg1 to start scrubbing: $stt"
+ done
+ if [[ "$is_active" != "true" || "$is_reserving_replicas" != "false" ]]; then
+ ceph pg "$pg1" query -f json-pretty | jq '.scrubber'
+ echo "The scrub is not active or is reserving replicas"
+ return 1
+ fi
+ (( extr_dbg >= 2 )) && ceph pg "$pg1" query -f json-pretty | jq '.scrubber'
+
+
+ # PG 1 is scrubbing, and has reserved the replicas - some of which are shared
+ # by PG 2. As the max-scrubs was set to 1, that should prevent PG 2 from
+ # reserving its replicas.
+
+ (( extr_dbg >= 1 )) && ceph tell osd.* dump_scrub_reservations --format=json-pretty
+
+ # now - the second scrub - which should be blocked on reserving
+ set_query_debug "$pg2"
+ ceph tell "$pg2" schedule-deep-scrub
+ sleep 0.5
+ (( extr_dbg >= 2 )) && echo "===================================================================================="
+ (( extr_dbg >= 2 )) && ceph pg "$pg2" query -f json-pretty | jq '.scrubber'
+ (( extr_dbg >= 2 )) && ceph pg "$pg1" query -f json-pretty | jq '.scrubber'
+ sleep 1
+ (( extr_dbg >= 2 )) && echo "===================================================================================="
+ (( extr_dbg >= 2 )) && ceph pg "$pg2" query -f json-pretty | jq '.scrubber'
+ (( extr_dbg >= 2 )) && ceph pg "$pg1" query -f json-pretty | jq '.scrubber'
+
+ # make sure pg2 scrub is stuck in the reserving state
+ local stt2=$(ceph pg "$pg2" query | jq '.scrubber')
+ local pg2_is_reserving
+ pg2_is_reserving=$(echo $stt2 | jq '.is_reserving_replicas')
+ if [[ "$pg2_is_reserving" != "true" ]]; then
+ echo "The scheduled scrub for $pg2 should have been stuck"
+ ceph pg dump pgs
+ return 1
+ fi
+
+ # now - issue an operator-initiated scrub on pg2.
+ # The periodic scrub should be aborted, and the operator-initiated scrub should start.
+ echo "Instructing $pg2 to perform a high-priority scrub"
+ ceph tell "$pg2" scrub
+ for i in $( seq 1 10 )
+ do
+ sleep 0.5
+ stt2=$(ceph pg "$pg2" query | jq '.scrubber')
+ pg2_is_active=$(echo $stt2 | jq '.active')
+ pg2_is_reserving=$(echo $stt2 | jq '.is_reserving_replicas')
+ if [[ "$pg2_is_active" = "true" && "$pg2_is_reserving" != "true" ]]; then
+ break
+ fi
+ echo "Still waiting: $stt2"
+ done
+
+ if [[ "$pg2_is_active" != "true" || "$pg2_is_reserving" = "true" ]]; then
+ echo "The high-priority scrub for $pg2 is not active or is reserving replicas"
+ return 1
+ fi
+ echo "Done"
+}
+
+
+
main osd-scrub-test "$@"
# Local Variables:
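To try the new case outside teuthology, the standalone runner in this tree can be pointed at the script; a sketch assuming a finished build directory (the invocation follows the usual run-standalone.sh convention, nothing specific to this change):

    cd build
    ../qa/run-standalone.sh osd-scrub-test.sh
    # or only the new test:
    ../qa/run-standalone.sh "osd-scrub-test.sh TEST_abort_periodic_for_operator"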
diff --git a/qa/standalone/scrub/scrub-helpers.sh b/qa/standalone/scrub/scrub-helpers.sh
index 49b8346b8d2..dd37b643e08 100644
--- a/qa/standalone/scrub/scrub-helpers.sh
+++ b/qa/standalone/scrub/scrub-helpers.sh
@@ -240,8 +240,8 @@ function standard_scrub_cluster() {
local saved_echo_flag=${-//[^x]/}
set +x
- run_mon $dir a --osd_pool_default_size=$OSDS || return 1
- run_mgr $dir x || return 1
+ run_mon $dir a --osd_pool_default_size=3 || return 1
+ run_mgr $dir x --mgr_stats_period=1 || return 1
local ceph_osd_args="--osd_deep_scrub_randomize_ratio=0 \
--osd_scrub_interval_randomize_ratio=0 \
@@ -249,9 +249,12 @@ function standard_scrub_cluster() {
--osd_pool_default_pg_autoscale_mode=off \
--osd_pg_stat_report_interval_max_seconds=1 \
--osd_pg_stat_report_interval_max_epochs=1 \
+ --osd_stats_update_period_not_scrubbing=3 \
+ --osd_stats_update_period_scrubbing=1 \
--osd_scrub_retry_after_noscrub=5 \
--osd_scrub_retry_pg_state=5 \
--osd_scrub_retry_delay=3 \
+ --osd_pool_default_size=3 \
$extra_pars"
for osd in $(seq 0 $(expr $OSDS - 1))
@@ -297,6 +300,107 @@ function standard_scrub_wpq_cluster() {
}
+# Parse the output of a 'pg dump pgs_brief' command and build a set of dictionaries:
+# - pg_primary_dict: a dictionary of pgid -> acting_primary
+# - pg_acting_dict: a dictionary of pgid -> acting set
+# - pg_pool_dict: a dictionary of pgid -> pool
+# If the input file is '-', the function will fetch the dump directly from the ceph cluster.
+function build_pg_dicts {
+ local dir=$1
+ local -n pg_primary_dict=$2
+ local -n pg_acting_dict=$3
+ local -n pg_pool_dict=$4
+ local infile=$5
+
+ local extr_dbg=0 # note: 3 and above leave some temp files around
+
+ #turn off '-x' (but remember previous state)
+ local saved_echo_flag=${-//[^x]/}
+ set +x
+
+ # if the infile name is '-', fetch the dump directly from the ceph cluster
+ if [[ $infile == "-" ]]; then
+ local -r ceph_cmd="ceph pg dump pgs_brief -f=json-pretty"
+ local -r ceph_cmd_out=$(eval $ceph_cmd)
+ local -r ceph_cmd_rc=$?
+ if [[ $ceph_cmd_rc -ne 0 ]]; then
+ echo "Error: the command '$ceph_cmd' failed with return code $ceph_cmd_rc"
+ fi
+ (( extr_dbg >= 3 )) && echo "$ceph_cmd_out" > /tmp/e2
+ l0=`echo "$ceph_cmd_out" | jq '[.pg_stats | group_by(.pg_stats)[0] | map({pgid: .pgid, pool: (.pgid | split(".")[0]), acting: .acting, acting_primary: .acting_primary})] | .[]' `
+ else
+ l0=`jq '[.pg_stats | group_by(.pg_stats)[0] | map({pgid: .pgid, pool: (.pgid | split(".")[0]), acting: .acting, acting_primary: .acting_primary})] | .[]' $infile `
+ fi
+ (( extr_dbg >= 2 )) && echo "L0: $l0"
+
+ mapfile -t l1 < <(echo "$l0" | jq -c '.[]')
+ (( extr_dbg >= 2 )) && echo "L1: ${#l1[@]}"
+
+ for item in "${l1[@]}"; do
+ pgid=$(echo "$item" | jq -r '.pgid')
+ acting=$(echo "$item" | jq -r '.acting | @sh')
+ pg_acting_dict["$pgid"]=$acting
+ acting_primary=$(echo "$item" | jq -r '.acting_primary')
+ pg_primary_dict["$pgid"]=$acting_primary
+ pool=$(echo "$item" | jq -r '.pool')
+ pg_pool_dict["$pgid"]=$pool
+ done
+
+ if [[ -n "$saved_echo_flag" ]]; then set -x; fi
+}
+
+
+# a function that counts the number of common active-set elements between two PGs
+# 1 - the first PG
+# 2 - the second PG
+# 3 - the dictionary of active sets
+# 4 - [out] the number of common active-set elements
+function count_common_active {
+ local pg1=$1
+ local pg2=$2
+ local -n pg_acting_dict=$3
+ local -n res=$4
+
+ local -a a1=(${pg_acting_dict[$pg1]})
+ local -a a2=(${pg_acting_dict[$pg2]})
+
+ local -i cnt=0
+ for i in "${a1[@]}"; do
+ for j in "${a2[@]}"; do
+ if [[ $i -eq $j ]]; then
+ cnt=$((cnt+1))
+ fi
+ done
+ done
+
+ res=$cnt
+}
+
+
+# given a PG, find another one with a disjoint active set
+# - but allow a possible common Primary
+# 1 - the PG
+# 2 - the dictionary of active sets
+# 3 - the dictionary of primaries
+# 4 - [out] - the PG with a disjoint active set
+function find_disjoint_but_primary {
+ local pg=$1
+ local -n ac_dict=$2
+ local -n p_dict=$3
+ local -n res=$4
+
+ for cand in "${!ac_dict[@]}"; do
+ if [[ "$cand" != "$pg" ]]; then
+ local -i common=0
+ count_common_active "$pg" "$cand" ac_dict common
+ if [[ $common -eq 0 || ( $common -eq 1 && "${p_dict[$pg]}" == "${p_dict[$cand]}" )]]; then
+ res=$cand
+ return
+ fi
+ fi
+ done
+}
+
+
+
# A debug flag is set for the PG specified, causing the 'pg query' command to display
# an additional 'scrub sessions counter' field.
#
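A usage sketch for the dictionary helpers added above, mirroring how TEST_abort_periodic_for_operator calls them; the variable names and the example pgids are illustrative:

    declare -A pg_primary pg_acting pg_pool
    build_pg_dicts "$dir" pg_primary pg_acting pg_pool "-"   # "-" queries the live cluster
    echo "PG 1.0: primary=${pg_primary[1.0]} acting=(${pg_acting[1.0]}) pool=${pg_pool[1.0]}"

    declare -i shared=0
    count_common_active "1.0" "1.1" pg_acting shared
    echo "PGs 1.0 and 1.1 share $shared acting-set OSDs"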
diff --git a/qa/suites/ceph-ansible/smoke/basic/2-ceph/ceph_ansible.yaml b/qa/suites/ceph-ansible/smoke/basic/2-ceph/ceph_ansible.yaml
index 7e7ede3e334..5be06bc6732 100644
--- a/qa/suites/ceph-ansible/smoke/basic/2-ceph/ceph_ansible.yaml
+++ b/qa/suites/ceph-ansible/smoke/basic/2-ceph/ceph_ansible.yaml
@@ -21,7 +21,6 @@ overrides:
ceph_repository: dev
ceph_mgr_modules:
- status
- - restful
cephfs_pools:
- name: "cephfs_data"
pg_num: "64"
diff --git a/qa/suites/ceph-ansible/smoke/basic/4-tasks/rest.yaml b/qa/suites/ceph-ansible/smoke/basic/4-tasks/rest.yaml
deleted file mode 100644
index 8e389134b92..00000000000
--- a/qa/suites/ceph-ansible/smoke/basic/4-tasks/rest.yaml
+++ /dev/null
@@ -1,15 +0,0 @@
-tasks:
-- exec:
- mgr.x:
- - systemctl stop ceph-mgr.target
- - sleep 5
- - ceph -s
-- exec:
- mon.a:
- - ceph restful create-key admin
- - ceph restful create-self-signed-cert
- - ceph restful restart
-- workunit:
- clients:
- client.0:
- - rest/test-restful.sh
diff --git a/qa/suites/cephmetrics/2-ceph/ceph_ansible.yaml b/qa/suites/cephmetrics/2-ceph/ceph_ansible.yaml
index 309f5060045..53e2b7fdbc8 100644
--- a/qa/suites/cephmetrics/2-ceph/ceph_ansible.yaml
+++ b/qa/suites/cephmetrics/2-ceph/ceph_ansible.yaml
@@ -20,7 +20,6 @@ overrides:
ceph_repository: dev
ceph_mgr_modules:
- status
- - restful
cephfs_pools:
- name: "cephfs_data"
pg_num: "64"
diff --git a/qa/suites/crimson-rados-experimental/.qa b/qa/suites/crimson-rados-experimental/.qa
index fea2489fdf6..a602a0353e7 120000
--- a/qa/suites/crimson-rados-experimental/.qa
+++ b/qa/suites/crimson-rados-experimental/.qa
@@ -1 +1 @@
-../.qa
\ No newline at end of file
+../.qa/
\ No newline at end of file
diff --git a/qa/suites/crimson-rados-experimental/seastore/basic/centos_latest.yaml b/qa/suites/crimson-rados-experimental/seastore/basic/centos_latest.yaml
deleted file mode 120000
index bd9854e7029..00000000000
--- a/qa/suites/crimson-rados-experimental/seastore/basic/centos_latest.yaml
+++ /dev/null
@@ -1 +0,0 @@
-.qa/distros/supported/centos_latest.yaml
\ No newline at end of file
diff --git a/qa/suites/crimson-rados-experimental/seastore/basic/clusters/fixed-1.yaml b/qa/suites/crimson-rados-experimental/seastore/basic/clusters/fixed-1.yaml
deleted file mode 100644
index d8e5898b99f..00000000000
--- a/qa/suites/crimson-rados-experimental/seastore/basic/clusters/fixed-1.yaml
+++ /dev/null
@@ -1,14 +0,0 @@
-overrides:
- ceph-deploy:
- conf:
- global:
- osd pool default size: 2
- osd crush chooseleaf type: 0
- osd pool default pg num: 128
- osd pool default pgp num: 128
- ceph:
- conf:
- osd:
- osd shutdown pgref assert: true
-roles:
-- [mon.a, mgr.x, osd.0, osd.1, osd.2, client.0]
diff --git a/qa/suites/crimson-rados-experimental/seastore/basic/deploy/ceph.yaml b/qa/suites/crimson-rados-experimental/seastore/basic/deploy/ceph.yaml
deleted file mode 100644
index c22f08eecf8..00000000000
--- a/qa/suites/crimson-rados-experimental/seastore/basic/deploy/ceph.yaml
+++ /dev/null
@@ -1,18 +0,0 @@
-overrides:
- install:
- ceph:
- flavor: crimson
-tasks:
-- install:
-- ceph:
- conf:
- osd:
- debug monc: 20
- mon:
- mon min osdmap epochs: 50
- paxos service trim min: 10
- # prune full osdmaps regularly
- mon osdmap full prune min: 15
- mon osdmap full prune interval: 2
- mon osdmap full prune txsize: 2
- flavor: crimson
diff --git a/qa/suites/crimson-rados-experimental/seastore/basic/objectstore/seastore.yaml b/qa/suites/crimson-rados-experimental/seastore/basic/objectstore/seastore.yaml
deleted file mode 120000
index 6a70c381709..00000000000
--- a/qa/suites/crimson-rados-experimental/seastore/basic/objectstore/seastore.yaml
+++ /dev/null
@@ -1 +0,0 @@
-.qa/config/seastore.yaml
\ No newline at end of file
diff --git a/qa/suites/crimson-rados-experimental/seastore/basic/tasks/rados_api_tests.yaml b/qa/suites/crimson-rados-experimental/seastore/basic/tasks/rados_api_tests.yaml
deleted file mode 100644
index ad8c921425b..00000000000
--- a/qa/suites/crimson-rados-experimental/seastore/basic/tasks/rados_api_tests.yaml
+++ /dev/null
@@ -1,28 +0,0 @@
-overrides:
- ceph:
- log-ignorelist:
- - reached quota
- - but it is still running
- - overall HEALTH_
- - \(POOL_FULL\)
- - \(SMALLER_PGP_NUM\)
- - \(CACHE_POOL_NO_HIT_SET\)
- - \(CACHE_POOL_NEAR_FULL\)
- - \(POOL_APP_NOT_ENABLED\)
- - \(PG_AVAILABILITY\)
- - \(PG_DEGRADED\)
- conf:
- client:
- debug ms: 1
- mon:
- mon warn on pool no app: false
- osd:
- osd class load list: "*"
- osd class default list: "*"
- osd blocked scrub grace period: 3600
-tasks:
-- workunit:
- clients:
- client.0:
- - rados/test.sh
- - rados/test_pool_quota.sh
diff --git a/qa/suites/crimson-rados-experimental/seastore/basic/tasks/readwrite.yaml b/qa/suites/crimson-rados-experimental/seastore/basic/tasks/readwrite.yaml
deleted file mode 100644
index 25efcdac83d..00000000000
--- a/qa/suites/crimson-rados-experimental/seastore/basic/tasks/readwrite.yaml
+++ /dev/null
@@ -1,18 +0,0 @@
-overrides:
- ceph:
- crush_tunables: optimal
- conf:
- mon:
- mon osd initial require min compat client: luminous
- osd:
- osd_discard_disconnected_ops: false
-tasks:
-- rados:
- clients: [client.0]
- ops: 4000
- objects: 500
- max_attr_len: 8192
- op_weights:
- read: 45
- write: 45
- delete: 10
diff --git a/qa/suites/crimson-rados-experimental/seastore/basic/% b/qa/suites/crimson-rados-experimental/thrash/%
index e69de29bb2d..e69de29bb2d 100644
--- a/qa/suites/crimson-rados-experimental/seastore/basic/%
+++ b/qa/suites/crimson-rados-experimental/thrash/%
diff --git a/qa/suites/crimson-rados-experimental/seastore/.qa b/qa/suites/crimson-rados-experimental/thrash/.qa
index a602a0353e7..a602a0353e7 120000
--- a/qa/suites/crimson-rados-experimental/seastore/.qa
+++ b/qa/suites/crimson-rados-experimental/thrash/.qa
diff --git a/qa/suites/crimson-rados-experimental/seastore/basic/.qa b/qa/suites/crimson-rados-experimental/thrash/0-size-min-size-overrides/.qa
index a602a0353e7..a602a0353e7 120000
--- a/qa/suites/crimson-rados-experimental/seastore/basic/.qa
+++ b/qa/suites/crimson-rados-experimental/thrash/0-size-min-size-overrides/.qa
diff --git a/qa/suites/crimson-rados-experimental/thrash/0-size-min-size-overrides/2-size-2-min-size.yaml.disabled b/qa/suites/crimson-rados-experimental/thrash/0-size-min-size-overrides/2-size-2-min-size.yaml.disabled
new file mode 120000
index 00000000000..5393a75548a
--- /dev/null
+++ b/qa/suites/crimson-rados-experimental/thrash/0-size-min-size-overrides/2-size-2-min-size.yaml.disabled
@@ -0,0 +1 @@
+.qa/overrides/2-size-2-min-size.yaml
\ No newline at end of file
diff --git a/qa/suites/crimson-rados-experimental/thrash/0-size-min-size-overrides/3-size-2-min-size.yaml b/qa/suites/crimson-rados-experimental/thrash/0-size-min-size-overrides/3-size-2-min-size.yaml
new file mode 120000
index 00000000000..5ff70eadf75
--- /dev/null
+++ b/qa/suites/crimson-rados-experimental/thrash/0-size-min-size-overrides/3-size-2-min-size.yaml
@@ -0,0 +1 @@
+.qa/overrides/3-size-2-min-size.yaml
\ No newline at end of file
diff --git a/qa/suites/crimson-rados-experimental/seastore/basic/clusters/.qa b/qa/suites/crimson-rados-experimental/thrash/1-pg-log-overrides/.qa
index a602a0353e7..a602a0353e7 120000
--- a/qa/suites/crimson-rados-experimental/seastore/basic/clusters/.qa
+++ b/qa/suites/crimson-rados-experimental/thrash/1-pg-log-overrides/.qa
diff --git a/qa/suites/fs/thrash/workloads/overrides/+ b/qa/suites/crimson-rados-experimental/thrash/1-pg-log-overrides/normal_pg_log.yaml
index e69de29bb2d..e69de29bb2d 100644
--- a/qa/suites/fs/thrash/workloads/overrides/+
+++ b/qa/suites/crimson-rados-experimental/thrash/1-pg-log-overrides/normal_pg_log.yaml
diff --git a/qa/suites/crimson-rados/thrash/1-pg-log-overrides/short_pg_log.yaml.disabled b/qa/suites/crimson-rados-experimental/thrash/1-pg-log-overrides/short_pg_log.yaml
index abd86d7d986..abd86d7d986 120000
--- a/qa/suites/crimson-rados/thrash/1-pg-log-overrides/short_pg_log.yaml.disabled
+++ b/qa/suites/crimson-rados-experimental/thrash/1-pg-log-overrides/short_pg_log.yaml
diff --git a/qa/suites/rados/rest/% b/qa/suites/crimson-rados-experimental/thrash/2-recovery-overrides/$
index e69de29bb2d..e69de29bb2d 100644
--- a/qa/suites/rados/rest/%
+++ b/qa/suites/crimson-rados-experimental/thrash/2-recovery-overrides/$
diff --git a/qa/suites/crimson-rados-experimental/seastore/basic/deploy/.qa b/qa/suites/crimson-rados-experimental/thrash/2-recovery-overrides/.qa
index a602a0353e7..a602a0353e7 120000
--- a/qa/suites/crimson-rados-experimental/seastore/basic/deploy/.qa
+++ b/qa/suites/crimson-rados-experimental/thrash/2-recovery-overrides/.qa
diff --git a/qa/suites/crimson-rados-experimental/thrash/2-recovery-overrides/default.yaml b/qa/suites/crimson-rados-experimental/thrash/2-recovery-overrides/default.yaml
new file mode 100644
index 00000000000..e69de29bb2d
--- /dev/null
+++ b/qa/suites/crimson-rados-experimental/thrash/2-recovery-overrides/default.yaml
diff --git a/qa/suites/crimson-rados-experimental/thrash/2-recovery-overrides/more-active-recovery.yaml.disabled b/qa/suites/crimson-rados-experimental/thrash/2-recovery-overrides/more-active-recovery.yaml.disabled
new file mode 120000
index 00000000000..47afd70202d
--- /dev/null
+++ b/qa/suites/crimson-rados-experimental/thrash/2-recovery-overrides/more-active-recovery.yaml.disabled
@@ -0,0 +1 @@
+.qa/overrides/more-active-recovery.yaml
\ No newline at end of file
diff --git a/qa/suites/crimson-rados-experimental/thrash/2-recovery-overrides/more-async-partial-recovery.yaml.disabled b/qa/suites/crimson-rados-experimental/thrash/2-recovery-overrides/more-async-partial-recovery.yaml.disabled
new file mode 100644
index 00000000000..0bbc72db754
--- /dev/null
+++ b/qa/suites/crimson-rados-experimental/thrash/2-recovery-overrides/more-async-partial-recovery.yaml.disabled
@@ -0,0 +1,6 @@
+overrides:
+ ceph:
+ conf:
+ global:
+ osd_async_recovery_min_cost: 1
+ osd_object_clean_region_max_num_intervals: 1000
diff --git a/qa/suites/crimson-rados-experimental/thrash/2-recovery-overrides/more-async-recovery.yaml.disabled b/qa/suites/crimson-rados-experimental/thrash/2-recovery-overrides/more-async-recovery.yaml.disabled
new file mode 100644
index 00000000000..4aed086bcc3
--- /dev/null
+++ b/qa/suites/crimson-rados-experimental/thrash/2-recovery-overrides/more-async-recovery.yaml.disabled
@@ -0,0 +1,5 @@
+overrides:
+ ceph:
+ conf:
+ global:
+ osd_async_recovery_min_cost: 1
diff --git a/qa/suites/crimson-rados-experimental/thrash/2-recovery-overrides/more-partial-recovery.yaml.disabled b/qa/suites/crimson-rados-experimental/thrash/2-recovery-overrides/more-partial-recovery.yaml.disabled
new file mode 100644
index 00000000000..88f15f2f691
--- /dev/null
+++ b/qa/suites/crimson-rados-experimental/thrash/2-recovery-overrides/more-partial-recovery.yaml.disabled
@@ -0,0 +1,5 @@
+overrides:
+ ceph:
+ conf:
+ global:
+ osd_object_clean_region_max_num_intervals: 1000
diff --git a/qa/suites/crimson-rados-experimental/thrash/clusters/+ b/qa/suites/crimson-rados-experimental/thrash/clusters/+
new file mode 100644
index 00000000000..e69de29bb2d
--- /dev/null
+++ b/qa/suites/crimson-rados-experimental/thrash/clusters/+
diff --git a/qa/suites/crimson-rados-experimental/seastore/basic/objectstore/.qa b/qa/suites/crimson-rados-experimental/thrash/clusters/.qa
index a602a0353e7..a602a0353e7 120000
--- a/qa/suites/crimson-rados-experimental/seastore/basic/objectstore/.qa
+++ b/qa/suites/crimson-rados-experimental/thrash/clusters/.qa
diff --git a/qa/suites/crimson-rados-experimental/seastore/basic/clusters/fixed-2.yaml b/qa/suites/crimson-rados-experimental/thrash/clusters/fixed-2.yaml
index 9774de6887b..79641f695ab 100644
--- a/qa/suites/crimson-rados-experimental/seastore/basic/clusters/fixed-2.yaml
+++ b/qa/suites/crimson-rados-experimental/thrash/clusters/fixed-2.yaml
@@ -6,6 +6,15 @@ overrides:
conf:
osd:
osd shutdown pgref assert: true
+ crimson alien thread cpu cores: 6-7
+ osd.0:
+ crimson seastar cpu cores: 0-2
+ osd.1:
+ crimson seastar cpu cores: 3-5
+ osd.2:
+ crimson seastar cpu cores: 0-2
+ osd.3:
+ crimson seastar cpu cores: 3-5
global:
ms cluster mode: crc
ms service mode: crc
diff --git a/qa/suites/crimson-rados-experimental/thrash/clusters/openstack.yaml.disabled b/qa/suites/crimson-rados-experimental/thrash/clusters/openstack.yaml.disabled
new file mode 100644
index 00000000000..e559d9126e8
--- /dev/null
+++ b/qa/suites/crimson-rados-experimental/thrash/clusters/openstack.yaml.disabled
@@ -0,0 +1,4 @@
+openstack:
+ - volumes: # attached to each instance
+ count: 4
+ size: 10 # GB
diff --git a/qa/suites/crimson-rados-experimental/thrash/crimson-supported-all-distro b/qa/suites/crimson-rados-experimental/thrash/crimson-supported-all-distro
new file mode 120000
index 00000000000..a5b729b9efa
--- /dev/null
+++ b/qa/suites/crimson-rados-experimental/thrash/crimson-supported-all-distro
@@ -0,0 +1 @@
+.qa/distros/crimson-supported-all-distro/
\ No newline at end of file
diff --git a/qa/suites/crimson-rados-experimental/seastore/basic/crimson_qa_overrides.yaml b/qa/suites/crimson-rados-experimental/thrash/crimson_qa_overrides.yaml
index 2bf67af1b18..2bf67af1b18 120000
--- a/qa/suites/crimson-rados-experimental/seastore/basic/crimson_qa_overrides.yaml
+++ b/qa/suites/crimson-rados-experimental/thrash/crimson_qa_overrides.yaml
diff --git a/qa/suites/crimson-rados-experimental/seastore/basic/tasks/.qa b/qa/suites/crimson-rados-experimental/thrash/deploy/.qa
index a602a0353e7..a602a0353e7 120000
--- a/qa/suites/crimson-rados-experimental/seastore/basic/tasks/.qa
+++ b/qa/suites/crimson-rados-experimental/thrash/deploy/.qa
diff --git a/qa/suites/crimson-rados-experimental/thrash/deploy/ceph.yaml b/qa/suites/crimson-rados-experimental/thrash/deploy/ceph.yaml
new file mode 100644
index 00000000000..ecad09cfe3a
--- /dev/null
+++ b/qa/suites/crimson-rados-experimental/thrash/deploy/ceph.yaml
@@ -0,0 +1,11 @@
+overrides:
+ install:
+ ceph:
+ flavor: crimson
+tasks:
+- install:
+- ceph:
+ conf:
+ osd:
+ debug monc: 20
+ flavor: crimson
diff --git a/qa/suites/crimson-rados-experimental/thrash/deploy/cephadm.yaml.disabled b/qa/suites/crimson-rados-experimental/thrash/deploy/cephadm.yaml.disabled
new file mode 100644
index 00000000000..0c2062240ee
--- /dev/null
+++ b/qa/suites/crimson-rados-experimental/thrash/deploy/cephadm.yaml.disabled
@@ -0,0 +1,16 @@
+# no need to verify os + flavor + sha1
+verify_ceph_hash: false
+tasks:
+- cephadm:
+ conf:
+ mgr:
+ debug ms: 1
+ debug mgr: 20
+ debug osd: 10
+- cephadm.shell:
+ mon.a:
+ - ceph orch status
+ - ceph orch ps
+ - ceph orch ls
+ - ceph orch host ls
+ - ceph orch device ls
diff --git a/qa/suites/fs/thrash/workloads/overrides/prefetch_dirfrags/.qa b/qa/suites/crimson-rados-experimental/thrash/objectstore/.qa
index a602a0353e7..a602a0353e7 120000
--- a/qa/suites/fs/thrash/workloads/overrides/prefetch_dirfrags/.qa
+++ b/qa/suites/crimson-rados-experimental/thrash/objectstore/.qa
diff --git a/qa/suites/crimson-rados-experimental/thrash/objectstore/seastore.yaml b/qa/suites/crimson-rados-experimental/thrash/objectstore/seastore.yaml
new file mode 120000
index 00000000000..61e26e7acf8
--- /dev/null
+++ b/qa/suites/crimson-rados-experimental/thrash/objectstore/seastore.yaml
@@ -0,0 +1 @@
+.qa/config/crimson_seastore.yaml
\ No newline at end of file
diff --git a/qa/suites/rados/rest/.qa b/qa/suites/crimson-rados-experimental/thrash/thrashers/.qa
index a602a0353e7..a602a0353e7 120000
--- a/qa/suites/rados/rest/.qa
+++ b/qa/suites/crimson-rados-experimental/thrash/thrashers/.qa
diff --git a/qa/suites/crimson-rados-experimental/thrash/thrashers/default.yaml b/qa/suites/crimson-rados-experimental/thrash/thrashers/default.yaml
new file mode 100644
index 00000000000..aa44b6101ff
--- /dev/null
+++ b/qa/suites/crimson-rados-experimental/thrash/thrashers/default.yaml
@@ -0,0 +1,34 @@
+overrides:
+ ceph:
+ log-ignorelist:
+ - but it is still running
+ - objects unfound and apparently lost
+ conf:
+ osd:
+ osd debug reject backfill probability: .3
+ osd scrub min interval: 60
+ osd scrub max interval: 120
+ osd max backfills: 3
+ osd snap trim sleep: 2
+ osd delete sleep: 1
+ mon:
+ mon min osdmap epochs: 50
+ paxos service trim min: 10
+ # prune full osdmaps regularly
+ mon osdmap full prune min: 15
+ mon osdmap full prune interval: 2
+ mon osdmap full prune txsize: 2
+tasks:
+- thrashosds:
+ timeout: 2400
+ dump_ops_enable: false
+ sighup_delay: 0
+ min_in: 3
+ noscrub_toggle_delay: 0
+ chance_thrash_pg_upmap: 0
+ reweight_osd: 0
+ thrash_primary_affinity: false
+ ceph_objectstore_tool: false
+ chance_inject_pause_short: 0
+ chance_thrash_cluster_full: 0
+ chance_reset_purged_snaps_last: 0
diff --git a/qa/suites/crimson-rados-experimental/thrash/thrashosds-health.yaml b/qa/suites/crimson-rados-experimental/thrash/thrashosds-health.yaml
new file mode 120000
index 00000000000..9124eb1aa29
--- /dev/null
+++ b/qa/suites/crimson-rados-experimental/thrash/thrashosds-health.yaml
@@ -0,0 +1 @@
+.qa/tasks/thrashosds-health.yaml
\ No newline at end of file
diff --git a/qa/suites/crimson-rados-experimental/thrash/workloads/.qa b/qa/suites/crimson-rados-experimental/thrash/workloads/.qa
new file mode 120000
index 00000000000..a602a0353e7
--- /dev/null
+++ b/qa/suites/crimson-rados-experimental/thrash/workloads/.qa
@@ -0,0 +1 @@
+../.qa/
\ No newline at end of file
diff --git a/qa/suites/crimson-rados-experimental/thrash/workloads/admin_socket_objecter_requests.yaml b/qa/suites/crimson-rados-experimental/thrash/workloads/admin_socket_objecter_requests.yaml
new file mode 100644
index 00000000000..8c9764ade84
--- /dev/null
+++ b/qa/suites/crimson-rados-experimental/thrash/workloads/admin_socket_objecter_requests.yaml
@@ -0,0 +1,13 @@
+overrides:
+ ceph:
+ conf:
+ client.0:
+ admin socket: /var/run/ceph/ceph-$name.asok
+tasks:
+- radosbench:
+ clients: [client.0]
+ time: 150
+- admin_socket:
+ client.0:
+ objecter_requests:
+ test: "http://git.ceph.com/?p={repo};a=blob_plain;f=src/test/admin_socket/objecter_requests;hb={branch}"
diff --git a/qa/suites/crimson-rados-experimental/thrash/workloads/pool-snaps-few-objects.yaml b/qa/suites/crimson-rados-experimental/thrash/workloads/pool-snaps-few-objects.yaml
new file mode 100644
index 00000000000..d35e8421ab4
--- /dev/null
+++ b/qa/suites/crimson-rados-experimental/thrash/workloads/pool-snaps-few-objects.yaml
@@ -0,0 +1,20 @@
+overrides:
+ conf:
+ osd:
+ osd deep scrub update digest min age: 0
+tasks:
+- rados:
+ clients: [client.0]
+ ops: 4000
+ objects: 50
+ pool_snaps: true
+ op_weights:
+ read: 100
+ write: 100
+ delete: 50
+ snap_create: 50
+ snap_remove: 50
+ rollback: 0
+ # TODO: CEPH_OSD_OP_COPY_FROM
+ copy_from: 0
+
diff --git a/qa/suites/crimson-rados-experimental/thrash/workloads/radosbench-high-concurrency.yaml b/qa/suites/crimson-rados-experimental/thrash/workloads/radosbench-high-concurrency.yaml
new file mode 100644
index 00000000000..902c4b56a1e
--- /dev/null
+++ b/qa/suites/crimson-rados-experimental/thrash/workloads/radosbench-high-concurrency.yaml
@@ -0,0 +1,49 @@
+overrides:
+ ceph:
+ conf:
+ client.0:
+ debug ms: 1
+ debug objecter: 20
+ debug rados: 20
+tasks:
+- full_sequential:
+ - radosbench:
+ clients: [client.0]
+ concurrency: 128
+ size: 8192
+ time: 90
+ - radosbench:
+ clients: [client.0]
+ concurrency: 128
+ size: 8192
+ time: 90
+ - radosbench:
+ clients: [client.0]
+ concurrency: 128
+ size: 8192
+ time: 90
+ - radosbench:
+ clients: [client.0]
+ concurrency: 128
+ size: 8192
+ time: 90
+ - radosbench:
+ clients: [client.0]
+ concurrency: 128
+ size: 8192
+ time: 90
+ - radosbench:
+ clients: [client.0]
+ concurrency: 128
+ size: 8192
+ time: 90
+ - radosbench:
+ clients: [client.0]
+ concurrency: 128
+ size: 8192
+ time: 90
+ - radosbench:
+ clients: [client.0]
+ concurrency: 128
+ size: 8192
+ time: 90
diff --git a/qa/suites/crimson-rados-experimental/thrash/workloads/radosbench.yaml b/qa/suites/crimson-rados-experimental/thrash/workloads/radosbench.yaml
new file mode 100644
index 00000000000..071f55e3928
--- /dev/null
+++ b/qa/suites/crimson-rados-experimental/thrash/workloads/radosbench.yaml
@@ -0,0 +1,24 @@
+overrides:
+ ceph:
+ conf:
+ client.0:
+ debug ms: 1
+ debug objecter: 20
+ debug rados: 20
+tasks:
+- full_sequential:
+ - radosbench:
+ clients: [client.0]
+ time: 90
+ - radosbench:
+ clients: [client.0]
+ time: 90
+ - radosbench:
+ clients: [client.0]
+ time: 90
+ - radosbench:
+ clients: [client.0]
+ time: 90
+ - radosbench:
+ clients: [client.0]
+ time: 90
diff --git a/qa/suites/crimson-rados-experimental/thrash/workloads/small-objects-balanced.yaml b/qa/suites/crimson-rados-experimental/thrash/workloads/small-objects-balanced.yaml
new file mode 100644
index 00000000000..afe04229898
--- /dev/null
+++ b/qa/suites/crimson-rados-experimental/thrash/workloads/small-objects-balanced.yaml
@@ -0,0 +1,24 @@
+overrides:
+ ceph:
+ crush_tunables: jewel
+tasks:
+- rados:
+ clients: [client.0]
+ ops: 400000
+ max_seconds: 600
+ max_in_flight: 64
+ objects: 1024
+ size: 16384
+ balance_reads: true
+ max_attr_len: 8192
+ op_weights:
+ read: 100
+ write: 100
+ delete: 50
+ snap_create: 50
+ snap_remove: 50
+ rollback: 0
+ # TODO: CEPH_OSD_OP_COPY_FROM
+ copy_from: 0
+ setattr: 25
+ rmattr: 25
diff --git a/qa/suites/crimson-rados-experimental/thrash/workloads/small-objects-localized.yaml b/qa/suites/crimson-rados-experimental/thrash/workloads/small-objects-localized.yaml
new file mode 100644
index 00000000000..445b582ea42
--- /dev/null
+++ b/qa/suites/crimson-rados-experimental/thrash/workloads/small-objects-localized.yaml
@@ -0,0 +1,24 @@
+overrides:
+ ceph:
+ crush_tunables: jewel
+tasks:
+- rados:
+ clients: [client.0]
+ ops: 400000
+ max_seconds: 600
+ max_in_flight: 64
+ objects: 1024
+ size: 16384
+ localize_reads: true
+ max_attr_len: 8192
+ op_weights:
+ read: 100
+ write: 100
+ delete: 50
+ snap_create: 50
+ snap_remove: 50
+ rollback: 0
+ # TODO: CEPH_OSD_OP_COPY_FROM
+ copy_from: 0
+ setattr: 25
+ rmattr: 25
diff --git a/qa/suites/rados/thrash-erasure-code/workloads/ec-small-objects-balanced.yaml b/qa/suites/crimson-rados-experimental/thrash/workloads/small-objects.yaml
index af0ac39310e..e7e8070fd76 100644
--- a/qa/suites/rados/thrash-erasure-code/workloads/ec-small-objects-balanced.yaml
+++ b/qa/suites/crimson-rados-experimental/thrash/workloads/small-objects.yaml
@@ -1,3 +1,6 @@
+overrides:
+ ceph:
+ crush_tunables: jewel
tasks:
- rados:
clients: [client.0]
@@ -6,16 +9,15 @@ tasks:
max_in_flight: 64
objects: 1024
size: 16384
- ec_pool: true
- balanced_reads: true
+ max_attr_len: 8192
op_weights:
read: 100
- write: 0
- append: 100
+ write: 100
delete: 50
snap_create: 50
snap_remove: 50
- rollback: 50
- copy_from: 50
+ rollback: 0
+ # TODO: CEPH_OSD_OP_COPY_FROM
+ copy_from: 0
setattr: 25
rmattr: 25
diff --git a/qa/suites/crimson-rados-experimental/thrash/workloads/snaps-few-objects-balanced.yaml b/qa/suites/crimson-rados-experimental/thrash/workloads/snaps-few-objects-balanced.yaml
new file mode 100644
index 00000000000..1161c3cc253
--- /dev/null
+++ b/qa/suites/crimson-rados-experimental/thrash/workloads/snaps-few-objects-balanced.yaml
@@ -0,0 +1,15 @@
+tasks:
+- rados:
+ clients: [client.0]
+ ops: 4000
+ objects: 50
+ balance_reads: true
+ op_weights:
+ read: 100
+ write: 100
+ delete: 50
+ snap_create: 50
+ snap_remove: 50
+ rollback: 0
+ # TODO: CEPH_OSD_OP_COPY_FROM
+ copy_from: 0
diff --git a/qa/suites/crimson-rados-experimental/thrash/workloads/snaps-few-objects-localized.yaml b/qa/suites/crimson-rados-experimental/thrash/workloads/snaps-few-objects-localized.yaml
new file mode 100644
index 00000000000..80af0def0e4
--- /dev/null
+++ b/qa/suites/crimson-rados-experimental/thrash/workloads/snaps-few-objects-localized.yaml
@@ -0,0 +1,15 @@
+tasks:
+- rados:
+ clients: [client.0]
+ ops: 4000
+ objects: 50
+ localize_reads: true
+ op_weights:
+ read: 100
+ write: 100
+ delete: 50
+ snap_create: 50
+ snap_remove: 50
+ rollback: 0
+ # TODO: CEPH_OSD_OP_COPY_FROM
+ copy_from: 0
diff --git a/qa/suites/crimson-rados-experimental/thrash/workloads/snaps-few-objects.yaml b/qa/suites/crimson-rados-experimental/thrash/workloads/snaps-few-objects.yaml
new file mode 100644
index 00000000000..0694ffcd0d6
--- /dev/null
+++ b/qa/suites/crimson-rados-experimental/thrash/workloads/snaps-few-objects.yaml
@@ -0,0 +1,14 @@
+tasks:
+- rados:
+ clients: [client.0]
+ ops: 4000
+ objects: 50
+ op_weights:
+ read: 100
+ write: 100
+ delete: 50
+ snap_create: 50
+ snap_remove: 50
+ rollback: 0
+ # TODO: CEPH_OSD_OP_COPY_FROM
+ copy_from: 0
diff --git a/qa/suites/crimson-rados-experimental/thrash/workloads/write_fadvise_dontneed.yaml b/qa/suites/crimson-rados-experimental/thrash/workloads/write_fadvise_dontneed.yaml
new file mode 100644
index 00000000000..606dcae6922
--- /dev/null
+++ b/qa/suites/crimson-rados-experimental/thrash/workloads/write_fadvise_dontneed.yaml
@@ -0,0 +1,8 @@
+tasks:
+- rados:
+ clients: [client.0]
+ ops: 4000
+ objects: 500
+ write_fadvise_dontneed: true
+ op_weights:
+ write: 100
diff --git a/qa/suites/crimson-rados/basic/objectstore/bluestore.yaml b/qa/suites/crimson-rados/basic/objectstore/bluestore.yaml
index e84f396e4b2..481e393be4a 120000
--- a/qa/suites/crimson-rados/basic/objectstore/bluestore.yaml
+++ b/qa/suites/crimson-rados/basic/objectstore/bluestore.yaml
@@ -1 +1 @@
-.qa/config/bluestore.yaml
\ No newline at end of file
+.qa/config/crimson_bluestore.yaml
\ No newline at end of file
diff --git a/qa/suites/crimson-rados/basic/objectstore/seastore.yaml b/qa/suites/crimson-rados/basic/objectstore/seastore.yaml
index 6a70c381709..61e26e7acf8 120000
--- a/qa/suites/crimson-rados/basic/objectstore/seastore.yaml
+++ b/qa/suites/crimson-rados/basic/objectstore/seastore.yaml
@@ -1 +1 @@
-.qa/config/seastore.yaml
\ No newline at end of file
+.qa/config/crimson_seastore.yaml
\ No newline at end of file
diff --git a/qa/suites/crimson-rados/basic/tasks/rados_python.yaml b/qa/suites/crimson-rados/basic/tasks/rados_python.yaml
index a6af2957119..1302e14f21a 100644
--- a/qa/suites/crimson-rados/basic/tasks/rados_python.yaml
+++ b/qa/suites/crimson-rados/basic/tasks/rados_python.yaml
@@ -17,4 +17,4 @@ tasks:
timeout: 1h
clients:
client.0:
- - rados/test_python.sh -m 'not (tier or ec)'
+ - rados/test_python.sh -m 'not (wait or tier or ec)'
diff --git a/qa/suites/crimson-rados/perf/objectstore/bluestore.yaml b/qa/suites/crimson-rados/perf/objectstore/bluestore.yaml
index e84f396e4b2..481e393be4a 120000
--- a/qa/suites/crimson-rados/perf/objectstore/bluestore.yaml
+++ b/qa/suites/crimson-rados/perf/objectstore/bluestore.yaml
@@ -1 +1 @@
-.qa/config/bluestore.yaml
\ No newline at end of file
+.qa/config/crimson_bluestore.yaml
\ No newline at end of file
diff --git a/qa/suites/crimson-rados/perf/objectstore/seastore.yaml b/qa/suites/crimson-rados/perf/objectstore/seastore.yaml
index 6a70c381709..61e26e7acf8 120000
--- a/qa/suites/crimson-rados/perf/objectstore/seastore.yaml
+++ b/qa/suites/crimson-rados/perf/objectstore/seastore.yaml
@@ -1 +1 @@
-.qa/config/seastore.yaml
\ No newline at end of file
+.qa/config/crimson_seastore.yaml
\ No newline at end of file
diff --git a/qa/suites/crimson-rados/rbd/objectstore/bluestore.yaml b/qa/suites/crimson-rados/rbd/objectstore/bluestore.yaml
index e84f396e4b2..481e393be4a 120000
--- a/qa/suites/crimson-rados/rbd/objectstore/bluestore.yaml
+++ b/qa/suites/crimson-rados/rbd/objectstore/bluestore.yaml
@@ -1 +1 @@
-.qa/config/bluestore.yaml
\ No newline at end of file
+.qa/config/crimson_bluestore.yaml
\ No newline at end of file
diff --git a/qa/suites/crimson-rados/rbd/objectstore/seastore.yaml b/qa/suites/crimson-rados/rbd/objectstore/seastore.yaml
index 6a70c381709..61e26e7acf8 120000
--- a/qa/suites/crimson-rados/rbd/objectstore/seastore.yaml
+++ b/qa/suites/crimson-rados/rbd/objectstore/seastore.yaml
@@ -1 +1 @@
-.qa/config/seastore.yaml
\ No newline at end of file
+.qa/config/crimson_seastore.yaml
\ No newline at end of file
diff --git a/qa/suites/crimson-rados/singleton/objectstore b/qa/suites/crimson-rados/singleton/objectstore
deleted file mode 120000
index dbccf5ad928..00000000000
--- a/qa/suites/crimson-rados/singleton/objectstore
+++ /dev/null
@@ -1 +0,0 @@
-../thrash/objectstore
\ No newline at end of file
diff --git a/qa/suites/crimson-rados/singleton/objectstore/.qa b/qa/suites/crimson-rados/singleton/objectstore/.qa
new file mode 120000
index 00000000000..a602a0353e7
--- /dev/null
+++ b/qa/suites/crimson-rados/singleton/objectstore/.qa
@@ -0,0 +1 @@
+../.qa/
\ No newline at end of file
diff --git a/qa/suites/crimson-rados/singleton/objectstore/bluestore.yaml b/qa/suites/crimson-rados/singleton/objectstore/bluestore.yaml
new file mode 120000
index 00000000000..481e393be4a
--- /dev/null
+++ b/qa/suites/crimson-rados/singleton/objectstore/bluestore.yaml
@@ -0,0 +1 @@
+.qa/config/crimson_bluestore.yaml
\ No newline at end of file
diff --git a/qa/suites/crimson-rados/singleton/objectstore/seastore.yaml b/qa/suites/crimson-rados/singleton/objectstore/seastore.yaml
new file mode 120000
index 00000000000..61e26e7acf8
--- /dev/null
+++ b/qa/suites/crimson-rados/singleton/objectstore/seastore.yaml
@@ -0,0 +1 @@
+.qa/config/crimson_seastore.yaml
\ No newline at end of file
diff --git a/qa/suites/crimson-rados/thrash_simple/1-pg-log-overrides/short_pg_log.yaml.disabled b/qa/suites/crimson-rados/thrash/1-pg-log-overrides/short_pg_log.yaml
index abd86d7d986..abd86d7d986 120000
--- a/qa/suites/crimson-rados/thrash_simple/1-pg-log-overrides/short_pg_log.yaml.disabled
+++ b/qa/suites/crimson-rados/thrash/1-pg-log-overrides/short_pg_log.yaml
diff --git a/qa/suites/crimson-rados/thrash/objectstore/bluestore.yaml b/qa/suites/crimson-rados/thrash/objectstore/bluestore.yaml
index e84f396e4b2..481e393be4a 120000
--- a/qa/suites/crimson-rados/thrash/objectstore/bluestore.yaml
+++ b/qa/suites/crimson-rados/thrash/objectstore/bluestore.yaml
@@ -1 +1 @@
-.qa/config/bluestore.yaml
\ No newline at end of file
+.qa/config/crimson_bluestore.yaml
\ No newline at end of file
diff --git a/qa/suites/crimson-rados/thrash/objectstore/seastore.yaml.disabled b/qa/suites/crimson-rados/thrash/objectstore/seastore.yaml.disabled
new file mode 120000
index 00000000000..61e26e7acf8
--- /dev/null
+++ b/qa/suites/crimson-rados/thrash/objectstore/seastore.yaml.disabled
@@ -0,0 +1 @@
+.qa/config/crimson_seastore.yaml
\ No newline at end of file
diff --git a/qa/suites/crimson-rados/thrash_simple/1-pg-log-overrides/short_pg_log.yaml b/qa/suites/crimson-rados/thrash_simple/1-pg-log-overrides/short_pg_log.yaml
new file mode 120000
index 00000000000..abd86d7d986
--- /dev/null
+++ b/qa/suites/crimson-rados/thrash_simple/1-pg-log-overrides/short_pg_log.yaml
@@ -0,0 +1 @@
+.qa/overrides/short_pg_log.yaml
\ No newline at end of file
diff --git a/qa/suites/crimson-rados/thrash_simple/objectstore/bluestore.yaml b/qa/suites/crimson-rados/thrash_simple/objectstore/bluestore.yaml
index e84f396e4b2..481e393be4a 120000
--- a/qa/suites/crimson-rados/thrash_simple/objectstore/bluestore.yaml
+++ b/qa/suites/crimson-rados/thrash_simple/objectstore/bluestore.yaml
@@ -1 +1 @@
-.qa/config/bluestore.yaml
\ No newline at end of file
+.qa/config/crimson_bluestore.yaml
\ No newline at end of file
diff --git a/qa/suites/crimson-rados/thrash_simple/objectstore/seastore.yaml b/qa/suites/crimson-rados/thrash_simple/objectstore/seastore.yaml
index 6a70c381709..61e26e7acf8 120000
--- a/qa/suites/crimson-rados/thrash_simple/objectstore/seastore.yaml
+++ b/qa/suites/crimson-rados/thrash_simple/objectstore/seastore.yaml
@@ -1 +1 @@
-.qa/config/seastore.yaml
\ No newline at end of file
+.qa/config/crimson_seastore.yaml
\ No newline at end of file
diff --git a/qa/suites/fs/multifs/tasks/failover.yaml b/qa/suites/fs/multifs/tasks/failover.yaml
index 55dde639c23..b7a0338566c 100644
--- a/qa/suites/fs/multifs/tasks/failover.yaml
+++ b/qa/suites/fs/multifs/tasks/failover.yaml
@@ -8,6 +8,7 @@ overrides:
- \(MDS_DAMAGE\)
- \(FS_DEGRADED\)
- \(MDS_CACHE_OVERSIZED\)
+ - \(MDS_ESTIMATED_REPLAY_TIME\)
ceph-fuse:
disabled: true
tasks:
diff --git a/qa/suites/fs/nfs/tasks/nfs.yaml b/qa/suites/fs/nfs/tasks/nfs.yaml
index aa966bff214..2dd668c9f88 100644
--- a/qa/suites/fs/nfs/tasks/nfs.yaml
+++ b/qa/suites/fs/nfs/tasks/nfs.yaml
@@ -1,3 +1,10 @@
+overrides:
+ install:
+ extra_system_packages:
+ rpm:
+ - fio
+ deb:
+ - fio
tasks:
- cephfs_test_runner:
modules:
diff --git a/qa/suites/fs/thrash/workloads/overrides/% b/qa/suites/fs/thrash/workloads/overrides/%
new file mode 100644
index 00000000000..e69de29bb2d
--- /dev/null
+++ b/qa/suites/fs/thrash/workloads/overrides/%
diff --git a/qa/suites/fs/thrash/workloads/overrides/prefetch_oft_dirfrags/.qa b/qa/suites/fs/thrash/workloads/overrides/prefetch_oft_dirfrags/.qa
new file mode 120000
index 00000000000..a602a0353e7
--- /dev/null
+++ b/qa/suites/fs/thrash/workloads/overrides/prefetch_oft_dirfrags/.qa
@@ -0,0 +1 @@
+../.qa/
\ No newline at end of file
diff --git a/qa/suites/fs/thrash/workloads/overrides/prefetch_dirfrags/no.yaml b/qa/suites/fs/thrash/workloads/overrides/prefetch_oft_dirfrags/no.yaml
index 91b45367934..91b45367934 100644
--- a/qa/suites/fs/thrash/workloads/overrides/prefetch_dirfrags/no.yaml
+++ b/qa/suites/fs/thrash/workloads/overrides/prefetch_oft_dirfrags/no.yaml
diff --git a/qa/suites/fs/thrash/workloads/overrides/prefetch_dirfrags/yes.yaml b/qa/suites/fs/thrash/workloads/overrides/prefetch_oft_dirfrags/yes.yaml
index bd202f988c8..bd202f988c8 100644
--- a/qa/suites/fs/thrash/workloads/overrides/prefetch_dirfrags/yes.yaml
+++ b/qa/suites/fs/thrash/workloads/overrides/prefetch_oft_dirfrags/yes.yaml
diff --git a/qa/suites/fs/upgrade/featureful_client/old_client/kernel.yaml b/qa/suites/fs/upgrade/featureful_client/old_client/kernel.yaml
new file mode 120000
index 00000000000..a7f7b735665
--- /dev/null
+++ b/qa/suites/fs/upgrade/featureful_client/old_client/kernel.yaml
@@ -0,0 +1 @@
+.qa/cephfs/begin/3-kernel.yaml
\ No newline at end of file
diff --git a/qa/suites/fs/upgrade/featureful_client/upgraded_client/kernel.yaml b/qa/suites/fs/upgrade/featureful_client/upgraded_client/kernel.yaml
new file mode 120000
index 00000000000..a7f7b735665
--- /dev/null
+++ b/qa/suites/fs/upgrade/featureful_client/upgraded_client/kernel.yaml
@@ -0,0 +1 @@
+.qa/cephfs/begin/3-kernel.yaml
\ No newline at end of file
diff --git a/qa/suites/fs/upgrade/mds_upgrade_sequence/kernel.yaml b/qa/suites/fs/upgrade/mds_upgrade_sequence/kernel.yaml
new file mode 120000
index 00000000000..a7f7b735665
--- /dev/null
+++ b/qa/suites/fs/upgrade/mds_upgrade_sequence/kernel.yaml
@@ -0,0 +1 @@
+.qa/cephfs/begin/3-kernel.yaml
\ No newline at end of file
diff --git a/qa/suites/fs/upgrade/nofs/kernel.yaml b/qa/suites/fs/upgrade/nofs/kernel.yaml
new file mode 120000
index 00000000000..a7f7b735665
--- /dev/null
+++ b/qa/suites/fs/upgrade/nofs/kernel.yaml
@@ -0,0 +1 @@
+.qa/cephfs/begin/3-kernel.yaml
\ No newline at end of file
diff --git a/qa/suites/fs/upgrade/upgraded_client/kernel.yaml b/qa/suites/fs/upgrade/upgraded_client/kernel.yaml
new file mode 120000
index 00000000000..a7f7b735665
--- /dev/null
+++ b/qa/suites/fs/upgrade/upgraded_client/kernel.yaml
@@ -0,0 +1 @@
+.qa/cephfs/begin/3-kernel.yaml
\ No newline at end of file
diff --git a/qa/suites/fs/workload/begin/3-kernel.yaml b/qa/suites/fs/workload/begin/3-kernel.yaml
new file mode 120000
index 00000000000..a7f7b735665
--- /dev/null
+++ b/qa/suites/fs/workload/begin/3-kernel.yaml
@@ -0,0 +1 @@
+.qa/cephfs/begin/3-kernel.yaml
\ No newline at end of file
diff --git a/qa/suites/fs/workload/tasks/6-workunit/kernel_untar_build.yaml b/qa/suites/fs/workload/tasks/6-workunit/kernel_untar_build.yaml
index 602d3416263..aa327b0cdf5 100644
--- a/qa/suites/fs/workload/tasks/6-workunit/kernel_untar_build.yaml
+++ b/qa/suites/fs/workload/tasks/6-workunit/kernel_untar_build.yaml
@@ -5,6 +5,7 @@ overrides:
- "mds.dir_split"
tasks:
- workunit:
+ timeout: 5h
clients:
all:
- kernel_untar_build.sh
diff --git a/qa/suites/nvmeof/basic/clusters/4-gateways-2-initiator.yaml b/qa/suites/nvmeof/basic/clusters/4-gateways-2-initiator.yaml
index e8f390c3b78..7f20f9f04a8 100644
--- a/qa/suites/nvmeof/basic/clusters/4-gateways-2-initiator.yaml
+++ b/qa/suites/nvmeof/basic/clusters/4-gateways-2-initiator.yaml
@@ -28,3 +28,5 @@ overrides:
mon:
# cephadm can take up to 5 minutes to bring up remaining mons
mon down mkfs grace: 300
+ log-ignorelist:
+ - NVMEOF_SINGLE_GATEWAY
diff --git a/qa/suites/nvmeof/basic/workloads/nvmeof_initiator.yaml b/qa/suites/nvmeof/basic/workloads/nvmeof_initiator.yaml
index 7c97edae552..0416ae2ea4e 100644
--- a/qa/suites/nvmeof/basic/workloads/nvmeof_initiator.yaml
+++ b/qa/suites/nvmeof/basic/workloads/nvmeof_initiator.yaml
@@ -1,7 +1,8 @@
+# runs on default nvmeof image (i.e. DEFAULT_NVMEOF_IMAGE)
tasks:
- nvmeof:
installer: host.a
- gw_image: quay.io/ceph/nvmeof:latest # "default" is the image cephadm defaults to; change to test specific nvmeof images, example "latest"
+ gw_image: default # "default" is the image cephadm defaults to; change to test specific nvmeof images, example "latest"
rbd:
pool_name: mypool
image_name_prefix: myimage
diff --git a/qa/suites/nvmeof/basic/workloads/nvmeof_mtls.yaml b/qa/suites/nvmeof/basic/workloads/nvmeof_mtls.yaml
new file mode 100644
index 00000000000..8eb4f6dc63c
--- /dev/null
+++ b/qa/suites/nvmeof/basic/workloads/nvmeof_mtls.yaml
@@ -0,0 +1,36 @@
+tasks:
+- nvmeof:
+ installer: host.a
+ gw_image: quay.io/ceph/nvmeof:latest # "default" is the image cephadm defaults to; change to test specific nvmeof images, example "latest"
+ rbd:
+ pool_name: mypool
+ image_name_prefix: myimage
+ gateway_config:
+ subsystems_count: 3
+ namespaces_count: 20
+ cli_image: quay.io/ceph/nvmeof-cli:latest
+ create_mtls_secrets: true
+
+- cephadm.wait_for_service:
+ service: nvmeof.mypool.mygroup0
+
+- workunit:
+ no_coverage_and_limits: true
+ timeout: 30m
+ clients:
+ client.0:
+ - nvmeof/setup_subsystem.sh
+ - nvmeof/basic_tests.sh
+ - nvmeof/fio_test.sh --rbd_iostat
+ env:
+ RBD_POOL: mypool
+ RBD_IMAGE_PREFIX: myimage
+ IOSTAT_INTERVAL: '10'
+ RUNTIME: '60'
+
+- workunit:
+ no_coverage_and_limits: true
+ timeout: 30m
+ clients:
+ client.0:
+ - nvmeof/mtls_test.sh
diff --git a/qa/suites/nvmeof/basic/workloads/nvmeof_namespaces.yaml b/qa/suites/nvmeof/basic/workloads/nvmeof_namespaces.yaml
index 9ef37004427..dfe31380bb6 100644
--- a/qa/suites/nvmeof/basic/workloads/nvmeof_namespaces.yaml
+++ b/qa/suites/nvmeof/basic/workloads/nvmeof_namespaces.yaml
@@ -18,6 +18,7 @@ tasks:
clients:
client.0:
- nvmeof/setup_subsystem.sh
+ - nvmeof/basic_tests.sh
env:
RBD_POOL: mypool
RBD_IMAGE_PREFIX: myimage
@@ -27,7 +28,6 @@ tasks:
timeout: 30m
clients:
client.0:
- - nvmeof/basic_tests.sh
- nvmeof/fio_test.sh --rbd_iostat
client.1:
- nvmeof/basic_tests.sh
diff --git a/qa/suites/nvmeof/basic/workloads/nvmeof_scalability.yaml b/qa/suites/nvmeof/basic/workloads/nvmeof_scalability.yaml
index 12cb50b408d..d66b6fc8093 100644
--- a/qa/suites/nvmeof/basic/workloads/nvmeof_scalability.yaml
+++ b/qa/suites/nvmeof/basic/workloads/nvmeof_scalability.yaml
@@ -31,8 +31,11 @@ tasks:
no_coverage_and_limits: true
timeout: 30m
clients:
- client.0:
+ client.3:
- nvmeof/scalability_test.sh nvmeof.a,nvmeof.b
- nvmeof/scalability_test.sh nvmeof.b,nvmeof.c,nvmeof.d
+ - nvmeof/scalability_test.sh nvmeof.b,nvmeof.c
env:
SCALING_DELAYS: '50'
+ RBD_POOL: mypool
+ NVMEOF_GROUP: mygroup0
diff --git a/qa/suites/nvmeof/thrash/gateway-initiator-setup/10-subsys-90-namespace-no_huge_pages.yaml b/qa/suites/nvmeof/thrash/gateway-initiator-setup/10-subsys-90-namespace-no_huge_pages.yaml
new file mode 100644
index 00000000000..83d54cdf5c3
--- /dev/null
+++ b/qa/suites/nvmeof/thrash/gateway-initiator-setup/10-subsys-90-namespace-no_huge_pages.yaml
@@ -0,0 +1,37 @@
+tasks:
+- nvmeof:
+ installer: host.a
+ gw_image: quay.io/ceph/nvmeof:latest # "default" is the image cephadm defaults to; change to test specific nvmeof images, example "latest"
+ rbd:
+ pool_name: mypool
+ image_name_prefix: myimage
+ gateway_config:
+ subsystems_count: 10
+ namespaces_count: 90 # each subsystem
+ cli_image: quay.io/ceph/nvmeof-cli:latest
+
+- cephadm.wait_for_service:
+ service: nvmeof.mypool.mygroup0
+
+- cephadm.exec:
+ host.a:
+ - ceph orch ls nvmeof --export > /tmp/nvmeof-orig.yaml
+ - cp /tmp/nvmeof-orig.yaml /tmp/nvmeof-no-huge-page.yaml
+ - "sed -i '/ pool: mypool/a\\ spdk_mem_size: 4096' /tmp/nvmeof-no-huge-page.yaml"
+ - cat /tmp/nvmeof-no-huge-page.yaml
+ - ceph orch ls --refresh
+ - ceph orch apply -i /tmp/nvmeof-no-huge-page.yaml
+ - ceph orch redeploy nvmeof.mypool.mygroup0
+
+- cephadm.wait_for_service:
+ service: nvmeof.mypool.mygroup0
+
+- workunit:
+ no_coverage_and_limits: true
+ clients:
+ client.0:
+ - nvmeof/setup_subsystem.sh
+ - nvmeof/basic_tests.sh
+ env:
+ RBD_POOL: mypool
+ RBD_IMAGE_PREFIX: myimage
diff --git a/qa/suites/nvmeof/thrash/gateway-initiator-setup/3-subsys-60-namespace.yaml b/qa/suites/nvmeof/thrash/gateway-initiator-setup/120-subsys-8-namespace.yaml
index b4755a6433b..0f7ac011a60 100644
--- a/qa/suites/nvmeof/thrash/gateway-initiator-setup/3-subsys-60-namespace.yaml
+++ b/qa/suites/nvmeof/thrash/gateway-initiator-setup/120-subsys-8-namespace.yaml
@@ -6,8 +6,8 @@ tasks:
pool_name: mypool
image_name_prefix: myimage
gateway_config:
- subsystems_count: 3
- namespaces_count: 20 # each subsystem
+ subsystems_count: 120
+ namespaces_count: 8 # each subsystem
cli_image: quay.io/ceph/nvmeof-cli:latest
- cephadm.wait_for_service:
diff --git a/qa/suites/nvmeof/thrash/thrashers/nvmeof_mon_thrash.yaml b/qa/suites/nvmeof/thrash/thrashers/nvmeof_mon_thrash.yaml
index 6a5bd1d754e..46037784d31 100644
--- a/qa/suites/nvmeof/thrash/thrashers/nvmeof_mon_thrash.yaml
+++ b/qa/suites/nvmeof/thrash/thrashers/nvmeof_mon_thrash.yaml
@@ -8,6 +8,10 @@ overrides:
- out of quorum
# nvmeof daemon thrashing
- CEPHADM_FAILED_DAEMON
+ - NVMEOF_SINGLE_GATEWAY
+ - NVMEOF_GATEWAY_DOWN
+ - are in unavailable state
+ - is unavailable
- is in error state
- failed cephadm daemon
diff --git a/qa/suites/nvmeof/thrash/thrashers/nvmeof_thrash.yaml b/qa/suites/nvmeof/thrash/thrashers/nvmeof_thrash.yaml
index 422c821536a..b58dc14d87b 100644
--- a/qa/suites/nvmeof/thrash/thrashers/nvmeof_thrash.yaml
+++ b/qa/suites/nvmeof/thrash/thrashers/nvmeof_thrash.yaml
@@ -3,9 +3,14 @@ overrides:
log-ignorelist:
# nvmeof daemon thrashing
- CEPHADM_FAILED_DAEMON
+ - NVMEOF_SINGLE_GATEWAY
+ - NVMEOF_GATEWAY_DOWN
+ - are in unavailable state
+ - is unavailable
- is in error state
- failed cephadm daemon
tasks:
- nvmeof.thrash:
checker_host: 'client.0'
+ randomize: False
diff --git a/qa/suites/nvmeof/thrash/workloads/fio.yaml b/qa/suites/nvmeof/thrash/workloads/fio.yaml
index b042b92d6ae..f9a0d0ebde5 100644
--- a/qa/suites/nvmeof/thrash/workloads/fio.yaml
+++ b/qa/suites/nvmeof/thrash/workloads/fio.yaml
@@ -1,11 +1,11 @@
tasks:
- workunit:
no_coverage_and_limits: true
- timeout: 30m
+ timeout: 60m
clients:
client.0:
- - nvmeof/fio_test.sh --rbd_iostat
+ - nvmeof/fio_test.sh --random_devices 200
env:
RBD_POOL: mypool
IOSTAT_INTERVAL: '10'
- RUNTIME: '600'
+ RUNTIME: '1800'
diff --git a/qa/suites/orch/cephadm/smb/tasks/deploy_smb_mgr_clustering_ips.yaml b/qa/suites/orch/cephadm/smb/tasks/deploy_smb_mgr_clustering_ips.yaml
new file mode 100644
index 00000000000..3bbf30ea427
--- /dev/null
+++ b/qa/suites/orch/cephadm/smb/tasks/deploy_smb_mgr_clustering_ips.yaml
@@ -0,0 +1,91 @@
+roles:
+# Test is for basic smb deployment & functionality. one node cluster is OK
+- - host.a
+ - mon.a
+ - mgr.x
+ - osd.0
+ - osd.1
+ - client.0
+- - host.b
+ - mon.b
+ - osd.2
+ - osd.3
+- - host.c
+ - mon.c
+ - osd.4
+ - osd.5
+# Reserve a host for acting as a domain controller and smb client
+- - host.d
+ - cephadm.exclude
+overrides:
+ ceph:
+ log-only-match:
+ - CEPHADM_
+tasks:
+- cephadm.configure_samba_client_container:
+ role: host.d
+- vip:
+ count: 1
+- cephadm:
+
+- cephadm.shell:
+ host.a:
+ - ceph fs volume create cephfs
+- cephadm.wait_for_service:
+ service: mds.cephfs
+
+- cephadm.shell:
+ host.a:
+ # add subvolgroup & subvolumes for test
+ - cmd: ceph fs subvolumegroup create cephfs smb
+ - cmd: ceph fs subvolume create cephfs sv1 --group-name=smb --mode=0777
+ - cmd: ceph fs subvolume create cephfs sv2 --group-name=smb --mode=0777
+ # set up smb cluster and shares
+ - cmd: ceph mgr module enable smb
+ - cmd: sleep 30
+ - cmd: >
+ ceph smb cluster create modusr1 user
+ --define-user-pass=user1%t3stP4ss1
+ --placement=count:3
+ --clustering=default
+ --public_addrs={{VIP0}}/{{VIPPREFIXLEN}}
+ - cmd: ceph smb share create modusr1 share1 cephfs / --subvolume=smb/sv1
+ - cmd: ceph smb share create modusr1 share2 cephfs / --subvolume=smb/sv2
+# Wait for the smb service to start
+- cephadm.wait_for_service:
+ service: smb.modusr1
+
+# Check if shares exist
+- cephadm.exec:
+ host.d:
+ - sleep 30
+ - "{{ctx.samba_client_container_cmd|join(' ')}} smbclient -U user1%t3stP4ss1 //{{'host.a'|role_to_remote|attr('ip_address')}}/share1 -c ls"
+ - "{{ctx.samba_client_container_cmd|join(' ')}} smbclient -U user1%t3stP4ss1 //{{'host.a'|role_to_remote|attr('ip_address')}}/share2 -c ls"
+
+# verify CTDB is healthy, cluster well formed
+- cephadm.exec:
+ host.a:
+ - "{{ctx.cephadm}} ls --no-detail | {{ctx.cephadm}} shell jq -r 'map(select(.name | startswith(\"smb.modusr1\")))[-1].name' > /tmp/svcname"
+ - "{{ctx.cephadm}} enter -n $(cat /tmp/svcname) ctdb status > /tmp/ctdb_status"
+ - cat /tmp/ctdb_status
+ - grep 'pnn:0 .*OK' /tmp/ctdb_status
+ - grep 'pnn:1 .*OK' /tmp/ctdb_status
+ - grep 'pnn:2 .*OK' /tmp/ctdb_status
+ - grep 'Number of nodes:3' /tmp/ctdb_status
+ - rm -rf /tmp/svcname /tmp/ctdb_status
+
+# Test the assigned VIP
+- cephadm.exec:
+ host.d:
+ - sleep 30
+ - "{{ctx.samba_client_container_cmd|join(' ')}} smbclient -U user1%t3stP4ss1 //{{VIP0}}/share1 -c ls"
+ - "{{ctx.samba_client_container_cmd|join(' ')}} smbclient -U user1%t3stP4ss1 //{{VIP0}}/share2 -c ls"
+
+- cephadm.shell:
+ host.a:
+ - cmd: ceph smb share rm modusr1 share2
+ - cmd: ceph smb share rm modusr1 share1
+ - cmd: ceph smb cluster rm modusr1
+# Wait for the smb service to be removed
+- cephadm.wait_for_service_not_present:
+ service: smb.modusr1
diff --git a/qa/suites/rados/mgr/tasks/4-units/module_selftest.yaml b/qa/suites/rados/mgr/tasks/4-units/module_selftest.yaml
index 1eb4a184dca..e2a2ca03cc9 100644
--- a/qa/suites/rados/mgr/tasks/4-units/module_selftest.yaml
+++ b/qa/suites/rados/mgr/tasks/4-units/module_selftest.yaml
@@ -6,7 +6,6 @@ overrides:
- objects misplaced
- Synthetic exception in serve
- influxdb python module not found
- - \(MGR_ZABBIX_
- foo bar
- Failed to open Telegraf
- evicting unresponsive client
diff --git a/qa/suites/rados/monthrash/workloads/rados_mon_osdmap_prune.yaml b/qa/suites/rados/monthrash/workloads/rados_mon_osdmap_prune.yaml
index 372bf2561fa..8b3c4c11ac6 100644
--- a/qa/suites/rados/monthrash/workloads/rados_mon_osdmap_prune.yaml
+++ b/qa/suites/rados/monthrash/workloads/rados_mon_osdmap_prune.yaml
@@ -15,6 +15,7 @@ overrides:
# causing tests to fail due to health warns, even if
# the tests themselves are successful.
- \(OSDMAP_FLAGS\)
+ - \(PG_DEGRADED\)
tasks:
- workunit:
clients:
diff --git a/qa/suites/rados/objectstore/backends/ceph_test_bluefs.yaml b/qa/suites/rados/objectstore/backends/ceph_test_bluefs.yaml
new file mode 100644
index 00000000000..7cd47898544
--- /dev/null
+++ b/qa/suites/rados/objectstore/backends/ceph_test_bluefs.yaml
@@ -0,0 +1,8 @@
+roles:
+- [mon.a, mgr.x, osd.0, osd.1, client.0]
+tasks:
+- install:
+- exec:
+ client.0:
+ - mkdir $TESTDIR/ceph_test_bluefs && cd $TESTDIR/ceph_test_bluefs && ceph_test_bluefs --log-file $TESTDIR/archive/ceph_test_bluefs.log --debug-bluefs 5/20 --gtest_catch_exceptions=0
+ - rm -rf $TESTDIR/ceph_test_bluefs
diff --git a/qa/suites/rados/rest/mgr-restful.yaml b/qa/suites/rados/rest/mgr-restful.yaml
deleted file mode 100644
index 4901f401d30..00000000000
--- a/qa/suites/rados/rest/mgr-restful.yaml
+++ /dev/null
@@ -1,31 +0,0 @@
-openstack:
-- volumes: # attached to each instance
- count: 3
- size: 10 # GB
-roles:
-- [mon.a, mgr.x, osd.0, osd.1, osd.2, mds.a, client.a]
-tasks:
-- install:
-- ceph:
- log-ignorelist:
- - overall HEALTH_
- - \(MGR_DOWN\)
- - \(PG_
- - \(OSD_
- - \(OBJECT_
- - \(OSDMAP_FLAGS\)
- - \(POOL_APP_NOT_ENABLED\)
-- exec:
- mon.a:
- - ceph restful create-key admin
- - ceph restful create-self-signed-cert
- - ceph restful restart
-- workunit:
- clients:
- client.a:
- - rest/test-restful.sh
-- exec:
- mon.a:
- - ceph restful delete-key admin
- - ceph restful list-keys | jq ".admin" | grep null
-
diff --git a/qa/suites/rados/rest/supported-random-distro$ b/qa/suites/rados/rest/supported-random-distro$
deleted file mode 120000
index 7cef21eeffd..00000000000
--- a/qa/suites/rados/rest/supported-random-distro$
+++ /dev/null
@@ -1 +0,0 @@
-../basic/supported-random-distro$
\ No newline at end of file
diff --git a/qa/suites/rados/singleton/all/stretch-mode-5-mons-8-osds.yaml b/qa/suites/rados/singleton/all/stretch-mode-5-mons-8-osds.yaml
new file mode 100644
index 00000000000..69a54b0f1b7
--- /dev/null
+++ b/qa/suites/rados/singleton/all/stretch-mode-5-mons-8-osds.yaml
@@ -0,0 +1,57 @@
+roles:
+- - mon.a
+ - mon.b
+ - mgr.a
+ - mgr.b
+ - osd.0
+ - osd.1
+ - osd.2
+ - osd.3
+- - mon.c
+ - mon.d
+ - mgr.c
+ - mgr.d
+ - osd.4
+ - osd.5
+ - osd.6
+ - osd.7
+- - mon.e
+- - client.0
+
+openstack:
+ - volumes: # attached to each instance
+ count: 3
+ size: 10 # GB
+overrides:
+ ceph:
+ conf:
+ global:
+ osd pool default size: 3
+ osd pool default min size: 2
+ mon:
+ debug mon: 30
+tasks:
+- install:
+- ceph:
+ pre-mgr-commands:
+ - sudo ceph config set mgr mgr_pool false --force
+ log-ignorelist:
+ - \(POOL_
+ - \(CACHE_POOL_
+ - overall HEALTH_
+ - \(PG_AVAILABILITY\)
+ - Reduced data availability
+ - \(PG_DEGRADED\)
+ - \(MON_DOWN\)
+ - \(OSD_DATACENTER_DOWN\)
+ - \(OSD_DOWN\)
+ - \(OSD_HOST_DOWN\)
+
+
+- workunit:
+ clients:
+ client.0:
+ - mon/mon-stretch-mode-5-mons-8-osds.sh
+- cephfs_test_runner:
+ modules:
+ - tasks.stretch_mode_disable_enable
diff --git a/qa/suites/rados/thrash-old-clients/workloads/rbd_cls.yaml b/qa/suites/rados/thrash-old-clients/workloads/rbd_cls.yaml
index a8bbbafece0..b916bed1475 100644
--- a/qa/suites/rados/thrash-old-clients/workloads/rbd_cls.yaml
+++ b/qa/suites/rados/thrash-old-clients/workloads/rbd_cls.yaml
@@ -2,6 +2,9 @@ meta:
- desc: |
rbd object class functional tests
tasks:
-- exec:
- client.2:
- - ceph_test_cls_rbd --gtest_filter=-TestClsRbd.get_features:TestClsRbd.parents:TestClsRbd.mirror
+- workunit:
+ clients:
+ client.2:
+ - cls/test_cls_rbd.sh
+ env:
+ CLS_RBD_GTEST_FILTER: '-TestClsRbd.group_snap_set:TestClsRbd.group_snap_remove'
diff --git a/qa/suites/rados/valgrind-leaks/1-start.yaml b/qa/suites/rados/valgrind-leaks/1-start.yaml
index 1cdd8a688e8..cc8c8e53766 100644
--- a/qa/suites/rados/valgrind-leaks/1-start.yaml
+++ b/qa/suites/rados/valgrind-leaks/1-start.yaml
@@ -12,6 +12,7 @@ overrides:
- overall HEALTH_
- \(PG_
- \(POOL_APP_NOT_ENABLED\)
+ - OSD bench result
conf:
global:
osd heartbeat grace: 40
diff --git a/qa/suites/rados/verify/validater/valgrind.yaml b/qa/suites/rados/verify/validater/valgrind.yaml
index c70893893fd..17cf141b0cd 100644
--- a/qa/suites/rados/verify/validater/valgrind.yaml
+++ b/qa/suites/rados/verify/validater/valgrind.yaml
@@ -26,6 +26,8 @@ overrides:
- \(MON_DOWN\)
- \(SLOW_OPS\)
- slow request
+ - OSD bench result
+ - OSD_DOWN
valgrind:
mon: [--tool=memcheck, --leak-check=full, --show-reachable=yes]
osd: [--tool=memcheck]
diff --git a/qa/suites/rbd/migration/6-prepare/qcow2-https.yaml b/qa/suites/rbd/migration/6-prepare/qcow2-https.yaml
new file mode 100644
index 00000000000..d2072c41a68
--- /dev/null
+++ b/qa/suites/rbd/migration/6-prepare/qcow2-https.yaml
@@ -0,0 +1,8 @@
+tasks:
+ - exec:
+ client.0:
+ - mkdir /home/ubuntu/cephtest/migration
+ - qemu-img create -f qcow2 /home/ubuntu/cephtest/migration/empty.qcow2 1G
+ - echo '{"type":"qcow","stream":{"type":"http","url":"https://download.ceph.com/qa/ubuntu-12.04.qcow2"}}' | rbd migration prepare --import-only --source-spec-path - client.0.0
+ - rbd migration prepare --import-only --source-spec '{"type":"qcow","stream":{"type":"file","file_path":"/home/ubuntu/cephtest/migration/empty.qcow2"}}' client.0.1
+ - rbd migration prepare --import-only --source-spec '{"type":"qcow","stream":{"type":"file","file_path":"/home/ubuntu/cephtest/migration/empty.qcow2"}}' client.0.2
diff --git a/qa/suites/rgw/bucket-logging/% b/qa/suites/rgw/bucket-logging/%
new file mode 100644
index 00000000000..e69de29bb2d
--- /dev/null
+++ b/qa/suites/rgw/bucket-logging/%
diff --git a/qa/suites/rgw/bucket-logging/.qa b/qa/suites/rgw/bucket-logging/.qa
new file mode 120000
index 00000000000..a602a0353e7
--- /dev/null
+++ b/qa/suites/rgw/bucket-logging/.qa
@@ -0,0 +1 @@
+../.qa/
\ No newline at end of file
diff --git a/qa/suites/rgw/bucket-logging/0-install.yaml b/qa/suites/rgw/bucket-logging/0-install.yaml
new file mode 100644
index 00000000000..6cf82f57476
--- /dev/null
+++ b/qa/suites/rgw/bucket-logging/0-install.yaml
@@ -0,0 +1,13 @@
+tasks:
+- install:
+- ceph:
+- openssl_keys:
+- rgw: [client.0]
+- tox: [client.0]
+
+overrides:
+ ceph:
+ conf:
+ global:
+ osd_min_pg_log_entries: 10
+ osd_max_pg_log_entries: 10
diff --git a/qa/suites/rgw/bucket-logging/beast.yaml b/qa/suites/rgw/bucket-logging/beast.yaml
new file mode 120000
index 00000000000..09ced62c42a
--- /dev/null
+++ b/qa/suites/rgw/bucket-logging/beast.yaml
@@ -0,0 +1 @@
+.qa/rgw_frontend/beast.yaml
\ No newline at end of file
diff --git a/qa/suites/rgw/bucket-logging/fixed-1.yaml b/qa/suites/rgw/bucket-logging/fixed-1.yaml
new file mode 120000
index 00000000000..02df5dd0cd0
--- /dev/null
+++ b/qa/suites/rgw/bucket-logging/fixed-1.yaml
@@ -0,0 +1 @@
+.qa/clusters/fixed-1.yaml
\ No newline at end of file
diff --git a/qa/suites/rgw/bucket-logging/ignore-pg-availability.yaml b/qa/suites/rgw/bucket-logging/ignore-pg-availability.yaml
new file mode 120000
index 00000000000..32340b1fa8b
--- /dev/null
+++ b/qa/suites/rgw/bucket-logging/ignore-pg-availability.yaml
@@ -0,0 +1 @@
+.qa/rgw/ignore-pg-availability.yaml
\ No newline at end of file
diff --git a/qa/suites/rgw/bucket-logging/overrides.yaml b/qa/suites/rgw/bucket-logging/overrides.yaml
new file mode 100644
index 00000000000..a448a323d36
--- /dev/null
+++ b/qa/suites/rgw/bucket-logging/overrides.yaml
@@ -0,0 +1,10 @@
+overrides:
+ ceph:
+ conf:
+ client:
+ setuser: ceph
+ setgroup: ceph
+ debug rgw: 20
+ rgw bucket logging obj roll time: 5
+ rgw:
+ storage classes: LUKEWARM, FROZEN
diff --git a/qa/suites/rgw/bucket-logging/s3tests-branch.yaml b/qa/suites/rgw/bucket-logging/s3tests-branch.yaml
new file mode 120000
index 00000000000..bdcaca48ae0
--- /dev/null
+++ b/qa/suites/rgw/bucket-logging/s3tests-branch.yaml
@@ -0,0 +1 @@
+.qa/rgw/s3tests-branch.yaml
\ No newline at end of file
diff --git a/qa/suites/rgw/bucket-logging/supported-distros b/qa/suites/rgw/bucket-logging/supported-distros
new file mode 120000
index 00000000000..78f2991b407
--- /dev/null
+++ b/qa/suites/rgw/bucket-logging/supported-distros
@@ -0,0 +1 @@
+.qa/distros/supported-random-distro$/
\ No newline at end of file
diff --git a/qa/suites/rgw/bucket-logging/tasks/+ b/qa/suites/rgw/bucket-logging/tasks/+
new file mode 100644
index 00000000000..e69de29bb2d
--- /dev/null
+++ b/qa/suites/rgw/bucket-logging/tasks/+
diff --git a/qa/suites/rgw/bucket-logging/tasks/s3tests.yaml b/qa/suites/rgw/bucket-logging/tasks/s3tests.yaml
new file mode 100644
index 00000000000..c1d3b7192e1
--- /dev/null
+++ b/qa/suites/rgw/bucket-logging/tasks/s3tests.yaml
@@ -0,0 +1,6 @@
+tasks:
+- s3tests:
+ client.0:
+ boto3_extensions: True
+ rgw_server: client.0
+ extra_attrs: ["bucket_logging"]
diff --git a/qa/suites/rgw/crypt/2-kms/barbican.yaml b/qa/suites/rgw/crypt/2-kms/barbican.yaml
index 9bf5fb81131..e3f78810416 100644
--- a/qa/suites/rgw/crypt/2-kms/barbican.yaml
+++ b/qa/suites/rgw/crypt/2-kms/barbican.yaml
@@ -27,7 +27,7 @@ tasks:
- tox: [ client.0 ]
- keystone:
client.0:
- force-branch: stable/2023.1
+ force-branch: stable/2024.1
services:
- name: swift
type: object-store
@@ -68,7 +68,7 @@ tasks:
project: s3
- barbican:
client.0:
- force-branch: stable/2023.1
+ force-branch: stable/2024.1
use-keystone-role: client.0
keystone_authtoken:
auth_plugin: password
diff --git a/qa/suites/rgw/multisite/realms/two-zonegroup.yaml.disabled b/qa/suites/rgw/multisite/realms/two-zonegroup.yaml
index 5e4234236a9..ac2104cdd05 100644
--- a/qa/suites/rgw/multisite/realms/two-zonegroup.yaml.disabled
+++ b/qa/suites/rgw/multisite/realms/two-zonegroup.yaml
@@ -28,4 +28,4 @@ overrides:
- name: b2
endpoints: [c2.client.1]
rgw-multisite-tests:
- args: [tests.py]
+ args: [tests.py, -a, '!fails_with_rgw']
diff --git a/qa/suites/rgw/notifications/tasks/kafka/test_kafka.yaml b/qa/suites/rgw/notifications/tasks/kafka/test_kafka.yaml
index 462570e7727..303f98d540e 100644
--- a/qa/suites/rgw/notifications/tasks/kafka/test_kafka.yaml
+++ b/qa/suites/rgw/notifications/tasks/kafka/test_kafka.yaml
@@ -1,7 +1,7 @@
tasks:
- kafka:
client.0:
- kafka_version: 2.6.0
+ kafka_version: 3.8.1
- notification-tests:
client.0:
extra_attr: ["kafka_test", "data_path_v2_kafka_test"]
diff --git a/qa/suites/rgw/notifications/tasks/kafka_failover/+ b/qa/suites/rgw/notifications/tasks/kafka_failover/+
new file mode 100644
index 00000000000..e69de29bb2d
--- /dev/null
+++ b/qa/suites/rgw/notifications/tasks/kafka_failover/+
diff --git a/qa/suites/rgw/notifications/tasks/kafka_failover/0-install.yaml b/qa/suites/rgw/notifications/tasks/kafka_failover/0-install.yaml
new file mode 100644
index 00000000000..5c83d5c0d23
--- /dev/null
+++ b/qa/suites/rgw/notifications/tasks/kafka_failover/0-install.yaml
@@ -0,0 +1,20 @@
+tasks:
+- install:
+- ceph:
+- openssl_keys:
+- rgw:
+ client.0:
+
+overrides:
+ install:
+ ceph:
+ extra_system_packages:
+ rpm:
+ - java
+ deb:
+ - default-jre
+ ceph:
+ conf:
+ global:
+ osd_min_pg_log_entries: 10
+ osd_max_pg_log_entries: 10
diff --git a/qa/suites/rgw/notifications/tasks/kafka_failover/supported-distros b/qa/suites/rgw/notifications/tasks/kafka_failover/supported-distros
new file mode 120000
index 00000000000..46280a42a96
--- /dev/null
+++ b/qa/suites/rgw/notifications/tasks/kafka_failover/supported-distros
@@ -0,0 +1 @@
+../../.qa/distros/supported-random-distro$/
\ No newline at end of file
diff --git a/qa/suites/rgw/notifications/tasks/kafka_failover/test_kafka.yaml b/qa/suites/rgw/notifications/tasks/kafka_failover/test_kafka.yaml
new file mode 100644
index 00000000000..01d6fc637de
--- /dev/null
+++ b/qa/suites/rgw/notifications/tasks/kafka_failover/test_kafka.yaml
@@ -0,0 +1,8 @@
+tasks:
+- kafka-failover:
+ client.0:
+ kafka_version: 3.8.1
+- notification-tests:
+ client.0:
+ extra_attr: ["kafka_failover"]
+ rgw_server: client.0
diff --git a/qa/suites/rgw/sts/auth-order/.qa b/qa/suites/rgw/sts/auth-order/.qa
new file mode 120000
index 00000000000..fea2489fdf6
--- /dev/null
+++ b/qa/suites/rgw/sts/auth-order/.qa
@@ -0,0 +1 @@
+../.qa
\ No newline at end of file
diff --git a/qa/suites/rgw/sts/auth-order/local-sts.yaml b/qa/suites/rgw/sts/auth-order/local-sts.yaml
new file mode 100644
index 00000000000..2f7dcc6b128
--- /dev/null
+++ b/qa/suites/rgw/sts/auth-order/local-sts.yaml
@@ -0,0 +1,5 @@
+overrides:
+ ceph:
+ conf:
+ client:
+ rgw s3 auth order: local, sts, external
diff --git a/qa/suites/rgw/sts/auth-order/sts-local.yaml b/qa/suites/rgw/sts/auth-order/sts-local.yaml
new file mode 100644
index 00000000000..a7b00d00f0b
--- /dev/null
+++ b/qa/suites/rgw/sts/auth-order/sts-local.yaml
@@ -0,0 +1,5 @@
+overrides:
+ ceph:
+ conf:
+ client:
+ rgw s3 auth order: sts, local, external
diff --git a/qa/suites/rgw/tempest/0-install.yaml b/qa/suites/rgw/tempest/0-install.yaml
index f968db20c2b..b6ef17de4ee 100644
--- a/qa/suites/rgw/tempest/0-install.yaml
+++ b/qa/suites/rgw/tempest/0-install.yaml
@@ -4,7 +4,7 @@ tasks:
- tox: [ client.0 ]
- keystone:
client.0:
- force-branch: stable/2023.1
+ force-branch: stable/2024.1
services:
- name: swift
type: object-store
diff --git a/qa/suites/rgw/tempest/tasks/s3/% b/qa/suites/rgw/tempest/tasks/s3/%
new file mode 100644
index 00000000000..e69de29bb2d
--- /dev/null
+++ b/qa/suites/rgw/tempest/tasks/s3/%
diff --git a/qa/suites/rgw/tempest/tasks/s3/.qa b/qa/suites/rgw/tempest/tasks/s3/.qa
new file mode 120000
index 00000000000..fea2489fdf6
--- /dev/null
+++ b/qa/suites/rgw/tempest/tasks/s3/.qa
@@ -0,0 +1 @@
+../.qa
\ No newline at end of file
diff --git a/qa/suites/rgw/tempest/tasks/s3/auth-order/.qa b/qa/suites/rgw/tempest/tasks/s3/auth-order/.qa
new file mode 120000
index 00000000000..fea2489fdf6
--- /dev/null
+++ b/qa/suites/rgw/tempest/tasks/s3/auth-order/.qa
@@ -0,0 +1 @@
+../.qa
\ No newline at end of file
diff --git a/qa/suites/rgw/tempest/tasks/s3/auth-order/external-local.yaml b/qa/suites/rgw/tempest/tasks/s3/auth-order/external-local.yaml
new file mode 100644
index 00000000000..c46a51e0958
--- /dev/null
+++ b/qa/suites/rgw/tempest/tasks/s3/auth-order/external-local.yaml
@@ -0,0 +1,5 @@
+overrides:
+ ceph:
+ conf:
+ client:
+ rgw s3 auth order: sts, external, local
diff --git a/qa/suites/rgw/tempest/tasks/s3/auth-order/local-external.yaml b/qa/suites/rgw/tempest/tasks/s3/auth-order/local-external.yaml
new file mode 100644
index 00000000000..a7b00d00f0b
--- /dev/null
+++ b/qa/suites/rgw/tempest/tasks/s3/auth-order/local-external.yaml
@@ -0,0 +1,5 @@
+overrides:
+ ceph:
+ conf:
+ client:
+ rgw s3 auth order: sts, local, external
diff --git a/qa/suites/rgw/tempest/tasks/s3tests.yaml b/qa/suites/rgw/tempest/tasks/s3/s3tests.yaml
index 4efb579fa83..4efb579fa83 100644
--- a/qa/suites/rgw/tempest/tasks/s3tests.yaml
+++ b/qa/suites/rgw/tempest/tasks/s3/s3tests.yaml
diff --git a/qa/suites/rgw/verify/tasks/s3tests-java.yaml b/qa/suites/rgw/verify/tasks/zzz-s3tests-java.yaml
index 9ad89cc6790..9ad89cc6790 100644
--- a/qa/suites/rgw/verify/tasks/s3tests-java.yaml
+++ b/qa/suites/rgw/verify/tasks/zzz-s3tests-java.yaml
diff --git a/qa/suites/upgrade/quincy-x/filestore-remove-check/0-cluster/start.yaml b/qa/suites/upgrade/quincy-x/filestore-remove-check/0-cluster/start.yaml
index 57e455ba78d..a0adaecf9b2 100644
--- a/qa/suites/upgrade/quincy-x/filestore-remove-check/0-cluster/start.yaml
+++ b/qa/suites/upgrade/quincy-x/filestore-remove-check/0-cluster/start.yaml
@@ -19,6 +19,20 @@ overrides:
- \(MGR_DOWN\)
- slow request
- \(MON_MSGR2_NOT_ENABLED\)
+ - \(OSD_DOWN\)
+ - \(OSD_HOST_DOWN\)
+ - \(POOL_APP_NOT_ENABLED\)
+ - OSD_DOWN
+ - mons down
+ - mon down
+ - MON_DOWN
+ - out of quorum
+ - PG_DEGRADED
+ - Reduced data availability
+ - Degraded data redundancy
+ - OSDMAP_FLAGS
+ - OSD_ROOT_DOWN
+
conf:
global:
enable experimental unrecoverable data corrupting features: "*"
@@ -30,4 +44,3 @@ roles:
- mgr.x
- osd.0
- osd.1
- - osd.2
\ No newline at end of file
diff --git a/qa/suites/upgrade/quincy-x/filestore-remove-check/1-ceph-install/quincy.yaml b/qa/suites/upgrade/quincy-x/filestore-remove-check/1-ceph-install/quincy.yaml
index e4897db4d35..48cfa2f756f 100644
--- a/qa/suites/upgrade/quincy-x/filestore-remove-check/1-ceph-install/quincy.yaml
+++ b/qa/suites/upgrade/quincy-x/filestore-remove-check/1-ceph-install/quincy.yaml
@@ -18,9 +18,6 @@ tasks:
mon:
mon_warn_on_insecure_global_id_reclaim: false
mon_warn_on_insecure_global_id_reclaim_allowed: false
- log-ignorelist:
- - Not found or unloadable
- - evicting unresponsive client
- exec:
osd.0:
- ceph osd require-osd-release quincy
@@ -30,14 +27,3 @@ overrides:
conf:
mon:
mon warn on osd down out interval zero: false
- log-ignorelist:
- - \(POOL_APP_NOT_ENABLED\)
- - OSD_DOWN
- - mons down
- - mon down
- - MON_DOWN
- - out of quorum
- - PG_DEGRADED
- - Reduced data availability
- - Degraded data redundancy
- - OSDMAP_FLAGS
diff --git a/qa/suites/upgrade/quincy-x/filestore-remove-check/2 - upgrade.yaml b/qa/suites/upgrade/quincy-x/filestore-remove-check/2 - upgrade.yaml
index 6aa429f18b5..fe4ff9bb113 100644
--- a/qa/suites/upgrade/quincy-x/filestore-remove-check/2 - upgrade.yaml
+++ b/qa/suites/upgrade/quincy-x/filestore-remove-check/2 - upgrade.yaml
@@ -3,14 +3,13 @@ meta:
install upgrade ceph/-x on cluster
restart : mons, osd.*
tasks:
+- print: "**** start install.upgrade of nodes"
- install.upgrade:
- mon.a:
-- exec:
- osd.0:
- - ceph osd require-osd-release quincy
+ all:
- print: "**** done install.upgrade of nodes"
+- print: "**** start ceph.restart of all osds"
- ceph.restart:
- daemons: [mon.a,mgr.x,osd.0,osd.1,osd.2]
+ daemons: [osd.0,osd.1,osd.2]
mon-health-to-clog: false
wait-for-healthy: false
wait-for-osds-up: false
diff --git a/qa/suites/upgrade/quincy-x/parallel/0-start.yaml b/qa/suites/upgrade/quincy-x/parallel/0-start.yaml
index 40fbcefe728..62fb6427f72 100644
--- a/qa/suites/upgrade/quincy-x/parallel/0-start.yaml
+++ b/qa/suites/upgrade/quincy-x/parallel/0-start.yaml
@@ -32,13 +32,22 @@ overrides:
osd:
osd shutdown pgref assert: true
log-ignorelist:
- - \(POOL_APP_NOT_ENABLED\)
+ - do not have an application enabled
+ - application not enabled
+ - or freeform for custom applications
+ - POOL_APP_NOT_ENABLED
+ - is down
- OSD_DOWN
- mons down
- mon down
- MON_DOWN
- out of quorum
+ - PG_AVAILABILITY
- PG_DEGRADED
- Reduced data availability
- Degraded data redundancy
+ - pg .* is stuck inactive
+ - pg .* is .*degraded
+ - FS_DEGRADED
- OSDMAP_FLAGS
+ - OSD_UPGRADE_FINISHED
diff --git a/qa/suites/upgrade/quincy-x/parallel/1-tasks.yaml b/qa/suites/upgrade/quincy-x/parallel/1-tasks.yaml
index e27c7c0f092..f7167975aa9 100644
--- a/qa/suites/upgrade/quincy-x/parallel/1-tasks.yaml
+++ b/qa/suites/upgrade/quincy-x/parallel/1-tasks.yaml
@@ -1,11 +1,8 @@
overrides:
ceph:
log-ignorelist:
- - mons down
- - mon down
- - MON_DOWN
- - out of quorum
- - PG_AVAILABILITY
+ - Telemetry requires re-opt-in
+ - telemetry module includes new collections
tasks:
- install:
branch: quincy
diff --git a/qa/suites/upgrade/quincy-x/parallel/workload/rados_api.yaml b/qa/suites/upgrade/quincy-x/parallel/workload/rados_api.yaml
index 9c2ff9da185..9a0585cc074 100644
--- a/qa/suites/upgrade/quincy-x/parallel/workload/rados_api.yaml
+++ b/qa/suites/upgrade/quincy-x/parallel/workload/rados_api.yaml
@@ -9,4 +9,6 @@ workload:
clients:
client.0:
- cls
+ env:
+ CLS_RBD_GTEST_FILTER: '-TestClsRbd.group_snap_set:TestClsRbd.group_snap_remove'
- print: "**** done end rados_api.yaml"
diff --git a/qa/suites/upgrade/quincy-x/stress-split/1-start.yaml b/qa/suites/upgrade/quincy-x/stress-split/1-start.yaml
index 005514292ce..5641471629e 100644
--- a/qa/suites/upgrade/quincy-x/stress-split/1-start.yaml
+++ b/qa/suites/upgrade/quincy-x/stress-split/1-start.yaml
@@ -1,17 +1,25 @@
overrides:
ceph:
log-ignorelist:
- - \(POOL_APP_NOT_ENABLED\)
+ - do not have an application enabled
+ - application not enabled
+ - or freeform for custom applications
+ - POOL_APP_NOT_ENABLED
+ - is down
- OSD_DOWN
- mons down
- mon down
- MON_DOWN
- out of quorum
+ - PG_AVAILABILITY
- PG_DEGRADED
- Reduced data availability
- Degraded data redundancy
+ - pg .* is stuck inactive
+ - pg .* is .*degraded
+ - FS_DEGRADED
- OSDMAP_FLAGS
- - PG_AVAILABILITY
+ - OSD_UPGRADE_FINISHED
tasks:
- install:
branch: quincy
diff --git a/qa/suites/upgrade/quincy-x/stress-split/2-first-half-tasks/rbd-cls.yaml b/qa/suites/upgrade/quincy-x/stress-split/2-first-half-tasks/rbd-cls.yaml
index b722f187361..a55dddf46f7 100644
--- a/qa/suites/upgrade/quincy-x/stress-split/2-first-half-tasks/rbd-cls.yaml
+++ b/qa/suites/upgrade/quincy-x/stress-split/2-first-half-tasks/rbd-cls.yaml
@@ -7,4 +7,6 @@ first-half-tasks:
clients:
client.0:
- cls/test_cls_rbd.sh
+ env:
+ CLS_RBD_GTEST_FILTER: '-TestClsRbd.group_snap_set:TestClsRbd.group_snap_remove'
- print: "**** done cls/test_cls_rbd.sh 5-workload"
diff --git a/qa/suites/upgrade/quincy-x/stress-split/3-stress-tasks/rbd-cls.yaml b/qa/suites/upgrade/quincy-x/stress-split/3-stress-tasks/rbd-cls.yaml
index 649b024a476..d54ba8039d0 100644
--- a/qa/suites/upgrade/quincy-x/stress-split/3-stress-tasks/rbd-cls.yaml
+++ b/qa/suites/upgrade/quincy-x/stress-split/3-stress-tasks/rbd-cls.yaml
@@ -7,4 +7,6 @@ stress-tasks:
clients:
client.0:
- cls/test_cls_rbd.sh
+ env:
+ CLS_RBD_GTEST_FILTER: '-TestClsRbd.group_snap_set:TestClsRbd.group_snap_remove'
- print: "**** done cls/test_cls_rbd.sh 5-workload"
diff --git a/qa/suites/upgrade/reef-x/parallel/0-start.yaml b/qa/suites/upgrade/reef-x/parallel/0-start.yaml
index 146bd57960d..62fb6427f72 100644
--- a/qa/suites/upgrade/reef-x/parallel/0-start.yaml
+++ b/qa/suites/upgrade/reef-x/parallel/0-start.yaml
@@ -32,4 +32,22 @@ overrides:
osd:
osd shutdown pgref assert: true
log-ignorelist:
- - PG_DEGRADED
+ - do not have an application enabled
+ - application not enabled
+ - or freeform for custom applications
+ - POOL_APP_NOT_ENABLED
+ - is down
+ - OSD_DOWN
+ - mons down
+ - mon down
+ - MON_DOWN
+ - out of quorum
+ - PG_AVAILABILITY
+ - PG_DEGRADED
+ - Reduced data availability
+ - Degraded data redundancy
+ - pg .* is stuck inactive
+ - pg .* is .*degraded
+ - FS_DEGRADED
+ - OSDMAP_FLAGS
+ - OSD_UPGRADE_FINISHED
diff --git a/qa/suites/upgrade/reef-x/parallel/1-tasks.yaml b/qa/suites/upgrade/reef-x/parallel/1-tasks.yaml
index ce4e0cc228b..b5160c2dd00 100644
--- a/qa/suites/upgrade/reef-x/parallel/1-tasks.yaml
+++ b/qa/suites/upgrade/reef-x/parallel/1-tasks.yaml
@@ -1,12 +1,8 @@
overrides:
ceph:
log-ignorelist:
- - mons down
- - mon down
- - MON_DOWN
- - out of quorum
- - PG_AVAILABILITY
- - PG_DEGRADED
+ - Telemetry requires re-opt-in
+ - telemetry module includes new collections
tasks:
- install:
branch: reef
diff --git a/qa/suites/upgrade/reef-x/parallel/overrides/ignorelist_health.yaml b/qa/suites/upgrade/reef-x/parallel/overrides/ignorelist_health.yaml
new file mode 100644
index 00000000000..fa93b2f2ece
--- /dev/null
+++ b/qa/suites/upgrade/reef-x/parallel/overrides/ignorelist_health.yaml
@@ -0,0 +1,19 @@
+overrides:
+ ceph:
+ log-ignorelist:
+ - MDS_ALL_DOWN
+ - MDS_UP_LESS_THAN_MAX
+ - OSD_SLOW_PING_TIME
+ - reached quota
+ - running out of quota
+ - overall HEALTH_
+ - CACHE_POOL_NO_HIT_SET
+ - pool\(s\) full
+ - POOL_FULL
+ - SMALLER_PGP_NUM
+ - SLOW_OPS
+ - CACHE_POOL_NEAR_FULL
+ - OBJECT_MISPLACED
+ - slow request
+ - noscrub
+ - nodeep-scrub
diff --git a/qa/suites/upgrade/reef-x/parallel/workload/rados_api.yaml b/qa/suites/upgrade/reef-x/parallel/workload/rados_api.yaml
index a46e34db5dd..79cf1a96601 100644
--- a/qa/suites/upgrade/reef-x/parallel/workload/rados_api.yaml
+++ b/qa/suites/upgrade/reef-x/parallel/workload/rados_api.yaml
@@ -9,4 +9,6 @@ workload:
clients:
client.0:
- cls
+ env:
+ CLS_RBD_GTEST_FILTER: '-TestClsRbd.group_snap_set:TestClsRbd.group_snap_remove'
- print: "**** done end rados_api.yaml"
diff --git a/qa/suites/upgrade/reef-x/stress-split/1-start.yaml b/qa/suites/upgrade/reef-x/stress-split/1-start.yaml
index 992f9e1bc36..59ccfe2cd02 100644
--- a/qa/suites/upgrade/reef-x/stress-split/1-start.yaml
+++ b/qa/suites/upgrade/reef-x/stress-split/1-start.yaml
@@ -1,11 +1,25 @@
overrides:
ceph:
log-ignorelist:
+ - do not have an application enabled
+ - application not enabled
+ - or freeform for custom applications
+ - POOL_APP_NOT_ENABLED
+ - is down
+ - OSD_DOWN
- mons down
- mon down
- MON_DOWN
- out of quorum
- PG_AVAILABILITY
+ - PG_DEGRADED
+ - Reduced data availability
+ - Degraded data redundancy
+ - pg .* is stuck inactive
+ - pg .* is .*degraded
+ - FS_DEGRADED
+ - OSDMAP_FLAGS
+ - OSD_UPGRADE_FINISHED
tasks:
- install:
branch: reef
diff --git a/qa/suites/upgrade/reef-x/stress-split/2-first-half-tasks/rbd-cls.yaml b/qa/suites/upgrade/reef-x/stress-split/2-first-half-tasks/rbd-cls.yaml
index f092096f444..79ad2af8ea1 100644
--- a/qa/suites/upgrade/reef-x/stress-split/2-first-half-tasks/rbd-cls.yaml
+++ b/qa/suites/upgrade/reef-x/stress-split/2-first-half-tasks/rbd-cls.yaml
@@ -7,4 +7,6 @@ first-half-tasks:
clients:
client.0:
- cls/test_cls_rbd.sh
+ env:
+ CLS_RBD_GTEST_FILTER: '-TestClsRbd.group_snap_set:TestClsRbd.group_snap_remove'
- print: "**** done cls/test_cls_rbd.sh 5-workload"
diff --git a/qa/suites/upgrade/reef-x/stress-split/3-stress-tasks/rbd-cls.yaml b/qa/suites/upgrade/reef-x/stress-split/3-stress-tasks/rbd-cls.yaml
index 05bb672b3ac..166327a58f9 100644
--- a/qa/suites/upgrade/reef-x/stress-split/3-stress-tasks/rbd-cls.yaml
+++ b/qa/suites/upgrade/reef-x/stress-split/3-stress-tasks/rbd-cls.yaml
@@ -7,4 +7,6 @@ stress-tasks:
clients:
client.0:
- cls/test_cls_rbd.sh
+ env:
+ CLS_RBD_GTEST_FILTER: '-TestClsRbd.group_snap_set:TestClsRbd.group_snap_remove'
- print: "**** done cls/test_cls_rbd.sh 5-workload"
diff --git a/qa/suites/upgrade/reef-x/stress-split/overrides/ignorelist_health.yaml b/qa/suites/upgrade/reef-x/stress-split/overrides/ignorelist_health.yaml
new file mode 100644
index 00000000000..fa93b2f2ece
--- /dev/null
+++ b/qa/suites/upgrade/reef-x/stress-split/overrides/ignorelist_health.yaml
@@ -0,0 +1,19 @@
+overrides:
+ ceph:
+ log-ignorelist:
+ - MDS_ALL_DOWN
+ - MDS_UP_LESS_THAN_MAX
+ - OSD_SLOW_PING_TIME
+ - reached quota
+ - running out of quota
+ - overall HEALTH_
+ - CACHE_POOL_NO_HIT_SET
+ - pool\(s\) full
+ - POOL_FULL
+ - SMALLER_PGP_NUM
+ - SLOW_OPS
+ - CACHE_POOL_NEAR_FULL
+ - OBJECT_MISPLACED
+ - slow request
+ - noscrub
+ - nodeep-scrub
diff --git a/qa/tasks/ceph.py b/qa/tasks/ceph.py
index 9b04e3dc675..8f666d2fa9b 100644
--- a/qa/tasks/ceph.py
+++ b/qa/tasks/ceph.py
@@ -1206,8 +1206,18 @@ def cluster(ctx, config):
args.extend([
run.Raw('|'), 'head', '-n', '1',
])
- stdout = mon0_remote.sh(args)
- return stdout or None
+ r = mon0_remote.run(
+ stdout=BytesIO(),
+ args=args,
+ stderr=StringIO(),
+ )
+ stdout = r.stdout.getvalue().decode()
+ if stdout:
+ return stdout
+ stderr = r.stderr.getvalue()
+ if stderr:
+ return stderr
+ return None
if first_in_ceph_log('\[ERR\]|\[WRN\]|\[SEC\]',
config['log_ignorelist']) is not None:
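The ceph.py hunk above replaces remote.sh() with remote.run() so that both stdout (BytesIO) and stderr (StringIO) of the log-grepping pipeline are captured, with stderr used as a fallback return value. A minimal sketch of the same stdout-then-stderr fallback, written against plain subprocess rather than teuthology's remote API (the grep pipeline and log path are illustrative only):

    import subprocess

    def first_matching_line(pattern, log_path="/var/log/ceph/ceph.log"):
        # Grep for the first matching line; if stdout is empty, fall back to
        # whatever the pipeline printed on stderr, else return None.
        proc = subprocess.run(
            ["bash", "-c", f"grep -a '{pattern}' {log_path} | head -n 1"],
            capture_output=True, text=True,
        )
        if proc.stdout:
            return proc.stdout
        if proc.stderr:
            return proc.stderr
        return None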
diff --git a/qa/tasks/ceph_manager.py b/qa/tasks/ceph_manager.py
index 7005c8db0ff..57d22f3b5e6 100644
--- a/qa/tasks/ceph_manager.py
+++ b/qa/tasks/ceph_manager.py
@@ -2796,6 +2796,59 @@ class CephManager:
num += 1
return num
+ def _print_not_active_clean_pg(self, pgs):
+ """
+ Print the PGs that are not active+clean.
+ """
+ for pg in pgs:
+ if not (pg['state'].count('active') and
+ pg['state'].count('clean') and
+ not pg['state'].count('stale')):
+ log.debug(
+ "PG %s is not active+clean, but %s",
+ pg['pgid'], pg['state']
+ )
+
+ def pg_all_active_clean(self):
+ """
+ Check if all pgs are active+clean
+ return: True if all pgs are active+clean else False
+ """
+ pgs = self.get_pg_stats()
+ result = self._get_num_active_clean(pgs) == len(pgs)
+ if result:
+ log.debug("All PGs are active+clean")
+ else:
+ log.debug("Not all PGs are active+clean")
+ self._print_not_active_clean_pg(pgs)
+ return result
+
+ def _print_not_active_pg(self, pgs):
+ """
+ Print the PGs that are not active.
+ """
+ for pg in pgs:
+ if not (pg['state'].count('active')
+ and not pg['state'].count('stale')):
+ log.debug(
+ "PG %s is not active, but %s",
+ pg['pgid'], pg['state']
+ )
+
+ def pg_all_active(self):
+ """
+ Check if all pgs are active
+ return: True if all pgs are active else False
+ """
+ pgs = self.get_pg_stats()
+ result = self._get_num_active(pgs) == len(pgs)
+ if result:
+ log.debug("All PGs are active")
+ else:
+ log.debug("Not all PGs are active")
+ self._print_not_active_pg(pgs)
+ return result
+
def is_clean(self):
"""
True if all pgs are clean
@@ -3237,6 +3290,26 @@ class CephManager:
self.make_admin_daemon_dir(remote)
self.ctx.daemons.get_daemon('mgr', mgr, self.cluster).restart()
+ def get_crush_rule_id(self, crush_rule_name):
+ """
+ Get crush rule id by name
+ :returns: int -- crush rule id
+ """
+ out = self.raw_cluster_cmd('osd', 'crush', 'rule', 'dump', '--format=json')
+ j = json.loads('\n'.join(out.split('\n')[1:]))
+ for rule in j:
+ if rule['rule_name'] == crush_rule_name:
+ return rule['rule_id']
+ assert False, 'rule %s not found' % crush_rule_name
+
+ def get_mon_dump_json(self):
+ """
+ mon dump --format=json converted to a python object
+ :returns: the python object
+ """
+ out = self.raw_cluster_cmd('mon', 'dump', '--format=json')
+ return json.loads('\n'.join(out.split('\n')[1:]))
+
def get_mon_status(self, mon):
"""
Extract all the monitor status information from the cluster
@@ -3340,6 +3413,23 @@ class CephManager:
self.log(task_status)
return task_status
+ # Stretch mode related functions
+ def is_degraded_stretch_mode(self):
+ """
+ Return whether the cluster is in degraded stretch mode
+ """
+ try:
+ osdmap = self.get_osd_dump_json()
+ stretch_mode = osdmap.get('stretch_mode', {})
+ degraded_stretch_mode = stretch_mode.get('degraded_stretch_mode', 0)
+ self.log("is_degraded_stretch_mode: {0}".format(degraded_stretch_mode))
+ return degraded_stretch_mode == 1
+ except (TypeError, AttributeError) as e:
+ # Log the error or handle it as needed
+ self.log("Error accessing degraded_stretch_mode: {0}".format(e))
+ return False
+
+
def utility_task(name):
"""
Generate ceph_manager subtask corresponding to ceph_manager
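The new pg_all_active_clean()/pg_all_active() helpers above poll get_pg_stats() and log which PGs are still lagging. A hedged usage sketch of how a caller might wait on them; `manager` is a CephManager instance as patched above, while the timeout and interval values are arbitrary:

    import time

    def wait_for_active_clean(manager, timeout=300, interval=10):
        # Poll the new helper until every PG reports active+clean, or give up.
        deadline = time.time() + timeout
        while time.time() < deadline:
            if manager.pg_all_active_clean():
                return
            time.sleep(interval)
        raise RuntimeError(f"PGs not active+clean after {timeout}s")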
diff --git a/qa/tasks/cephadm.py b/qa/tasks/cephadm.py
index dab61c2c700..0cde6050718 100644
--- a/qa/tasks/cephadm.py
+++ b/qa/tasks/cephadm.py
@@ -475,12 +475,16 @@ def ceph_log(ctx, config):
run.Raw('|'), 'head', '-n', '1',
])
r = ctx.ceph[cluster_name].bootstrap_remote.run(
- stdout=StringIO(),
+ stdout=BytesIO(),
args=args,
+ stderr=StringIO(),
)
- stdout = r.stdout.getvalue()
- if stdout != '':
+ stdout = r.stdout.getvalue().decode()
+ if stdout:
return stdout
+ stderr = r.stderr.getvalue()
+ if stderr:
+ return stderr
return None
# NOTE: technically the first and third arg to first_in_ceph_log
diff --git a/qa/tasks/cephfs/cephfs_test_case.py b/qa/tasks/cephfs/cephfs_test_case.py
index c1312ec5efc..21b96d2b22b 100644
--- a/qa/tasks/cephfs/cephfs_test_case.py
+++ b/qa/tasks/cephfs/cephfs_test_case.py
@@ -252,8 +252,8 @@ class CephFSTestCase(CephTestCase):
def get_session_data(self, client_id):
return self._session_by_id(client_id)
- def _session_list(self):
- ls_data = self.fs.mds_asok(['session', 'ls'])
+ def _session_list(self, rank=None, status=None):
+ ls_data = self.fs.rank_asok(['session', 'ls'], rank=rank, status=status)
ls_data = [s for s in ls_data if s['state'] not in ['stale', 'closed']]
return ls_data
@@ -269,9 +269,9 @@ class CephFSTestCase(CephTestCase):
def perf_dump(self, rank=None, status=None):
return self.fs.rank_asok(['perf', 'dump'], rank=rank, status=status)
- def wait_until_evicted(self, client_id, timeout=30):
+ def wait_until_evicted(self, client_id, rank=None, timeout=30):
def is_client_evicted():
- ls = self._session_list()
+ ls = self._session_list(rank=rank)
for s in ls:
if s['id'] == client_id:
return False
diff --git a/qa/tasks/cephfs/filesystem.py b/qa/tasks/cephfs/filesystem.py
index 2b7fd2ee569..3846ef23f97 100644
--- a/qa/tasks/cephfs/filesystem.py
+++ b/qa/tasks/cephfs/filesystem.py
@@ -649,6 +649,8 @@ class FilesystemBase(MDSClusterBase):
def set_session_timeout(self, timeout):
self.set_var("session_timeout", "%d" % timeout)
+ def set_session_autoclose(self, autoclose_time):
+ self.set_var("session_autoclose", "%d" % autoclose_time)
def set_allow_standby_replay(self, yes):
self.set_var("allow_standby_replay", yes)
diff --git a/qa/tasks/cephfs/test_admin.py b/qa/tasks/cephfs/test_admin.py
index 00a68dd0183..beb41019e6d 100644
--- a/qa/tasks/cephfs/test_admin.py
+++ b/qa/tasks/cephfs/test_admin.py
@@ -2740,3 +2740,184 @@ class TestFSSetMaxMDS(TestAdminCommands):
'''
self.fs.set_max_mds(2, confirm=True)
self.assertEqual(self.fs.get_var('max_mds'), 2)
+
+
+class TestToggleVolumes(CephFSTestCase):
+ '''
+ Contains code for enabling/disabling mgr/volumes plugin.
+ '''
+
+ VOL_MOD_NAME = 'volumes'
+ CONFIRM = '--yes-i-really-mean-it'
+
+ def tearDown(self):
+ '''
+ Ensure that the volumes plugin is enabled after the test has finished
+ running since not doing so might affect tearDown() of CephFSTestCase or
+ other superclasses.
+ '''
+ json_output = self.get_ceph_cmd_stdout('mgr module ls --format json')
+ json_output = json.loads(json_output)
+
+ if 'volumes' in json_output['force_disabled_modules']:
+ self.run_ceph_cmd(f'mgr module enable {self.VOL_MOD_NAME}')
+
+ super(TestToggleVolumes, self).tearDown()
+
+ def test_force_disable_with_confirmation(self):
+ '''
+ Test that running "ceph mgr module force disable volumes
+ --yes-i-really-mean-it" successfully disables volumes plugin.
+
+ Also test "ceph mgr module ls" output after this.
+ '''
+ self.run_ceph_cmd(f'mgr module force disable {self.VOL_MOD_NAME} '
+ f'{self.CONFIRM}')
+
+ json_output = self.get_ceph_cmd_stdout('mgr module ls --format json')
+ json_output = json.loads(json_output)
+
+ self.assertIn(self.VOL_MOD_NAME, json_output['always_on_modules'])
+ self.assertIn(self.VOL_MOD_NAME, json_output['force_disabled_modules'])
+
+ self.assertNotIn(self.VOL_MOD_NAME, json_output['enabled_modules'])
+ self.assertNotIn(self.VOL_MOD_NAME, json_output['disabled_modules'])
+
+ def test_force_disable_fails_without_confirmation(self):
+ '''
+ Test that running "ceph mgr module force disable volumes" fails with
+ EPERM when confirmation flag is not passed along.
+
+ Also test that the output of this command suggests passing
+ --yes-i-really-mean-it.
+ '''
+ proc = self.run_ceph_cmd(
+ f'mgr module force disable {self.VOL_MOD_NAME}',
+ stderr=StringIO(), check_status=False)
+
+ self.assertEqual(proc.returncode, errno.EPERM)
+
+ proc_stderr = proc.stderr.getvalue()
+ self.assertIn('EPERM', proc_stderr)
+ # ensure that the confirmation flag was recommended
+ self.assertIn(self.CONFIRM, proc_stderr)
+
+ def test_force_disable_idempotency(self):
+ '''
+ Test that running "ceph mgr module force disable volumes" passes when
+ volumes plugin was already force disabled.
+ '''
+ self.run_ceph_cmd(f'mgr module force disable {self.VOL_MOD_NAME} '
+ f'{self.CONFIRM}')
+ sleep(5)
+
+ json_output = self.get_ceph_cmd_stdout('mgr module ls --format '
+ 'json-pretty')
+ json_output = json.loads(json_output)
+
+ self.assertIn(self.VOL_MOD_NAME, json_output['always_on_modules'])
+ self.assertIn(self.VOL_MOD_NAME, json_output['force_disabled_modules'])
+
+ self.assertNotIn(self.VOL_MOD_NAME, json_output['enabled_modules'])
+ self.assertNotIn(self.VOL_MOD_NAME, json_output['disabled_modules'])
+
+ # XXX: in this test, running this command a 2nd time should pass.
+ self.run_ceph_cmd(f'mgr module force disable {self.VOL_MOD_NAME}')
+
+ def test_force_disable_nonexistent_mod(self):
+ '''
+ Test that passing non-existent name to "ceph mgr module force disable"
+ command leads to an error.
+ '''
+ proc = self.run_ceph_cmd(
+ f'mgr module force disable abcd {self.CONFIRM}',
+ check_status=False, stderr=StringIO())
+ self.assertEqual(proc.returncode, errno.EINVAL)
+ self.assertIn('EINVAL', proc.stderr.getvalue())
+
+ def test_force_disable_non_alwayson_mod(self):
+ '''
+ Test that passing a module that is not always-on to the "ceph mgr module
+ force disable" command leads to an error.
+ '''
+ json_output = self.get_ceph_cmd_stdout(
+ 'mgr module ls --format json-pretty', check_status=False,
+ stderr=StringIO())
+ output_dict = json.loads(json_output)
+ some_non_alwayson_mod = output_dict['enabled_modules'][0]
+
+ proc = self.run_ceph_cmd(
+ f'mgr module force disable {some_non_alwayson_mod} {self.CONFIRM}',
+ check_status=False, stderr=StringIO())
+ self.assertEqual(proc.returncode, errno.EINVAL)
+ self.assertIn('EINVAL', proc.stderr.getvalue())
+
+ def test_enabled_by_default(self):
+ '''
+ Test that volumes plugin is enabled by default and is also reported as
+ "always on".
+ '''
+ json_output = self.get_ceph_cmd_stdout('mgr module ls --format json')
+ json_output = json.loads(json_output)
+
+ self.assertIn(self.VOL_MOD_NAME, json_output['always_on_modules'])
+
+ self.assertNotIn(self.VOL_MOD_NAME, json_output['enabled_modules'])
+ self.assertNotIn(self.VOL_MOD_NAME, json_output['disabled_modules'])
+ self.assertNotIn(self.VOL_MOD_NAME, json_output['force_disabled_modules'])
+
+ def test_disable_fails(self):
+ '''
+ Test that running "ceph mgr module disable volumes" fails with EPERM.
+
+ This is expected since volumes is an always-on module and therefore
+ it can only be disabled using command "ceph mgr module force disable
+ volumes".
+ '''
+ proc = self.run_ceph_cmd(f'mgr module disable {self.VOL_MOD_NAME}',
+ stderr=StringIO(), check_status=False)
+ self.assertEqual(proc.returncode, errno.EPERM)
+
+ proc_stderr = proc.stderr.getvalue()
+ self.assertIn('EPERM', proc_stderr)
+
+ def test_enable_idempotency(self):
+ '''
+ Test that enabling volumes plugin when it is already enabled doesn't
+ exit with non-zero return value.
+
+ Also test that it reports plugin as already enabled.
+ '''
+ proc = self.run_ceph_cmd(f'mgr module enable {self.VOL_MOD_NAME}',
+ stderr=StringIO())
+ self.assertEqual(proc.returncode, 0)
+
+ proc_stderr = proc.stderr.getvalue()
+ self.assertIn('already enabled', proc_stderr)
+ self.assertIn('always-on', proc_stderr)
+
+ def test_enable_post_disabling(self):
+ '''
+ Test that enabling volumes plugin after (force-)disabling it works
+ successfully.
+
+ Also test "ceph mgr module ls" output for volumes plugin afterwards.
+ '''
+ self.run_ceph_cmd(f'mgr module force disable {self.VOL_MOD_NAME} '
+ f'{self.CONFIRM}')
+ # give a bit of time for the plugin to be disabled.
+ sleep(5)
+
+ self.run_ceph_cmd(f'mgr module enable {self.VOL_MOD_NAME}')
+ # give a bit of time for the plugin to be functional again
+ sleep(5)
+ json_output = self.get_ceph_cmd_stdout('mgr module ls --format json')
+ json_output = json.loads(json_output)
+ self.assertIn(self.VOL_MOD_NAME, json_output['always_on_modules'])
+ self.assertNotIn(self.VOL_MOD_NAME, json_output['enabled_modules'])
+ self.assertNotIn(self.VOL_MOD_NAME, json_output['disabled_modules'])
+ self.assertNotIn(self.VOL_MOD_NAME, json_output['force_disabled_modules'])
+
+ # plugin is reported properly by "ceph mgr module ls" command, check if
+ # it is also working fine.
+ self.run_ceph_cmd('fs volume ls')
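TestToggleVolumes repeatedly parses `ceph mgr module ls --format json` and asserts which bucket the volumes plugin lands in. A small helper sketch of that classification step, assuming only the JSON keys already used in the assertions above (force_disabled is checked first because a force-disabled always-on module appears in both lists):

    import json

    def classify_module(mgr_module_ls_json, name="volumes"):
        # Return the 'mgr module ls' bucket the named module is listed in.
        data = json.loads(mgr_module_ls_json)
        for bucket in ("force_disabled_modules", "enabled_modules",
                       "disabled_modules", "always_on_modules"):
            if name in data.get(bucket, []):
                return bucket
        return "absent"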
diff --git a/qa/tasks/cephfs/test_exports.py b/qa/tasks/cephfs/test_exports.py
index 16de379f54f..468378fce3d 100644
--- a/qa/tasks/cephfs/test_exports.py
+++ b/qa/tasks/cephfs/test_exports.py
@@ -4,6 +4,7 @@ import time
from tasks.cephfs.fuse_mount import FuseMount
from tasks.cephfs.cephfs_test_case import CephFSTestCase
from teuthology.exceptions import CommandFailedError
+from teuthology.contextutil import safe_while, MaxWhileTries
log = logging.getLogger(__name__)
@@ -152,6 +153,8 @@ class TestExportPin(CephFSTestCase):
# vstart.sh sets mds_debug_subtrees to True. That causes a ESubtreeMap
# to be written out every event. Yuck!
self.config_set('mds', 'mds_debug_subtrees', False)
+ # make sure ESubtreeMap is written frequently enough:
+ self.config_set('mds', 'mds_log_minor_segments_per_major_segment', '4')
self.config_rm('mds', 'mds bal split size') # don't split /top
self.mount_a.run_shell_payload("rm -rf 1")
@@ -628,3 +631,186 @@ done
log.info("{0} migrations have occured due to the cluster resizing".format(count))
# rebalancing from 3 -> 2 may cause half of rank 0/1 to move and all of rank 2
self.assertLessEqual((count/len(subtrees_old)), (1.0/3.0/2.0 + 1.0/3.0/2.0 + 1.0/3.0)*1.25) # aka .66 with 25% overbudget
+
+class TestDumpExportStates(CephFSTestCase):
+ MDSS_REQUIRED = 2
+ CLIENTS_REQUIRED = 1
+
+ EXPORT_STATES = ['locking', 'discovering', 'freezing', 'prepping', 'warning', 'exporting']
+
+ def setUp(self):
+ super().setUp()
+
+ self.fs.set_max_mds(self.MDSS_REQUIRED)
+ self.status = self.fs.wait_for_daemons()
+
+ self.mount_a.run_shell_payload('mkdir -p test/export')
+
+ def tearDown(self):
+ super().tearDown()
+
+ def _wait_for_export_target(self, source, target, sleep=2, timeout=10):
+ try:
+ with safe_while(sleep=sleep, tries=timeout//sleep) as proceed:
+ while proceed():
+ info = self.fs.getinfo().get_rank(self.fs.id, source)
+ log.info(f'waiting for rank {target} to be added to the export target')
+ if target in info['export_targets']:
+ return
+ except MaxWhileTries as e:
+ raise RuntimeError(f'rank {target} has not been added to export target after {timeout}s') from e
+
+ def _dump_export_state(self, rank):
+ states = self.fs.rank_asok(['dump_export_states'], rank=rank, status=self.status)
+ self.assertTrue(type(states) is list)
+ self.assertEqual(len(states), 1)
+ return states[0]
+
+ def _test_base(self, path, source, target, state_index, kill):
+ self.fs.rank_asok(['config', 'set', 'mds_kill_import_at', str(kill)], rank=target, status=self.status)
+
+ self.fs.rank_asok(['export', 'dir', path, str(target)], rank=source, status=self.status)
+ self._wait_for_export_target(source, target)
+
+ target_rank = self.fs.get_rank(rank=target, status=self.status)
+ self.delete_mds_coredump(target_rank['name'])
+
+ state = self._dump_export_state(source)
+
+ self.assertTrue(type(state['tid']) is int)
+ self.assertEqual(state['path'], path)
+ self.assertEqual(state['state'], self.EXPORT_STATES[state_index])
+ self.assertEqual(state['peer'], target)
+
+ return state
+
+ def _test_state_history(self, state):
+ history = state['state_history']
+ self.assertTrue(type(history) is dict)
+ size = 0
+ for name in self.EXPORT_STATES:
+ self.assertTrue(type(history[name]) is dict)
+ size += 1
+ if name == state['state']:
+ break
+ self.assertEqual(len(history), size)
+
+ def _test_freeze_tree(self, state, waiters):
+ self.assertTrue(type(state['freeze_tree_time']) is float)
+ self.assertEqual(state['unfreeze_tree_waiters'], waiters)
+
+ def test_discovering(self):
+ state = self._test_base('/test', 0, 1, 1, 1)
+
+ self._test_state_history(state)
+ self._test_freeze_tree(state, 0)
+
+ self.assertEqual(state['last_cum_auth_pins'], 0)
+ self.assertEqual(state['num_remote_waiters'], 0)
+
+ def test_prepping(self):
+ client_id = self.mount_a.get_global_id()
+
+ state = self._test_base('/test', 0, 1, 3, 3)
+
+ self._test_state_history(state)
+ self._test_freeze_tree(state, 0)
+
+ self.assertEqual(state['flushed_clients'], [client_id])
+ self.assertTrue(type(state['warning_ack_waiting']) is list)
+
+ def test_exporting(self):
+ state = self._test_base('/test', 0, 1, 5, 5)
+
+ self._test_state_history(state)
+ self._test_freeze_tree(state, 0)
+
+ self.assertTrue(type(state['notify_ack_waiting']) is list)
+
+class TestKillExports(CephFSTestCase):
+ MDSS_REQUIRED = 2
+ CLIENTS_REQUIRED = 1
+
+ def setUp(self):
+ CephFSTestCase.setUp(self)
+
+ self.fs.set_max_mds(self.MDSS_REQUIRED)
+ self.status = self.fs.wait_for_daemons()
+
+ self.mount_a.run_shell_payload('mkdir -p test/export')
+
+ def tearDown(self):
+ super().tearDown()
+
+ def _kill_export_as(self, rank, kill):
+ self.fs.rank_asok(['config', 'set', 'mds_kill_export_at', str(kill)], rank=rank, status=self.status)
+
+ def _export_dir(self, path, source, target):
+ self.fs.rank_asok(['export', 'dir', path, str(target)], rank=source, status=self.status)
+
+ def _wait_failover(self):
+ self.wait_until_true(lambda: self.fs.status().hadfailover(self.status), timeout=self.fs.beacon_timeout)
+
+ def _clear_coredump(self, rank):
+ crash_rank = self.fs.get_rank(rank=rank, status=self.status)
+ self.delete_mds_coredump(crash_rank['name'])
+
+ def _run_kill_export(self, kill_at, exporter_rank=0, importer_rank=1, restart=True):
+ self._kill_export_as(exporter_rank, kill_at)
+ self._export_dir("/test", exporter_rank, importer_rank)
+ self._wait_failover()
+ self._clear_coredump(exporter_rank)
+
+ if restart:
+ self.fs.rank_restart(rank=exporter_rank, status=self.status)
+ self.status = self.fs.wait_for_daemons()
+
+ def test_session_cleanup(self):
+ """
+ Test importer's session cleanup after an export subtree task is interrupted.
+ Set 'mds_kill_export_at' to 9 or 10 so that the importer will wait for the exporter
+ to restart while the state is 'acking'.
+
+ See https://tracker.ceph.com/issues/61459
+ """
+
+ kill_export_at = [9, 10]
+
+ exporter_rank = 0
+ importer_rank = 1
+
+ for kill in kill_export_at:
+ log.info(f"kill_export_at: {kill}")
+ self._run_kill_export(kill, exporter_rank, importer_rank)
+
+ if len(self._session_list(importer_rank, self.status)) > 0:
+ client_id = self.mount_a.get_global_id()
+ self.fs.rank_asok(['session', 'evict', "%s" % client_id], rank=importer_rank, status=self.status)
+
+ # timeout if buggy
+ self.wait_until_evicted(client_id, importer_rank)
+
+ # for multiple tests
+ self.mount_a.remount()
+
+ def test_client_eviction(self):
+ # modify the timeout so that we don't have to wait too long
+ timeout = 30
+ self.fs.set_session_timeout(timeout)
+ self.fs.set_session_autoclose(timeout + 5)
+
+ kill_export_at = [9, 10]
+
+ exporter_rank = 0
+ importer_rank = 1
+
+ for kill in kill_export_at:
+ log.info(f"kill_export_at: {kill}")
+ self._run_kill_export(kill, exporter_rank, importer_rank)
+
+ client_id = self.mount_a.get_global_id()
+ self.wait_until_evicted(client_id, importer_rank, timeout + 10)
+ time.sleep(1)
+
+ # fails if buggy
+ self.mount_a.ls()
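_wait_for_export_target above leans on teuthology's safe_while to poll until the target rank shows up in export_targets, converting MaxWhileTries into a clearer error. The same retry shape, sketched with only the standard library (names and defaults are illustrative):

    import time

    def poll_until(predicate, sleep=2, tries=5, what="condition"):
        # Re-check `predicate` a bounded number of times, sleeping in between,
        # and raise once the retry budget is exhausted.
        for _ in range(tries):
            if predicate():
                return
            time.sleep(sleep)
        raise RuntimeError(f"{what} not met after {tries * sleep}s")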
diff --git a/qa/tasks/cephfs/test_failover.py b/qa/tasks/cephfs/test_failover.py
index 29af1e76a4f..46139163ddd 100644
--- a/qa/tasks/cephfs/test_failover.py
+++ b/qa/tasks/cephfs/test_failover.py
@@ -1,3 +1,4 @@
+import re
import time
import signal
import logging
@@ -342,6 +343,60 @@ class TestClusterResize(CephFSTestCase):
self.fs.wait_for_daemons(timeout=90)
+class TestFailoverBeaconHealth(CephFSTestCase):
+ CLIENTS_REQUIRED = 1
+ MDSS_REQUIRED = 1
+
+ def initiate_journal_replay(self, num_files=100):
+ """ Initiate journal replay by creating files and restarting mds server."""
+
+ self.config_set("mds", "mds_delay_journal_replay_for_testing", "5000")
+ self.mounts[0].test_files = [str(x) for x in range(num_files)]
+ self.mounts[0].create_files()
+ self.fs.fail()
+ self.fs.set_joinable()
+
+ def test_replay_beacon_estimated_time(self):
+ """
+ That beacon emits warning message with estimated time to complete replay
+ """
+ self.initiate_journal_replay()
+ self.wait_for_health("MDS_ESTIMATED_REPLAY_TIME", 60)
+ # remove the config so that replay finishes and the cluster
+ # is HEALTH_OK
+ self.config_rm("mds", "mds_delay_journal_replay_for_testing")
+ self.wait_for_health_clear(timeout=60)
+
+ def test_replay_estimated_time_accuracy(self):
+ self.initiate_journal_replay(250)
+ def replay_complete():
+ health = self.ceph_cluster.mon_manager.get_mon_health(debug=False, detail=True)
+ codes = [s for s in health['checks']]
+ return 'MDS_ESTIMATED_REPLAY_TIME' not in codes
+
+ def get_estimated_time():
+ completion_percentage = 0.0
+ time_duration = pending_duration = 0
+ with safe_while(sleep=5, tries=360) as proceed:
+ while proceed():
+ health = self.ceph_cluster.mon_manager.get_mon_health(debug=False, detail=True)
+ codes = [s for s in health['checks']]
+ if 'MDS_ESTIMATED_REPLAY_TIME' in codes:
+ message = health['checks']['MDS_ESTIMATED_REPLAY_TIME']['detail'][0]['message']
+ ### sample warning string: "mds.a(mds.0): replay: 50.0446% complete - elapsed time: 582s, estimated time remaining: 581s"
+ m = re.match(".* replay: (\d+(\.\d+)?)% complete - elapsed time: (\d+)s, estimated time remaining: (\d+)s", message)
+ if not m:
+ continue
+ completion_percentage = float(m.group(1))
+ time_duration = int(m.group(3))
+ pending_duration = int(m.group(4))
+ log.debug(f"MDS_ESTIMATED_REPLAY_TIME is present in health: {message}, duration: {time_duration}, completion_percentage: {completion_percentage}")
+ if completion_percentage >= 50:
+ return (completion_percentage, time_duration, pending_duration)
+ _, _, pending_duration = get_estimated_time()
+ # wait for 25% more time to avoid false negative failures
+ self.wait_until_true(replay_complete, timeout=pending_duration * 1.25)
+
class TestFailover(CephFSTestCase):
CLIENTS_REQUIRED = 1
MDSS_REQUIRED = 2
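test_replay_estimated_time_accuracy scrapes the MDS_ESTIMATED_REPLAY_TIME health detail with a regex. The parse on its own, fed with the sample warning string quoted in the test, as a standalone check that the pattern captures the completion percentage plus elapsed and remaining seconds:

    import re

    SAMPLE = ("mds.a(mds.0): replay: 50.0446% complete - elapsed time: 582s, "
              "estimated time remaining: 581s")

    m = re.match(r".* replay: (\d+(\.\d+)?)% complete - elapsed time: (\d+)s,"
                 r" estimated time remaining: (\d+)s", SAMPLE)
    assert m is not None
    completion = float(m.group(1))   # 50.0446
    elapsed = int(m.group(3))        # 582
    remaining = int(m.group(4))      # 581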
diff --git a/qa/tasks/cephfs/test_nfs.py b/qa/tasks/cephfs/test_nfs.py
index 19076ea44b3..0a1c07dce04 100644
--- a/qa/tasks/cephfs/test_nfs.py
+++ b/qa/tasks/cephfs/test_nfs.py
@@ -55,7 +55,7 @@ class TestNFS(MgrTestCase):
"squash": "none",
"security_label": True,
"protocols": [
- 4
+ 3, 4
],
"transports": [
"TCP"
@@ -369,6 +369,45 @@ class TestNFS(MgrTestCase):
except CommandFailedError as e:
self.fail(f"expected read/write of a file to be successful but failed with {e.exitstatus}")
+ def _mnt_nfs(self, pseudo_path, port, ip):
+ '''
+ Mount created export
+ :param pseudo_path: It is the pseudo root name
+ :param port: Port of deployed nfs cluster
+ :param ip: IP of deployed nfs cluster
+ '''
+ tries = 3
+ while True:
+ try:
+ self.ctx.cluster.run(
+ args=['sudo', 'mount', '-t', 'nfs', '-o', f'port={port}',
+ f'{ip}:{pseudo_path}', '/mnt'])
+ break
+ except CommandFailedError:
+ if tries:
+ tries -= 1
+ time.sleep(2)
+ continue
+ raise
+
+ self.ctx.cluster.run(args=['sudo', 'chmod', '1777', '/mnt'])
+
+ def _test_fio(self, pseudo_path, port, ip):
+ '''
+ run fio with libaio on /mnt/fio
+ :param pseudo_path: pseudo root name of the export to mount
+ :param port: port of the deployed nfs cluster
+ :param ip: IP of the deployed nfs cluster
+ '''
+ try:
+ self._mnt_nfs(pseudo_path, port, ip)
+ self.ctx.cluster.run(args=['mkdir', '/mnt/fio'])
+ fio_cmd=['sudo', 'fio', '--ioengine=libaio', '-directory=/mnt/fio', '--filename=fio.randrw.test', '--name=job', '--bs=16k', '--direct=1', '--group_reporting', '--iodepth=128', '--randrepeat=0', '--norandommap=1', '--thread=2', '--ramp_time=20s', '--offset_increment=5%', '--size=5G', '--time_based', '--runtime=300', '--ramp_time=1s', '--percentage_random=0', '--rw=randrw', '--rwmixread=50']
+ self.ctx.cluster.run(args=fio_cmd)
+ except CommandFailedError as e:
+ self.fail(f"expected fio to be successful but failed with {e.exitstatus}")
+ finally:
+ self.ctx.cluster.run(args=['sudo', 'rm', '-rf', '/mnt/fio'])
+ self.ctx.cluster.run(args=['sudo', 'umount', '/mnt'])
+
def _write_to_read_only_export(self, pseudo_path, port, ip):
'''
Check if write to read only export fails
@@ -627,6 +666,18 @@ class TestNFS(MgrTestCase):
self._test_data_read_write(self.pseudo_path, port, ip)
self._test_delete_cluster()
+ def test_async_io_fio(self):
+ '''
+ Test async io using fio. Expect completion without hang or crash
+ '''
+ self._test_create_cluster()
+ self._create_export(export_id='1', create_fs=True,
+ extra_cmd=['--pseudo-path', self.pseudo_path])
+ port, ip = self._get_port_ip_info()
+ self._check_nfs_cluster_status('running', 'NFS Ganesha cluster restart failed')
+ self._test_fio(self.pseudo_path, port, ip)
+ self._test_delete_cluster()
+
def test_cluster_info(self):
'''
Test cluster info outputs correct ip and hostname
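_mnt_nfs above retries the NFS mount a few times because the ganesha service may not be ready to serve the export immediately. The same retry-on-failure shape, sketched with subprocess so it stands alone (port, mountpoint and retry counts are illustrative):

    import subprocess
    import time

    def mount_with_retries(ip, pseudo_path, port=2049, mountpoint="/mnt",
                           tries=3, delay=2):
        # Keep retrying `mount -t nfs` until it succeeds or the budget runs out.
        while True:
            rc = subprocess.run(["sudo", "mount", "-t", "nfs", "-o",
                                 f"port={port}", f"{ip}:{pseudo_path}",
                                 mountpoint]).returncode
            if rc == 0:
                return
            if tries == 0:
                raise RuntimeError("NFS mount kept failing")
            tries -= 1
            time.sleep(delay)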
diff --git a/qa/tasks/cephfs/test_quota.py b/qa/tasks/cephfs/test_quota.py
index b5691c83852..ae1c1f2056c 100644
--- a/qa/tasks/cephfs/test_quota.py
+++ b/qa/tasks/cephfs/test_quota.py
@@ -115,9 +115,11 @@ class TestQuota(CephFSTestCase):
readable_values = {"10K": "10240",
"100Ki": "102400",
+ "100KiB": "102400",
"10M": "10485760",
"100Mi": "104857600",
"2G": "2147483648",
+ "2GB": "2147483648",
"4Gi": "4294967296",
"1T": "1099511627776",
"2Ti": "2199023255552"}
@@ -135,7 +137,8 @@ class TestQuota(CephFSTestCase):
self.mount_a.run_shell(["mkdir", "subdir"])
- invalid_values = ["10A", "1y00Ki", "af00", "G", "", " ", "-1t", "-1"]
+ invalid_values = ["10A", "1y00Ki", "af00", "G", "", " ", "-1t", "-1",
+ "1GT", "2MM", "5Di", "8Bi", "i", "7iB"]
for invalid_value in invalid_values:
with self.assertRaises(CommandFailedError):
self.mount_a.setfattr("./subdir", "ceph.quota.max_bytes",
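The quota hunk adds "100KiB" and "2GB" to the accepted spellings and "8Bi", "1GT", "7iB" and friends to the rejected ones. A worked sketch of the suffix-to-bytes mapping those expectations imply (an illustration of the test table, not Ceph's actual size parser; note the table treats K/Ki/KiB and the like as the same binary multiple):

    import re

    _MULT = {"K": 2**10, "M": 2**20, "G": 2**30, "T": 2**40}

    def to_bytes(value):
        # Accept forms like 10K, 100KiB, 2G, 2GB, 4Gi, 2Ti; reject 8Bi, 1GT, ...
        m = re.fullmatch(r"(\d+)([KMGT])(i|iB|B)?", value)
        if not m:
            raise ValueError(f"invalid size: {value!r}")
        return int(m.group(1)) * _MULT[m.group(2)]

    assert to_bytes("10K") == 10240
    assert to_bytes("100KiB") == 102400
    assert to_bytes("2GB") == 2147483648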
diff --git a/qa/tasks/check_counter.py b/qa/tasks/check_counter.py
index 40818f3f475..1f63b6a0bd4 100644
--- a/qa/tasks/check_counter.py
+++ b/qa/tasks/check_counter.py
@@ -1,11 +1,14 @@
import logging
import json
+import errno
from teuthology.task import Task
from teuthology import misc
from tasks import ceph_manager
+from tasks.cephfs.filesystem import MDSCluster
+from teuthology.exceptions import CommandFailedError
log = logging.getLogger(__name__)
@@ -61,6 +64,9 @@ class CheckCounter(Task):
mon_manager = ceph_manager.CephManager(self.admin_remote, ctx=self.ctx, logger=log.getChild('ceph_manager'))
active_mgr = json.loads(mon_manager.raw_cluster_cmd("mgr", "dump", "--format=json-pretty"))["active_name"]
+ mds_cluster = MDSCluster(self.ctx)
+ status = mds_cluster.status()
+
for daemon_type, counters in targets.items():
# List of 'a', 'b', 'c'...
daemon_ids = list(misc.all_roles_of_type(self.ctx.cluster, daemon_type))
@@ -80,13 +86,31 @@ class CheckCounter(Task):
else:
log.debug("Getting stats from {0}".format(daemon_id))
- manager = self.ctx.managers[cluster_name]
- proc = manager.admin_socket(daemon_type, daemon_id, ["perf", "dump"])
- response_data = proc.stdout.getvalue().strip()
+ if daemon_type == 'mds':
+ mds_info = status.get_mds(daemon_id)
+ if not mds_info:
+ continue
+ mds = f"mds.{mds_info['gid']}"
+ if mds_info['state'] != "up:active":
+ log.debug(f"skipping {mds}")
+ continue
+ log.debug(f"Getting stats from {mds}")
+ try:
+ proc = mon_manager.raw_cluster_cmd("tell", mds, "perf", "dump",
+ "--format=json-pretty")
+ response_data = proc.strip()
+ except CommandFailedError as e:
+ if e.exitstatus == errno.ENOENT:
+ log.debug(f"Failed to do 'perf dump' on {mds}")
+ continue
+ else:
+ manager = self.ctx.managers[cluster_name]
+ proc = manager.admin_socket(daemon_type, daemon_id, ["perf", "dump"])
+ response_data = proc.stdout.getvalue().strip()
if response_data:
perf_dump = json.loads(response_data)
else:
- log.warning("No admin socket response from {0}, skipping".format(daemon_id))
+ log.warning("No response from {0}, skipping".format(daemon_id))
continue
minval = ''
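The check_counter change swaps the admin-socket query for `ceph tell mds.<gid> perf dump` and skips MDS daemons that are not up:active. A hedged sketch of that tell-and-parse step with plain subprocess (error handling is simplified; the real task goes through CephManager):

    import json
    import subprocess

    def mds_perf_counters(gid):
        # Ask one MDS for its perf counters via 'ceph tell'; an unreachable
        # daemon yields an empty dict instead of failing the caller.
        try:
            out = subprocess.check_output(
                ["ceph", "tell", f"mds.{gid}", "perf", "dump",
                 "--format=json-pretty"], text=True)
        except subprocess.CalledProcessError:
            return {}
        return json.loads(out) if out.strip() else {}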
diff --git a/qa/tasks/fwd_scrub.py b/qa/tasks/fwd_scrub.py
index 2ac92439de6..d955d232c2c 100644
--- a/qa/tasks/fwd_scrub.py
+++ b/qa/tasks/fwd_scrub.py
@@ -33,6 +33,8 @@ class ForwardScrubber(ThrasherGreenlet):
def _run(self):
try:
self.do_scrub()
+ except ThrasherGreenlet.Stopped:
+ pass
except Exception as e:
self.set_thrasher_exception(e)
self.logger.exception("exception:")
diff --git a/qa/tasks/kafka.py b/qa/tasks/kafka.py
index 5e6c208ca30..833f03babf6 100644
--- a/qa/tasks/kafka.py
+++ b/qa/tasks/kafka.py
@@ -4,6 +4,7 @@ Deploy and configure Kafka for Teuthology
import contextlib
import logging
import time
+import os
from teuthology import misc as teuthology
from teuthology import contextutil
@@ -33,6 +34,13 @@ def install_kafka(ctx, config):
assert isinstance(config, dict)
log.info('Installing Kafka...')
+ # programmatically find a nearby mirror so as not to hammer archive.apache.org
+ apache_mirror_cmd="curl 'https://www.apache.org/dyn/closer.cgi' 2>/dev/null | " \
+ "grep -o '<strong>[^<]*</strong>' | sed 's/<[^>]*>//g' | head -n 1"
+ log.info("determining apache mirror by running: " + apache_mirror_cmd)
+ apache_mirror_url_front = os.popen(apache_mirror_cmd).read().rstrip() # note: includes trailing slash (/)
+ log.info("chosen apache mirror is " + apache_mirror_url_front)
+
for (client, _) in config.items():
(remote,) = ctx.cluster.only(client).remotes.keys()
test_dir=teuthology.get_testdir(ctx)
@@ -40,7 +48,8 @@ def install_kafka(ctx, config):
kafka_file = kafka_prefix + current_version + '.tgz'
- link1 = 'https://archive.apache.org/dist/kafka/' + current_version + '/' + kafka_file
+ link1 = '{apache_mirror_url_front}/kafka/'.format(apache_mirror_url_front=apache_mirror_url_front) + \
+ current_version + '/' + kafka_file
ctx.cluster.only(client).run(
args=['cd', '{tdir}'.format(tdir=test_dir), run.Raw('&&'), 'wget', link1],
)
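Both kafka.py above and the new kafka_failover.py pick a download mirror by scraping apache.org's closer.cgi with a curl/grep/sed pipeline run through os.popen. The same discovery done in-process, as an assumption-labelled sketch: closer.cgi is the URL from the patch, and the first <strong> element is assumed to carry the mirror base URL (trailing slash included), exactly as the shell pipeline assumes.

    import re
    import urllib.request

    def nearest_apache_mirror():
        # Fetch closer.cgi and pull the first <strong>...</strong> value,
        # which the install task treats as the mirror base URL.
        html = urllib.request.urlopen(
            "https://www.apache.org/dyn/closer.cgi").read().decode()
        m = re.search(r"<strong>([^<]*)</strong>", html)
        if not m:
            raise RuntimeError("could not determine an Apache mirror")
        return m.group(1)

    # e.g. nearest_apache_mirror() + "kafka/" + version + "/" + kafka_file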
diff --git a/qa/tasks/kafka_failover.py b/qa/tasks/kafka_failover.py
new file mode 100644
index 00000000000..3ca60ab84fc
--- /dev/null
+++ b/qa/tasks/kafka_failover.py
@@ -0,0 +1,244 @@
+"""
+Deploy and configure Kafka for Teuthology
+"""
+import contextlib
+import logging
+import time
+import os
+
+from teuthology import misc as teuthology
+from teuthology import contextutil
+from teuthology.orchestra import run
+
+log = logging.getLogger(__name__)
+
+def get_kafka_version(config):
+ for client, client_config in config.items():
+ if 'kafka_version' in client_config:
+ kafka_version = client_config.get('kafka_version')
+ return kafka_version
+
+kafka_prefix = 'kafka_2.13-'
+
+def get_kafka_dir(ctx, config):
+ kafka_version = get_kafka_version(config)
+ current_version = kafka_prefix + kafka_version
+ return '{tdir}/{ver}'.format(tdir=teuthology.get_testdir(ctx),ver=current_version)
+
+
+@contextlib.contextmanager
+def install_kafka(ctx, config):
+ """
+ Downloading the kafka tar file.
+ """
+ assert isinstance(config, dict)
+ log.info('Installing Kafka...')
+
+ # programmatically find a nearby mirror so as not to hammer archive.apache.org
+ apache_mirror_cmd="curl 'https://www.apache.org/dyn/closer.cgi' 2>/dev/null | " \
+ "grep -o '<strong>[^<]*</strong>' | sed 's/<[^>]*>//g' | head -n 1"
+ log.info("determining apache mirror by running: " + apache_mirror_cmd)
+ apache_mirror_url_front = os.popen(apache_mirror_cmd).read().rstrip() # note: includes trailing slash (/)
+ log.info("chosen apache mirror is " + apache_mirror_url_front)
+
+ for (client, _) in config.items():
+ (remote,) = ctx.cluster.only(client).remotes.keys()
+ test_dir=teuthology.get_testdir(ctx)
+ current_version = get_kafka_version(config)
+
+ kafka_file = kafka_prefix + current_version + '.tgz'
+
+ link1 = '{apache_mirror_url_front}/kafka/'.format(apache_mirror_url_front=apache_mirror_url_front) + \
+ current_version + '/' + kafka_file
+ ctx.cluster.only(client).run(
+ args=['cd', '{tdir}'.format(tdir=test_dir), run.Raw('&&'), 'wget', link1],
+ )
+
+ ctx.cluster.only(client).run(
+ args=['cd', '{tdir}'.format(tdir=test_dir), run.Raw('&&'), 'tar', '-xvzf', kafka_file],
+ )
+
+ kafka_dir = get_kafka_dir(ctx, config)
+ # create config for second broker
+ second_broker_config_name = "server2.properties"
+ second_broker_data = "{tdir}/data/broker02".format(tdir=kafka_dir)
+ second_broker_data_logs_escaped = "{}/logs".format(second_broker_data).replace("/", "\/")
+
+ ctx.cluster.only(client).run(
+ args=['cd', '{tdir}'.format(tdir=kafka_dir), run.Raw('&&'),
+ 'cp', '{tdir}/config/server.properties'.format(tdir=kafka_dir), '{tdir}/config/{second_broker_config_name}'.format(tdir=kafka_dir, second_broker_config_name=second_broker_config_name), run.Raw('&&'),
+ 'mkdir', '-p', '{tdir}/data'.format(tdir=kafka_dir)
+ ],
+ )
+
+ # edit config
+ ctx.cluster.only(client).run(
+ args=['sed', '-i', 's/broker.id=0/broker.id=1/g', '{tdir}/config/{second_broker_config_name}'.format(tdir=kafka_dir, second_broker_config_name=second_broker_config_name), run.Raw('&&'),
+ 'sed', '-i', 's/#listeners=PLAINTEXT:\/\/:9092/listeners=PLAINTEXT:\/\/localhost:19092/g', '{tdir}/config/{second_broker_config_name}'.format(tdir=kafka_dir, second_broker_config_name=second_broker_config_name), run.Raw('&&'),
+ 'sed', '-i', 's/#advertised.listeners=PLAINTEXT:\/\/your.host.name:9092/advertised.listeners=PLAINTEXT:\/\/localhost:19092/g', '{tdir}/config/{second_broker_config_name}'.format(tdir=kafka_dir, second_broker_config_name=second_broker_config_name), run.Raw('&&'),
+ 'sed', '-i', 's/log.dirs=\/tmp\/kafka-logs/log.dirs={}/g'.format(second_broker_data_logs_escaped), '{tdir}/config/{second_broker_config_name}'.format(tdir=kafka_dir, second_broker_config_name=second_broker_config_name), run.Raw('&&'),
+ 'cat', '{tdir}/config/{second_broker_config_name}'.format(tdir=kafka_dir, second_broker_config_name=second_broker_config_name)
+ ]
+ )
+
+ try:
+ yield
+ finally:
+ log.info('Removing packaged dependencies of Kafka...')
+ test_dir=get_kafka_dir(ctx, config)
+ current_version = get_kafka_version(config)
+ for (client,_) in config.items():
+ ctx.cluster.only(client).run(
+ args=['rm', '-rf', '{tdir}/logs'.format(tdir=test_dir)],
+ )
+
+ ctx.cluster.only(client).run(
+ args=['rm', '-rf', test_dir],
+ )
+
+ ctx.cluster.only(client).run(
+ args=['rm', '-rf', '{tdir}/{doc}'.format(tdir=teuthology.get_testdir(ctx),doc=kafka_file)],
+ )
+
+
+@contextlib.contextmanager
+def run_kafka(ctx,config):
+ """
+ This includes two parts:
+ 1. Starting Zookeeper service
+ 2. Starting Kafka service
+ """
+ assert isinstance(config, dict)
+ log.info('Bringing up Zookeeper and Kafka services...')
+ for (client,_) in config.items():
+ (remote,) = ctx.cluster.only(client).remotes.keys()
+ kafka_dir = get_kafka_dir(ctx, config)
+
+ second_broker_data = "{tdir}/data/broker02".format(tdir=kafka_dir)
+ second_broker_java_log_dir = "{}/java_logs".format(second_broker_data)
+
+ ctx.cluster.only(client).run(
+ args=['cd', '{tdir}/bin'.format(tdir=kafka_dir), run.Raw('&&'),
+ './zookeeper-server-start.sh',
+ '{tir}/config/zookeeper.properties'.format(tir=kafka_dir),
+ run.Raw('&'), 'exit'
+ ],
+ )
+
+ ctx.cluster.only(client).run(
+ args=['cd', '{tdir}/bin'.format(tdir=kafka_dir), run.Raw('&&'),
+ './kafka-server-start.sh',
+ '{tir}/config/server.properties'.format(tir=get_kafka_dir(ctx, config)),
+ run.Raw('&'), 'exit'
+ ],
+ )
+
+ ctx.cluster.only(client).run(
+ args=['cd', '{tdir}/bin'.format(tdir=kafka_dir), run.Raw('&&'),
+ run.Raw('LOG_DIR={second_broker_java_log_dir}'.format(second_broker_java_log_dir=second_broker_java_log_dir)),
+ './kafka-server-start.sh', '{tdir}/config/server2.properties'.format(tdir=kafka_dir),
+ run.Raw('&'), 'exit'
+ ],
+ )
+
+ try:
+ yield
+ finally:
+ log.info('Stopping Zookeeper and Kafka Services...')
+
+ for (client, _) in config.items():
+ (remote,) = ctx.cluster.only(client).remotes.keys()
+
+ ctx.cluster.only(client).run(
+ args=['cd', '{tdir}/bin'.format(tdir=get_kafka_dir(ctx, config)), run.Raw('&&'),
+ './kafka-server-stop.sh',
+ '{tir}/config/kafka.properties'.format(tir=get_kafka_dir(ctx, config)),
+ ],
+ )
+
+ time.sleep(5)
+
+ ctx.cluster.only(client).run(
+ args=['cd', '{tdir}/bin'.format(tdir=get_kafka_dir(ctx, config)), run.Raw('&&'),
+ './zookeeper-server-stop.sh',
+ '{tir}/config/zookeeper.properties'.format(tir=get_kafka_dir(ctx, config)),
+ ],
+ )
+
+ time.sleep(5)
+
+ ctx.cluster.only(client).run(args=['killall', '-9', 'java'])
+
+
+@contextlib.contextmanager
+def run_admin_cmds(ctx,config):
+ """
+ Running Kafka admin commands in order to check the working of producer and consumer and the creation of a topic.
+ """
+ assert isinstance(config, dict)
+ log.info('Checking kafka server through producer/consumer commands...')
+ for (client,_) in config.items():
+ (remote,) = ctx.cluster.only(client).remotes.keys()
+
+ ctx.cluster.only(client).run(
+ args=[
+ 'cd', '{tdir}/bin'.format(tdir=get_kafka_dir(ctx, config)), run.Raw('&&'),
+ './kafka-topics.sh', '--create', '--topic', 'quickstart-events',
+ '--bootstrap-server', 'localhost:9092'
+ ],
+ )
+
+ ctx.cluster.only(client).run(
+ args=[
+ 'cd', '{tdir}/bin'.format(tdir=get_kafka_dir(ctx, config)), run.Raw('&&'),
+ 'echo', "First", run.Raw('|'),
+ './kafka-console-producer.sh', '--topic', 'quickstart-events',
+ '--bootstrap-server', 'localhost:9092'
+ ],
+ )
+
+ ctx.cluster.only(client).run(
+ args=[
+ 'cd', '{tdir}/bin'.format(tdir=get_kafka_dir(ctx, config)), run.Raw('&&'),
+ './kafka-console-consumer.sh', '--topic', 'quickstart-events',
+ '--from-beginning',
+ '--bootstrap-server', 'localhost:9092',
+ run.Raw('&'), 'exit'
+ ],
+ )
+
+ try:
+ yield
+ finally:
+ pass
+
+
+@contextlib.contextmanager
+def task(ctx,config):
+ """
+ Following is the way to run kafka::
+ tasks:
+ - kafka:
+ client.0:
+ kafka_version: 2.6.0
+ """
+ assert config is None or isinstance(config, list) \
+ or isinstance(config, dict), \
+ "task kafka only supports a list or dictionary for configuration"
+
+ all_clients = ['client.{id}'.format(id=id_)
+ for id_ in teuthology.all_roles_of_type(ctx.cluster, 'client')]
+ if config is None:
+ config = all_clients
+ if isinstance(config, list):
+ config = dict.fromkeys(config)
+
+ log.debug('Kafka config is %s', config)
+
+ with contextutil.nested(
+ lambda: install_kafka(ctx=ctx, config=config),
+ lambda: run_kafka(ctx=ctx, config=config),
+ lambda: run_admin_cmds(ctx=ctx, config=config),
+ ):
+ yield
+
diff --git a/qa/tasks/mgr/dashboard/helper.py b/qa/tasks/mgr/dashboard/helper.py
index e6a7c35a23d..55355048a36 100644
--- a/qa/tasks/mgr/dashboard/helper.py
+++ b/qa/tasks/mgr/dashboard/helper.py
@@ -220,13 +220,11 @@ class DashboardTestCase(MgrTestCase):
# To avoid any issues with e.g. unlink bugs, we destroy and recreate
# the filesystem rather than just doing a rm -rf of files
- cls.mds_cluster.mds_stop()
- cls.mds_cluster.mds_fail()
cls.mds_cluster.delete_all_filesystems()
+ cls.mds_cluster.mds_restart() # to reset any run-time configs, etc.
cls.fs = None # is now invalid!
cls.fs = cls.mds_cluster.newfs(create=True)
- cls.fs.mds_restart()
# In case some test messed with auth caps, reset them
# pylint: disable=not-an-iterable
diff --git a/qa/tasks/mgr/dashboard/test_mgr_module.py b/qa/tasks/mgr/dashboard/test_mgr_module.py
index d6a368905b6..1dbdef23d34 100644
--- a/qa/tasks/mgr/dashboard/test_mgr_module.py
+++ b/qa/tasks/mgr/dashboard/test_mgr_module.py
@@ -4,6 +4,7 @@ from __future__ import absolute_import
import logging
import requests
+from urllib3.exceptions import MaxRetryError
from .helper import (DashboardTestCase, JLeaf, JList, JObj,
module_options_object_schema, module_options_schema,
@@ -24,10 +25,11 @@ class MgrModuleTestCase(DashboardTestCase):
def _check_connection():
try:
# Try reaching an API endpoint successfully.
+ logger.info('Trying to reach the REST API endpoint')
self._get('/api/mgr/module')
if self._resp.status_code == 200:
return True
- except requests.ConnectionError:
+ except (MaxRetryError, requests.ConnectionError):
pass
return False
diff --git a/qa/tasks/mgr/dashboard/test_rbd.py b/qa/tasks/mgr/dashboard/test_rbd.py
index a872645e33e..83b3bf520c2 100644
--- a/qa/tasks/mgr/dashboard/test_rbd.py
+++ b/qa/tasks/mgr/dashboard/test_rbd.py
@@ -869,7 +869,19 @@ class RbdTest(DashboardTestCase):
self.assertEqual(clone_format_version, 2)
self.assertStatus(200)
+ # if empty list is sent, then the config will remain as it is
value = []
+ res = [{'section': "global", 'value': "2"}]
+ self._post('/api/cluster_conf', {
+ 'name': config_name,
+ 'value': value
+ })
+ self.wait_until_equal(
+ lambda: _get_config_by_name(config_name),
+ res,
+ timeout=60)
+
+ value = [{'section': "global", 'value': ""}]
self._post('/api/cluster_conf', {
'name': config_name,
'value': value
diff --git a/qa/tasks/mgr/dashboard/test_rgw.py b/qa/tasks/mgr/dashboard/test_rgw.py
index 5c7b0329675..a9071bc2a3a 100644
--- a/qa/tasks/mgr/dashboard/test_rgw.py
+++ b/qa/tasks/mgr/dashboard/test_rgw.py
@@ -785,7 +785,7 @@ class RgwUserSubuserTest(RgwTestCase):
'access': 'readwrite',
'key_type': 'swift'
})
- self.assertStatus(200)
+ self.assertStatus(201)
data = self.jsonBody()
subuser = self.find_object_in_list('id', 'teuth-test-user:tux', data)
self.assertIsInstance(subuser, object)
@@ -808,7 +808,7 @@ class RgwUserSubuserTest(RgwTestCase):
'access_key': 'yyy',
'secret_key': 'xxx'
})
- self.assertStatus(200)
+ self.assertStatus(201)
data = self.jsonBody()
subuser = self.find_object_in_list('id', 'teuth-test-user:hugo', data)
self.assertIsInstance(subuser, object)
diff --git a/qa/tasks/mgr/mgr_test_case.py b/qa/tasks/mgr/mgr_test_case.py
index 74b1e9d850c..4a5506391f2 100644
--- a/qa/tasks/mgr/mgr_test_case.py
+++ b/qa/tasks/mgr/mgr_test_case.py
@@ -1,5 +1,6 @@
import json
import logging
+import socket
from unittest import SkipTest
@@ -108,7 +109,7 @@ class MgrTestCase(CephTestCase):
# Unload all non-default plugins
loaded = json.loads(cls.mgr_cluster.mon_manager.raw_cluster_cmd(
"mgr", "module", "ls", "--format=json-pretty"))['enabled_modules']
- unload_modules = set(loaded) - {"cephadm", "restful"}
+ unload_modules = set(loaded) - {"cephadm"}
for m in unload_modules:
cls.mgr_cluster.mon_manager.raw_cluster_cmd(
@@ -137,7 +138,7 @@ class MgrTestCase(CephTestCase):
raise SkipTest(
"Only have {0} manager daemons, {1} are required".format(
len(cls.mgr_cluster.mgr_ids), cls.MGRS_REQUIRED))
-
+
# We expect laggy OSDs in this testing environment so turn off this warning.
# See https://tracker.ceph.com/issues/61907
cls.mgr_cluster.mon_manager.raw_cluster_cmd('config', 'set', 'mds',
@@ -229,15 +230,22 @@ class MgrTestCase(CephTestCase):
"""
# Start handing out ports well above Ceph's range.
assign_port = min_port
+ ip_addr = cls.mgr_cluster.get_mgr_map()['active_addr'].split(':')[0]
for mgr_id in cls.mgr_cluster.mgr_ids:
cls.mgr_cluster.mgr_stop(mgr_id)
cls.mgr_cluster.mgr_fail(mgr_id)
+
for mgr_id in cls.mgr_cluster.mgr_ids:
- log.debug("Using port {0} for {1} on mgr.{2}".format(
- assign_port, module_name, mgr_id
- ))
+ # Find a port that isn't in use
+ while True:
+ if not cls.is_port_in_use(ip_addr, assign_port):
+ break
+ log.debug(f"Port {assign_port} in use, trying next")
+ assign_port += 1
+
+ log.debug(f"Using port {assign_port} for {module_name} on mgr.{mgr_id}")
cls.mgr_cluster.set_module_localized_conf(module_name, mgr_id,
config_name,
str(assign_port),
@@ -255,3 +263,8 @@ class MgrTestCase(CephTestCase):
mgr_map['active_name'], mgr_map['active_gid']))
return done
cls.wait_until_true(is_available, timeout=30)
+
+ @classmethod
+ def is_port_in_use(cls, ip_addr: str, port: int) -> bool:
+ with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
+ return s.connect_ex((ip_addr, port)) == 0
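_assign_ports now probes every candidate port with the new is_port_in_use() helper before handing it to a module. A short standalone sketch of the same probe-and-advance loop (standard socket API only; the starting port is arbitrary):

    import socket

    def first_free_port(ip_addr, start=8100):
        # Walk upward from `start` until connect_ex() reports nothing listening.
        port = start
        while True:
            with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
                if s.connect_ex((ip_addr, port)) != 0:   # non-zero => free
                    return port
            port += 1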
diff --git a/qa/tasks/mgr/test_module_selftest.py b/qa/tasks/mgr/test_module_selftest.py
index 7ac2960371c..c41a95c71f7 100644
--- a/qa/tasks/mgr/test_module_selftest.py
+++ b/qa/tasks/mgr/test_module_selftest.py
@@ -36,13 +36,6 @@ class TestModuleSelftest(MgrTestCase):
self.mgr_cluster.mon_manager.raw_cluster_cmd(
"mgr", "self-test", "module", module_name)
- def test_zabbix(self):
- # Set these mandatory config fields so that the zabbix module
- # won't trigger health/log errors on load/serve.
- self.mgr_cluster.set_module_conf("zabbix", "zabbix_host", "localhost")
- self.mgr_cluster.set_module_conf("zabbix", "identifier", "foo")
- self._selftest_plugin("zabbix")
-
def test_prometheus(self):
self._assign_ports("prometheus", "server_port", min_port=8100)
self._selftest_plugin("prometheus")
diff --git a/qa/tasks/notification_tests.py b/qa/tasks/notification_tests.py
index b4697a6f797..f1eae3c89c4 100644
--- a/qa/tasks/notification_tests.py
+++ b/qa/tasks/notification_tests.py
@@ -220,7 +220,7 @@ def run_tests(ctx, config):
for client, client_config in config.items():
(remote,) = ctx.cluster.only(client).remotes.keys()
- attr = ["!kafka_test", "!data_path_v2_kafka_test", "!amqp_test", "!amqp_ssl_test", "!kafka_security_test", "!modification_required", "!manual_test", "!http_test"]
+ attr = ["!kafka_test", "!data_path_v2_kafka_test", "!kafka_failover", "!amqp_test", "!amqp_ssl_test", "!kafka_security_test", "!modification_required", "!manual_test", "!http_test"]
if 'extra_attr' in client_config:
attr = client_config.get('extra_attr')
diff --git a/qa/tasks/nvme_loop.py b/qa/tasks/nvme_loop.py
index fef270ea085..fdec467a16d 100644
--- a/qa/tasks/nvme_loop.py
+++ b/qa/tasks/nvme_loop.py
@@ -70,7 +70,7 @@ def task(ctx, config):
remote.run(args=['lsblk'], stdout=StringIO())
p = remote.run(args=['sudo', 'nvme', 'list', '-o', 'json'], stdout=StringIO())
new_devs = []
- # `nvme list -o json` will return the following output:
+ # `nvme list -o json` will return one of the following outputs:
'''{
"Devices" : [
{
@@ -91,13 +91,112 @@ def task(ctx, config):
}
]
}'''
+ '''{
+ "Devices":[
+ {
+ "HostNQN":"nqn.2014-08.org.nvmexpress:uuid:00000000-0000-0000-0000-0cc47ada6ba4",
+ "HostID":"898a0e10-da2d-4a42-8017-d9c445089d0c",
+ "Subsystems":[
+ {
+ "Subsystem":"nvme-subsys0",
+ "SubsystemNQN":"nqn.2014.08.org.nvmexpress:80868086CVFT623300LN400BGN INTEL SSDPEDMD400G4",
+ "Controllers":[
+ {
+ "Controller":"nvme0",
+ "Cntlid":"0",
+ "SerialNumber":"CVFT623300LN400BGN",
+ "ModelNumber":"INTEL SSDPEDMD400G4",
+ "Firmware":"8DV101H0",
+ "Transport":"pcie",
+ "Address":"0000:02:00.0",
+ "Slot":"2",
+ "Namespaces":[
+ {
+ "NameSpace":"nvme0n1",
+ "Generic":"ng0n1",
+ "NSID":1,
+ "UsedBytes":400088457216,
+ "MaximumLBA":781422768,
+ "PhysicalSize":400088457216,
+ "SectorSize":512
+ }
+ ],
+ "Paths":[
+ ]
+ }
+ ],
+ "Namespaces":[
+ ]
+ }
+ ]
+ }
+ ]
+ }
+ '''
+ '''{
+ "Devices":[
+ {
+ "HostNQN":"nqn.2014-08.org.nvmexpress:uuid:00000000-0000-0000-0000-0cc47ada6ba4",
+ "HostID":"898a0e10-da2d-4a42-8017-d9c445089d0c",
+ "Subsystems":[
+ {
+ "Subsystem":"nvme-subsys0",
+ "SubsystemNQN":"nqn.2014.08.org.nvmexpress:80868086CVFT534400C2400BGN INTEL SSDPEDMD400G4",
+ "Controllers":[
+ {
+ "Controller":"nvme0",
+ "Cntlid":"0",
+ "SerialNumber":"CVFT534400C2400BGN",
+ "ModelNumber":"INTEL SSDPEDMD400G4",
+ "Firmware":"8DV101H0",
+ "Transport":"pcie",
+ "Address":"0000:02:00.0",
+ "Slot":"2",
+ "Namespaces":[
+ {
+ "NameSpace":"nvme0n1",
+ "Generic":"ng0n1",
+ "NSID":1,
+ "UsedBytes":400088457216,
+ "MaximumLBA":781422768,
+ "PhysicalSize":400088457216,
+ "SectorSize":512
+ }
+ ],
+ "Paths":[
+ ]
+ }
+ ],
+ "Namespaces":[
+ ]
+ }
+ ]
+ }
+ ]
+ }
+ '''
nvme_list = json.loads(p.stdout.getvalue())
for device in nvme_list['Devices']:
- dev = device['DevicePath']
- vendor = device['ModelNumber']
- if dev.startswith('/dev/') and vendor == 'Linux':
- new_devs.append(dev)
- bluestore_zap(remote, dev)
+ try:
+ # first try format 1 / older format
+ dev = device['DevicePath']
+ vendor = device['ModelNumber']
+ if dev.startswith('/dev/') and vendor == 'Linux':
+ new_devs.append(dev)
+ bluestore_zap(remote, dev)
+ except KeyError:
+ for subsystem in device['Subsystems']:
+ # format 2
+ if 'Namespaces' in subsystem and subsystem['Namespaces']:
+ dev = '/dev/' + subsystem['Namespaces'][0]['NameSpace']
+ # try format 3 last
+ else:
+ dev = '/dev/' + subsystem['Controllers'][0]['Namespaces'][0]['NameSpace']
+ # vendor is the same for format 2 and 3
+ vendor = subsystem['Controllers'][0]['ModelNumber']
+ if vendor == 'Linux':
+ new_devs.append(dev)
+ bluestore_zap(remote, dev)
log.info(f'new_devs {new_devs}')
assert len(new_devs) <= len(devs)
if len(new_devs) == len(devs):
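The nvme_loop hunk has to cope with three `nvme list -o json` layouts: the flat one with DevicePath/ModelNumber on the device, and two nested Subsystems layouts where the namespace lives either on the subsystem or on its first controller. The per-device extraction on its own, mirroring the try/except above (field names come from the JSON samples quoted in the comment):

    def devices_and_vendors(device):
        # Yield (device_path, model_number) pairs for one entry of
        # 'nvme list -o json', covering the three layouts described above.
        if "DevicePath" in device:                        # older flat format
            yield device["DevicePath"], device["ModelNumber"]
            return
        for subsystem in device.get("Subsystems", []):
            if subsystem.get("Namespaces"):               # namespace on subsystem
                ns = subsystem["Namespaces"][0]["NameSpace"]
            else:                                         # namespace on controller
                ns = subsystem["Controllers"][0]["Namespaces"][0]["NameSpace"]
            yield "/dev/" + ns, subsystem["Controllers"][0]["ModelNumber"]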
diff --git a/qa/tasks/nvmeof.py b/qa/tasks/nvmeof.py
index b89f123c97e..691a6f7dd86 100644
--- a/qa/tasks/nvmeof.py
+++ b/qa/tasks/nvmeof.py
@@ -32,6 +32,7 @@ class Nvmeof(Task):
gateway_config:
namespaces_count: 10
cli_version: latest
+ create_mtls_secrets: False
"""
@@ -69,6 +70,7 @@ class Nvmeof(Task):
self.serial = gateway_config.get('serial', 'SPDK00000000000001')
self.port = gateway_config.get('port', '4420')
self.srport = gateway_config.get('srport', '5500')
+ self.create_mtls_secrets = gateway_config.get('create_mtls_secrets', False)
def deploy_nvmeof(self):
"""
@@ -126,12 +128,11 @@ class Nvmeof(Task):
total_images = int(self.namespaces_count) * int(self.subsystems_count)
log.info(f'[nvmeof]: creating {total_images} images')
+ rbd_create_cmd = []
for i in range(1, total_images + 1):
imagename = self.image_name_prefix + str(i)
- log.info(f'[nvmeof]: rbd create {poolname}/{imagename} --size {self.rbd_size}')
- _shell(self.ctx, self.cluster_name, self.remote, [
- 'rbd', 'create', f'{poolname}/{imagename}', '--size', f'{self.rbd_size}'
- ])
+ rbd_create_cmd += ['rbd', 'create', f'{poolname}/{imagename}', '--size', f'{self.rbd_size}', run.Raw(';')]
+ _shell(self.ctx, self.cluster_name, self.remote, rbd_create_cmd)
for role, i in daemons.items():
remote, id_ = i
@@ -147,7 +148,38 @@ class Nvmeof(Task):
started=True,
)
log.info("[nvmeof]: executed deploy_nvmeof successfully!")
-
+
+ def write_mtls_config(self, gateway_ips):
+ log.info("[nvmeof]: writing mtls config...")
+ allowed_ips = ""
+ for ip in gateway_ips:
+ allowed_ips += ("IP:" + ip + ",")
+ self.remote.run(
+ args=[
+ "sudo", "openssl", "req", "-x509", "-newkey", "rsa:4096", "-nodes", "-keyout", "/etc/ceph/server.key",
+ "-out", "/etc/ceph/server.crt", "-days", "3650", "-subj", "/CN=my.server", "-addext", f"subjectAltName={allowed_ips[:-1]}"
+ ]
+ )
+ self.remote.run(
+ args=[
+ "sudo", "openssl", "req", "-x509", "-newkey", "rsa:4096", "-nodes", "-keyout", "/etc/ceph/client.key",
+ "-out", "/etc/ceph/client.crt", "-days", "3650", "-subj", "/CN=client1"
+ ]
+ )
+ secrets_files = {"/etc/ceph/server.key": None,
+ "/etc/ceph/server.crt": None,
+ "/etc/ceph/client.key": None,
+ "/etc/ceph/client.crt": None,
+ }
+ for file in secrets_files.keys():
+ secrets_files[file] = self.remote.read_file(path=file, sudo=True)
+
+ for remote in self.ctx.cluster.remotes.keys():
+ for remote_file in secrets_files.keys():
+ data = secrets_files[remote_file]
+ remote.sudo_write_file(path=remote_file, data=data, mode='0644')
+ log.info("[nvmeof]: written mtls config!")
+
def set_gateway_cfg(self):
log.info('[nvmeof]: running set_gateway_cfg...')
ip_address = self.remote.ip_address
@@ -174,6 +206,8 @@ class Nvmeof(Task):
data=conf_data,
sudo=True
)
+ if self.create_mtls_secrets:
+ self.write_mtls_config(gateway_ips)
log.info("[nvmeof]: executed set_gateway_cfg successfully!")
@@ -216,9 +250,9 @@ class NvmeofThrasher(Thrasher, Greenlet):
daemon_max_thrash_times:
For now, NVMeoF daemons have limitation that each daemon can
- be thrashed only 3 times in span of 30 mins. This option
+ be thrashed only 5 times within a span of 30 minutes. This option
allows to set the amount of times it could be thrashed in a period
- of time. (default: 3)
+ of time. (default: 5)
daemon_max_thrash_period:
This option goes with the above option. It sets the period of time
over which each daemon can be thrashed for daemon_max_thrash_times
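As a rough sketch of the limit these two options describe (defaults taken from the docstring above; the helper below is illustrative and not part of the task code):

    from datetime import datetime, timedelta

    def may_thrash_again(thrash_history, max_times=5, period=timedelta(minutes=30)):
        """Allow another thrash only if the daemon was thrashed fewer than
        max_times times within the trailing period (the two options above)."""
        cutoff = datetime.now() - period
        recent = [t for t in thrash_history if t > cutoff]
        return len(recent) < max_times

    # a daemon already thrashed 5 times in the last half hour must be skipped
    history = [datetime.now() - timedelta(minutes=m) for m in (2, 7, 11, 19, 25)]
    assert may_thrash_again(history) is False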
@@ -271,17 +305,17 @@ class NvmeofThrasher(Thrasher, Greenlet):
self.max_thrash_daemons = int(self.config.get('max_thrash', len(self.daemons) - 1))
# Limits on thrashing each daemon
- self.daemon_max_thrash_times = int(self.config.get('daemon_max_thrash_times', 3))
+ self.daemon_max_thrash_times = int(self.config.get('daemon_max_thrash_times', 5))
self.daemon_max_thrash_period = int(self.config.get('daemon_max_thrash_period', 30 * 60)) # seconds
self.min_thrash_delay = int(self.config.get('min_thrash_delay', 60))
self.max_thrash_delay = int(self.config.get('max_thrash_delay', self.min_thrash_delay + 30))
- self.min_revive_delay = int(self.config.get('min_revive_delay', 100))
+ self.min_revive_delay = int(self.config.get('min_revive_delay', 60))
self.max_revive_delay = int(self.config.get('max_revive_delay', self.min_revive_delay + 30))
def _get_devices(self, remote):
GET_DEVICE_CMD = "sudo nvme list --output-format=json | " \
- "jq -r '.Devices | sort_by(.NameSpace) | .[] | select(.ModelNumber == \"Ceph bdev Controller\") | .DevicePath'"
+ "jq -r '.Devices[].Subsystems[] | select(.Controllers | all(.ModelNumber == \"Ceph bdev Controller\")) | .Namespaces | sort_by(.NSID) | .[] | .NameSpace'"
devices = remote.sh(GET_DEVICE_CMD).split()
return devices
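For readers less familiar with jq, a minimal Python sketch of what the new filter selects, based on the Subsystems/Namespaces layout shown in the nvme list JSON samples earlier in this diff (the function name is illustrative only):

    import json

    def ceph_bdev_namespaces(nvme_list_json):
        """Return namespace names (e.g. 'nvme0n1') from subsystems whose
        controllers all report the 'Ceph bdev Controller' model, sorted by
        NSID -- roughly what the jq expression above extracts."""
        names = []
        for device in json.loads(nvme_list_json).get('Devices', []):
            for subsys in device.get('Subsystems', []):
                controllers = subsys.get('Controllers', [])
                if all(c.get('ModelNumber') == 'Ceph bdev Controller' for c in controllers):
                    namespaces = sorted(subsys.get('Namespaces', []), key=lambda ns: ns['NSID'])
                    names.extend(ns['NameSpace'] for ns in namespaces)
        return names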
@@ -312,6 +346,7 @@ class NvmeofThrasher(Thrasher, Greenlet):
run.Raw('&&'), 'ceph', 'orch', 'ps', '--daemon-type', 'nvmeof',
run.Raw('&&'), 'ceph', 'health', 'detail',
run.Raw('&&'), 'ceph', '-s',
+ run.Raw('&&'), 'sudo', 'nvme', 'list',
]
for dev in self.devices:
check_cmd += [
@@ -386,13 +421,11 @@ class NvmeofThrasher(Thrasher, Greenlet):
while not self.stopping.is_set():
killed_daemons = defaultdict(list)
- weight = 1.0 / len(self.daemons)
- count = 0
+ thrash_daemon_num = self.rng.randint(1, self.max_thrash_daemons)
+ selected_daemons = self.rng.sample(self.daemons, thrash_daemon_num)
for daemon in self.daemons:
- skip = self.rng.uniform(0.0, 1.0)
- if weight <= skip:
- self.log('skipping daemon {label} with skip ({skip}) > weight ({weight})'.format(
- label=daemon.id_, skip=skip, weight=weight))
+ if daemon not in selected_daemons:
+ self.log(f'skipping daemon {daemon.id_} ...')
continue
# For now, nvmeof daemons can only be thrashed daemon_max_thrash_times times within daemon_max_thrash_period.
@@ -410,17 +443,11 @@ class NvmeofThrasher(Thrasher, Greenlet):
continue
self.log('kill {label}'.format(label=daemon.id_))
- # daemon.stop()
kill_method = self.kill_daemon(daemon)
killed_daemons[kill_method].append(daemon)
daemons_thrash_history[daemon.id_] += [datetime.now()]
- # only thrash max_thrash_daemons amount of daemons
- count += 1
- if count >= self.max_thrash_daemons:
- break
-
if killed_daemons:
iteration_summary = "thrashed- "
for kill_method in killed_daemons:
@@ -433,7 +460,7 @@ class NvmeofThrasher(Thrasher, Greenlet):
self.log(f'waiting for {revive_delay} secs before reviving')
time.sleep(revive_delay) # blocking wait
- self.log('done waiting before reviving')
+ self.log(f'done waiting before reviving - iteration #{len(summary)}: {iteration_summary}')
self.do_checks()
self.switch_task()
@@ -452,7 +479,7 @@ class NvmeofThrasher(Thrasher, Greenlet):
if thrash_delay > 0.0:
self.log(f'waiting for {thrash_delay} secs before thrashing')
time.sleep(thrash_delay) # blocking
- self.log('done waiting before thrashing')
+ self.log('done waiting before thrashing - everything should be up now')
self.do_checks()
self.switch_task()
diff --git a/qa/tasks/rados.py b/qa/tasks/rados.py
index d8eac5d886f..96bcc770511 100644
--- a/qa/tasks/rados.py
+++ b/qa/tasks/rados.py
@@ -36,6 +36,8 @@ def task(ctx, config):
write_fadvise_dontneed: write behavior like with LIBRADOS_OP_FLAG_FADVISE_DONTNEED.
This means the data will not be accessed in the near future,
so the OSD backend does not need to keep it in cache.
+ pct_update_delay: delay before the primary propagates the pct on write pause,
+ defaults to 5s if balance_reads is set
For example::
@@ -139,6 +141,7 @@ def task(ctx, config):
object_size = int(config.get('object_size', 4000000))
op_weights = config.get('op_weights', {})
testdir = teuthology.get_testdir(ctx)
+ pct_update_delay = None
args = [
'adjust-ulimits',
'ceph-coverage',
@@ -166,6 +169,7 @@ def task(ctx, config):
args.extend(['--pool-snaps'])
if config.get('balance_reads', False):
args.extend(['--balance-reads'])
+ pct_update_delay = config.get('pct_update_delay', 5);
if config.get('localize_reads', False):
args.extend(['--localize-reads'])
if config.get('max_attr_len', None):
@@ -274,6 +278,10 @@ def task(ctx, config):
if config.get('fast_read', False):
manager.raw_cluster_cmd(
'osd', 'pool', 'set', pool, 'fast_read', 'true')
+ if pct_update_delay:
+ manager.raw_cluster_cmd(
+ 'osd', 'pool', 'set', pool,
+ 'pct_update_delay', str(pct_update_delay));
min_size = config.get('min_size', None);
if min_size is not None:
manager.raw_cluster_cmd(
diff --git a/qa/tasks/radosgw_admin.py b/qa/tasks/radosgw_admin.py
index 3b98702acca..fb82378761b 100644
--- a/qa/tasks/radosgw_admin.py
+++ b/qa/tasks/radosgw_admin.py
@@ -16,6 +16,7 @@ import logging
import time
import datetime
import sys
+import errno
from io import StringIO
from queue import Queue
@@ -725,6 +726,40 @@ def task(ctx, config):
(err, out) = rgwadmin(ctx, client, ['user', 'rm', '--tenant', tenant_name, '--uid', 'tenanteduser'],
check_status=True)
+ account_id = 'RGW12312312312312312'
+ account_name = 'testacct'
+ rgwadmin(ctx, client, [
+ 'account', 'create',
+ '--account-id', account_id,
+ '--account-name', account_name,
+ ], check_status=True)
+ rgwadmin(ctx, client, [
+ 'user', 'create',
+ '--account-id', account_id,
+ '--uid', 'testacctuser',
+ '--display-name', 'accountuser',
+ '--gen-access-key',
+ '--gen-secret',
+ ], check_status=True)
+
+ # TESTCASE 'bucket link', 'bucket', 'account user', 'fails'
+ (err, out) = rgwadmin(ctx, client, ['bucket', 'link', '--bucket', bucket_name, '--uid', 'testacctuser'])
+ assert err == errno.EINVAL
+
+ rgwadmin(ctx, client, ['user', 'rm', '--uid', 'testacctuser'], check_status=True)
+
+ # TESTCASE 'bucket link', 'bucket', 'account', 'succeeds'
+ rgwadmin(ctx, client,
+ ['bucket', 'link', '--bucket', bucket_name, '--account-id', account_id],
+ check_status=True)
+
+ # relink the bucket to the first user and delete the account
+ rgwadmin(ctx, client,
+ ['bucket', 'link', '--bucket', bucket_name, '--uid', user1],
+ check_status=True)
+ rgwadmin(ctx, client, ['account', 'rm', '--account-id', account_id],
+ check_status=True)
+
# TESTCASE 'object-rm', 'object', 'rm', 'remove object', 'succeeds, object is removed'
# upload an object
diff --git a/qa/tasks/rgw_multisite.py b/qa/tasks/rgw_multisite.py
index e83a54efc2b..f93ca017fa2 100644
--- a/qa/tasks/rgw_multisite.py
+++ b/qa/tasks/rgw_multisite.py
@@ -361,6 +361,8 @@ def create_zonegroup(cluster, gateways, period, config):
if endpoints:
# replace client names with their gateway endpoints
config['endpoints'] = extract_gateway_endpoints(gateways, endpoints)
+ if not config.get('api_name'): # otherwise it will be set to an empty string
+ config['api_name'] = config['name']
zonegroup = multisite.ZoneGroup(config['name'], period)
# `zonegroup set` needs --default on command line, and 'is_master' in json
args = is_default_arg(config)
diff --git a/qa/tasks/rook.py b/qa/tasks/rook.py
index 6cb75173966..fae5ef3bf00 100644
--- a/qa/tasks/rook.py
+++ b/qa/tasks/rook.py
@@ -8,7 +8,7 @@ import json
import logging
import os
import yaml
-from io import BytesIO
+from io import BytesIO, StringIO
from tarfile import ReadError
from tasks.ceph_manager import CephManager
@@ -235,10 +235,14 @@ def ceph_log(ctx, config):
r = ctx.rook[cluster_name].remote.run(
stdout=BytesIO(),
args=args,
+ stderr=StringIO(),
)
stdout = r.stdout.getvalue().decode()
if stdout:
return stdout
+ stderr = r.stderr.getvalue()
+ if stderr:
+ return stderr
return None
if first_in_ceph_log('\[ERR\]|\[WRN\]|\[SEC\]',
diff --git a/qa/tasks/s3a_hadoop.py b/qa/tasks/s3a_hadoop.py
index 7b77359fcf2..4518a6f397c 100644
--- a/qa/tasks/s3a_hadoop.py
+++ b/qa/tasks/s3a_hadoop.py
@@ -1,5 +1,6 @@
import contextlib
import logging
+import os
from teuthology import misc
from teuthology.orchestra import run
@@ -40,7 +41,7 @@ def task(ctx, config):
# get versions
maven_major = config.get('maven-major', 'maven-3')
- maven_version = config.get('maven-version', '3.6.3')
+ maven_version = config.get('maven-version', '3.9.9')
hadoop_ver = config.get('hadoop-version', '2.9.2')
bucket_name = config.get('bucket-name', 's3atest')
access_key = config.get('access-key', 'EGAQRD2ULOIFKFSKCT4F')
@@ -48,11 +49,19 @@ def task(ctx, config):
'secret-key',
'zi816w1vZKfaSM85Cl0BxXTwSLyN7zB4RbTswrGb')
+ # programmatically find a nearby mirror so as not to hammer archive.apache.org
+ apache_mirror_cmd="curl 'https://www.apache.org/dyn/closer.cgi' 2>/dev/null | " \
+ "grep -o '<strong>[^<]*</strong>' | sed 's/<[^>]*>//g' | head -n 1"
+ log.info("determining apache mirror by running: " + apache_mirror_cmd)
+ apache_mirror_url_front = os.popen(apache_mirror_cmd).read().rstrip() # note: includes trailing slash (/)
+ log.info("chosen apache mirror is " + apache_mirror_url_front)
+
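The same mirror lookup can be sketched without shelling out; a minimal illustration that parses the same <strong> markup (the helper is hypothetical and assumes closer.cgi keeps that markup):

    import re
    import urllib.request

    def nearest_apache_mirror(default='https://archive.apache.org/dist/'):
        """Fetch closer.cgi and return the first <strong>...</strong> entry,
        which is the suggested mirror; fall back to the archive on failure."""
        try:
            with urllib.request.urlopen('https://www.apache.org/dyn/closer.cgi', timeout=30) as resp:
                html = resp.read().decode('utf-8', errors='replace')
            match = re.search(r'<strong>([^<]+)</strong>', html)
            return match.group(1) if match else default
        except Exception:
            return default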
# set versions for cloning the repo
apache_maven = 'apache-maven-{maven_version}-bin.tar.gz'.format(
maven_version=maven_version)
- maven_link = 'http://archive.apache.org/dist/maven/' + \
- '{maven_major}/{maven_version}/binaries/'.format(maven_major=maven_major, maven_version=maven_version) + apache_maven
+ maven_link = '{apache_mirror_url_front}/maven/'.format(apache_mirror_url_front=apache_mirror_url_front) + \
+ '{maven_major}/{maven_version}/binaries/'.format(maven_major=maven_major, maven_version=maven_version) + \
+ apache_maven
hadoop_git = 'https://github.com/apache/hadoop'
hadoop_rel = 'hadoop-{ver} rel/release-{ver}'.format(ver=hadoop_ver)
if hadoop_ver == 'trunk':
@@ -204,6 +213,7 @@ def run_s3atest(client, maven_version, testdir, test_options):
run.Raw('&&'),
run.Raw(rm_test),
run.Raw('&&'),
+ run.Raw('JAVA_HOME=$(alternatives --list | grep jre_1.8.0 | head -n 1 | awk \'{print $3}\')'),
run.Raw(run_test),
run.Raw(test_options)
]
diff --git a/qa/tasks/s3tests.py b/qa/tasks/s3tests.py
index 6d7b39d5892..85ab97d23cd 100644
--- a/qa/tasks/s3tests.py
+++ b/qa/tasks/s3tests.py
@@ -57,6 +57,17 @@ def download(ctx, config):
'git', 'reset', '--hard', sha1,
],
)
+ if client_config.get('boto3_extensions'):
+ ctx.cluster.only(client).run(
+ args=['mkdir',
+ '-p',
+ '/home/ubuntu/.aws/models/s3/2006-03-01/']
+ )
+ (remote,) = ctx.cluster.only(client).remotes.keys()
+ remote_file = '/home/ubuntu/.aws/models/s3/2006-03-01/service-2.sdk-extras.json'
+ local_file = '{qadir}/../examples/rgw/boto3/service-2.sdk-extras.json'.format(qadir=ctx.config.get('suite_path'))
+ remote.put_file(local_file, remote_file)
+
try:
yield
finally:
@@ -70,6 +81,17 @@ def download(ctx, config):
'{tdir}/s3-tests-{client}'.format(tdir=testdir, client=client),
],
)
+ if client_config.get('boto3_extensions'):
+ ctx.cluster.only(client).run(
+ args=[
+ 'rm', '-rf', '/home/ubuntu/.aws/models/s3/2006-03-01/service-2.sdk-extras.json',
+ ],
+ )
+ ctx.cluster.only(client).run(
+ args=[
+ 'cd', '/home/ubuntu/', run.Raw('&&'), 'rmdir', '-p', '.aws/models/s3/2006-03-01/',
+ ],
+ )
def _config_user(s3tests_conf, section, user, email):
@@ -444,8 +466,10 @@ def run_tests(ctx, config):
attrs += ['not fails_with_subdomain']
if not client_config.get('with-sse-s3'):
attrs += ['not sse_s3']
-
+
attrs += client_config.get('extra_attrs', [])
+ if 'bucket_logging' not in attrs:
+ attrs += ['not bucket_logging']
if 'unit_test_scan' in client_config and client_config['unit_test_scan']:
xmlfile_id = datetime.datetime.now().strftime("%Y-%m-%d-%H:%M:%S--") + str(uuid.uuid4())
xmlpath= f'{testdir}/archive/s3test-{xmlfile_id}.xml'
diff --git a/qa/tasks/s3tests_java.py b/qa/tasks/s3tests_java.py
index 3e20e10d06c..a58aa6cf0b4 100644
--- a/qa/tasks/s3tests_java.py
+++ b/qa/tasks/s3tests_java.py
@@ -284,6 +284,7 @@ class S3tests_java(Task):
args = ['cd',
'{tdir}/s3-tests-java'.format(tdir=testdir),
run.Raw('&&'),
+ run.Raw('JAVA_HOME=$(alternatives --list | grep jre_1.8.0 | head -n 1 | awk \'{print $3}\')'),
'/opt/gradle/gradle/bin/gradle', 'clean', 'test',
'--rerun-tasks', '--no-build-cache',
]
diff --git a/qa/tasks/stretch_mode_disable_enable.py b/qa/tasks/stretch_mode_disable_enable.py
new file mode 100644
index 00000000000..a84a85bb307
--- /dev/null
+++ b/qa/tasks/stretch_mode_disable_enable.py
@@ -0,0 +1,547 @@
+import logging
+from tasks.mgr.mgr_test_case import MgrTestCase
+
+log = logging.getLogger(__name__)
+
+class TestStretchMode(MgrTestCase):
+ """
+ Test the stretch mode feature of Ceph
+ """
+ POOL = 'stretch_pool'
+ CLUSTER = "ceph"
+ WRITE_PERIOD = 10
+ RECOVERY_PERIOD = WRITE_PERIOD * 6
+ SUCCESS_HOLD_TIME = 7
+ STRETCH_CRUSH_RULE = 'stretch_rule'
+ STRETCH_CRUSH_RULE_ID = None
+ STRETCH_BUCKET_TYPE = 'datacenter'
+ TIEBREAKER_MON_NAME = 'e'
+ DEFAULT_POOL_TYPE = 'replicated'
+ DEFAULT_POOL_CRUSH_RULE = 'replicated_rule'
+ DEFAULT_POOL_SIZE = 3
+ DEFAULT_POOL_MIN_SIZE = 2
+ DEFAULT_POOL_CRUSH_RULE_ID = None
+ # This dictionary maps the datacenter to the osd ids and hosts
+ DC_OSDS = {
+ 'dc1': {
+ "host01": [0, 1],
+ "host02": [2, 3],
+ },
+ 'dc2': {
+ "host03": [4, 5],
+ "host04": [6, 7],
+ },
+ }
+ DC_MONS = {
+ 'dc1': {
+ "host01": ['a'],
+ "host02": ['b'],
+ },
+ 'dc2': {
+ "host03": ['c'],
+ "host04": ['d'],
+ },
+ 'dc3': {
+ "host05": ['e'],
+ }
+ }
+ def _osd_count(self):
+ """
+ Get the number of OSDs in the cluster.
+ """
+ osd_map = self.mgr_cluster.mon_manager.get_osd_dump_json()
+ return len(osd_map['osds'])
+
+ def setUp(self):
+ """
+ Set up the cluster and
+ ensure a clean state before the test.
+ """
+ # Skip the test if the cluster does not have enough OSDs
+ super(TestStretchMode, self).setUp()
+ self.DEFAULT_POOL_CRUSH_RULE_ID = self.mgr_cluster.mon_manager.get_crush_rule_id(self.DEFAULT_POOL_CRUSH_RULE)
+ self.STRETCH_CRUSH_RULE_ID = self.mgr_cluster.mon_manager.get_crush_rule_id(self.STRETCH_CRUSH_RULE)
+ if self._osd_count() < 4:
+ self.skipTest("Not enough OSDs!")
+
+ # Remove any filesystems so that we can remove their pools
+ if self.mds_cluster:
+ self.mds_cluster.mds_stop()
+ self.mds_cluster.mds_fail()
+ self.mds_cluster.delete_all_filesystems()
+
+ # Remove all other pools
+ for pool in self.mgr_cluster.mon_manager.get_osd_dump_json()['pools']:
+ try:
+ self.mgr_cluster.mon_manager.remove_pool(pool['pool_name'])
+ except:
+ self.mgr_cluster.mon_manager.raw_cluster_cmd(
+ 'osd', 'pool', 'delete',
+ pool['pool_name'],
+ pool['pool_name'],
+ '--yes-i-really-really-mean-it')
+
+ def _setup_pool(
+ self,
+ pool_name=POOL,
+ pg_num=16,
+ pool_type=DEFAULT_POOL_TYPE,
+ crush_rule=DEFAULT_POOL_CRUSH_RULE,
+ size=None,
+ min_size=None
+ ):
+ """
+ Create a pool and set its size and min_size if specified.
+ """
+ self.mgr_cluster.mon_manager.raw_cluster_cmd(
+ 'osd', 'pool', 'create', pool_name, str(pg_num), pool_type, crush_rule)
+
+ if size is not None:
+ self.mgr_cluster.mon_manager.raw_cluster_cmd(
+ 'osd', 'pool', 'set', pool_name, 'size', str(size))
+
+ if min_size is not None:
+ self.mgr_cluster.mon_manager.raw_cluster_cmd(
+ 'osd', 'pool', 'set', pool_name, 'min_size', str(min_size))
+
+ def _write_some_data(self, t):
+ """
+ Write some data to the pool to simulate a workload.
+ """
+ args = [
+ "rados", "-p", self.POOL, "bench", str(t), "write", "-t", "16"]
+ self.mgr_cluster.admin_remote.run(args=args, wait=True)
+
+ def _get_all_mons_from_all_dc(self):
+ """
+ Get all mons from all datacenters.
+ """
+ return [mon for dc in self.DC_MONS.values() for mons in dc.values() for mon in mons]
+
+ def _bring_back_mon(self, mon):
+ """
+ Bring back the mon.
+ """
+ try:
+ self.ctx.daemons.get_daemon('mon', mon, self.CLUSTER).restart()
+ except Exception:
+ log.error("Failed to bring back mon.{}".format(str(mon)))
+ pass
+
+ def _get_host(self, osd):
+ """
+ Get the host of the osd.
+ """
+ for dc, nodes in self.DC_OSDS.items():
+ for node, osds in nodes.items():
+ if osd in osds:
+ return node
+ return None
+
+ def _move_osd_back_to_host(self, osd):
+ """
+ Move the osd back to the host.
+ """
+ host = self._get_host(osd)
+ assert host is not None, "The host of osd {} is not found.".format(osd)
+ log.debug("Moving osd.%d back to %s", osd, host)
+ self.mgr_cluster.mon_manager.raw_cluster_cmd(
+ 'osd', 'crush', 'move', 'osd.{}'.format(str(osd)),
+ 'host={}'.format(host)
+ )
+
+ def tearDown(self):
+ """
+ Clean up the cluster after the test.
+ """
+ # Remove the pool
+ if self.POOL in self.mgr_cluster.mon_manager.pools:
+ self.mgr_cluster.mon_manager.remove_pool(self.POOL)
+
+ osd_map = self.mgr_cluster.mon_manager.get_osd_dump_json()
+ for osd in osd_map['osds']:
+ # mark any osds that were marked out back in
+ if osd['weight'] == 0.0:
+ self.mgr_cluster.mon_manager.raw_cluster_cmd(
+ 'osd', 'in', str(osd['osd']))
+ # Bring back all the osds and move it back to the host.
+ if osd['up'] == 0:
+ self.mgr_cluster.mon_manager.revive_osd(osd['osd'])
+ self._move_osd_back_to_host(osd['osd'])
+
+ # Bring back all the mons
+ mons = self._get_all_mons_from_all_dc()
+ for mon in mons:
+ self._bring_back_mon(mon)
+ super(TestStretchMode, self).tearDown()
+
+ def _kill_osd(self, osd):
+ """
+ Kill the osd.
+ """
+ try:
+ self.ctx.daemons.get_daemon('osd', osd, self.CLUSTER).stop()
+ except Exception:
+ log.error("Failed to stop osd.{}".format(str(osd)))
+ pass
+
+ def _get_osds_data(self, want_osds):
+ """
+ Get the osd data
+ """
+ all_osds_data = \
+ self.mgr_cluster.mon_manager.get_osd_dump_json()['osds']
+ return [
+ osd_data for osd_data in all_osds_data
+ if int(osd_data['osd']) in want_osds
+ ]
+
+ def _get_osds_by_dc(self, dc):
+ """
+ Get osds by datacenter.
+ """
+ ret = []
+ for host, osds in self.DC_OSDS[dc].items():
+ ret.extend(osds)
+ return ret
+
+ def _fail_over_all_osds_in_dc(self, dc):
+ """
+ Fail over all OSDs in the specified <datacenter>
+ """
+ if not isinstance(dc, str):
+ raise ValueError("dc must be a string")
+ if dc not in self.DC_OSDS:
+ raise ValueError(
+ "dc must be one of the following: %s" % self.DC_OSDS.keys()
+ )
+ log.debug("Failing over all osds in %s", dc)
+ osds = self._get_osds_by_dc(dc)
+ # fail over all the OSDs in the DC
+ log.debug("OSDs to failed over: %s", osds)
+ for osd_id in osds:
+ self._kill_osd(osd_id)
+ # wait until all the osds are down
+ self.wait_until_true(
+ lambda: all([int(osd['up']) == 0
+ for osd in self._get_osds_data(osds)]),
+ timeout=self.RECOVERY_PERIOD
+ )
+
+ def _check_mons_out_of_quorum(self, want_mons):
+ """
+ Check if the mons are not in quorum.
+ """
+ quorum_names = self.mgr_cluster.mon_manager.get_mon_quorum_names()
+ return all([mon not in quorum_names for mon in want_mons])
+
+ def _kill_mon(self, mon):
+ """
+ Kill the mon.
+ """
+ try:
+ self.ctx.daemons.get_daemon('mon', mon, self.CLUSTER).stop()
+ except Exception:
+ log.error("Failed to stop mon.{}".format(str(mon)))
+ pass
+
+ def _get_mons_by_dc(self, dc):
+ """
+ Get mons by datacenter.
+ """
+ ret = []
+ for host, mons in self.DC_MONS[dc].items():
+ ret.extend(mons)
+ return ret
+
+ def _fail_over_all_mons_in_dc(self, dc):
+ """
+ Fail over all mons in the specified <datacenter>
+ """
+ if not isinstance(dc, str):
+ raise ValueError("dc must be a string")
+ if dc not in self.DC_MONS:
+ raise ValueError("dc must be one of the following: %s" %
+ ", ".join(self.DC_MONS.keys()))
+ log.debug("Failing over all mons %s", dc)
+ mons = self._get_mons_by_dc(dc)
+ log.debug("Mons to be failed over: %s", mons)
+ for mon in mons:
+ self._kill_mon(mon)
+ # wait until all the mons are out of quorum
+ self.wait_until_true(
+ lambda: self._check_mons_out_of_quorum(mons),
+ timeout=self.RECOVERY_PERIOD
+ )
+
+ def _stretch_mode_enabled_correctly(self):
+ """
+ Evaluate whether stretch mode is enabled correctly
+ by checking the OSDMap and MonMap.
+ """
+ # Checking the OSDMap
+ osdmap = self.mgr_cluster.mon_manager.get_osd_dump_json()
+ for pool in osdmap['pools']:
+ # expects crush_rule to be stretch_rule
+ self.assertEqual(
+ self.STRETCH_CRUSH_RULE_ID,
+ pool['crush_rule']
+ )
+ # expects pool size to be 4
+ self.assertEqual(
+ 4,
+ pool['size']
+ )
+ # expects pool min_size to be 2
+ self.assertEqual(
+ 2,
+ pool['min_size']
+ )
+ # expects pool is_stretch_pool flag to be true
+ self.assertEqual(
+ True,
+ pool['is_stretch_pool']
+ )
+ # expects peering_crush_bucket_count = 2 (always this value for stretch mode)
+ self.assertEqual(
+ 2,
+ pool['peering_crush_bucket_count']
+ )
+ # expects peering_crush_bucket_target = 2 (always this value for stretch mode)
+ self.assertEqual(
+ 2,
+ pool['peering_crush_bucket_target']
+ )
+ # expects peering_crush_bucket_barrier = 8 (crush type of datacenter is 8)
+ self.assertEqual(
+ 8,
+ pool['peering_crush_bucket_barrier']
+ )
+ # expects stretch_mode_enabled to be True
+ self.assertEqual(
+ True,
+ osdmap['stretch_mode']['stretch_mode_enabled']
+ )
+ # expects stretch_bucket_count to be 2
+ self.assertEqual(
+ 2,
+ osdmap['stretch_mode']['stretch_bucket_count']
+ )
+ # expects degraded_stretch_mode to be 0
+ self.assertEqual(
+ 0,
+ osdmap['stretch_mode']['degraded_stretch_mode']
+ )
+ # expects recovering_stretch_mode to be 0
+ self.assertEqual(
+ 0,
+ osdmap['stretch_mode']['recovering_stretch_mode']
+ )
+ # expects stretch_mode_bucket to be 8 (datacenter crush type = 8)
+ self.assertEqual(
+ 8,
+ osdmap['stretch_mode']['stretch_mode_bucket']
+ )
+ # Checking the MonMap
+ monmap = self.mgr_cluster.mon_manager.get_mon_dump_json()
+ # expects stretch_mode to be True
+ self.assertEqual(
+ True,
+ monmap['stretch_mode']
+ )
+ # expects disallowed_leaders to be tiebreaker_mon
+ self.assertEqual(
+ self.TIEBREAKER_MON_NAME,
+ monmap['disallowed_leaders']
+ )
+ # expects tiebreaker_mon to be the designated tiebreaker mon
+ self.assertEqual(
+ self.TIEBREAKER_MON_NAME,
+ monmap['tiebreaker_mon']
+ )
+
+ def _stretch_mode_disabled_correctly(self):
+ """
+ Evaluate whether stretch mode is disabled correctly
+ by checking the OSDMap and MonMap.
+ """
+ # Checking the OSDMap
+ osdmap = self.mgr_cluster.mon_manager.get_osd_dump_json()
+ for pool in osdmap['pools']:
+ # expects crush_rule to be default
+ self.assertEqual(
+ self.DEFAULT_POOL_CRUSH_RULE_ID,
+ pool['crush_rule']
+ )
+ # expects pool size to be default
+ self.assertEqual(
+ self.DEFAULT_POOL_SIZE,
+ pool['size']
+ )
+ # expects pool min_size to be default
+ self.assertEqual(
+ self.DEFAULT_POOL_MIN_SIZE,
+ pool['min_size']
+ )
+ # expects pool is_stretch_pool flag to be false
+ self.assertEqual(
+ False,
+ pool['is_stretch_pool']
+ )
+ # expects peering_crush_bucket_count = 0
+ self.assertEqual(
+ 0,
+ pool['peering_crush_bucket_count']
+ )
+ # expects peering_crush_bucket_target = 0
+ self.assertEqual(
+ 0,
+ pool['peering_crush_bucket_target']
+ )
+ # expects peering_crush_bucket_barrier = 0
+ self.assertEqual(
+ 0,
+ pool['peering_crush_bucket_barrier']
+ )
+ # expects stretch_mode_enabled to be False
+ self.assertEqual(
+ False,
+ osdmap['stretch_mode']['stretch_mode_enabled']
+ )
+ # expects stretch_bucket_count to be 0
+ self.assertEqual(
+ 0,
+ osdmap['stretch_mode']['stretch_bucket_count']
+ )
+ # expects degraded_stretch_mode to be 0
+ self.assertEqual(
+ 0,
+ osdmap['stretch_mode']['degraded_stretch_mode']
+ )
+ # expects recovering_stretch_mode to be 0
+ self.assertEqual(
+ 0,
+ osdmap['stretch_mode']['recovering_stretch_mode']
+ )
+ # expects stretch_mode_bucket to be 0
+ self.assertEqual(
+ 0,
+ osdmap['stretch_mode']['stretch_mode_bucket']
+ )
+ # Checking the MonMap
+ monmap = self.mgr_cluster.mon_manager.get_mon_dump_json()
+ # expects stretch_mode to be False
+ self.assertEqual(
+ False,
+ monmap['stretch_mode']
+ )
+ # expects disallowed_leaders to be empty
+ self.assertEqual(
+ "",
+ monmap['disallowed_leaders']
+ )
+ # expects tiebreaker_mon to be empty
+ self.assertEqual(
+ "",
+ monmap['tiebreaker_mon']
+ )
+
+ def test_disable_stretch_mode(self):
+ """
+ Test disabling stretch mode with the following scenario:
+ 1. Healthy Stretch Mode
+ 2. Degraded Stretch Mode
+ """
+ # Create a pool
+ self._setup_pool(self.POOL, 16, 'replicated', self.STRETCH_CRUSH_RULE, 4, 2)
+ # Write some data to the pool
+ self._write_some_data(self.WRITE_PERIOD)
+ # disable stretch mode without --yes-i-really-mean-it (expects -EPERM 1)
+ self.assertEqual(
+ 1,
+ self.mgr_cluster.mon_manager.raw_cluster_cmd_result(
+ 'mon',
+ 'disable_stretch_mode'
+ ))
+ # Disable stretch mode with non-existent crush rule (expects -EINVAL 22)
+ self.assertEqual(
+ 22,
+ self.mgr_cluster.mon_manager.raw_cluster_cmd_result(
+ 'mon',
+ 'disable_stretch_mode',
+ 'non_existent_rule',
+ '--yes-i-really-mean-it'
+ ))
+ # Disable stretch mode with the current stretch rule (expect -EINVAL 22)
+ self.assertEqual(
+ 22,
+ self.mgr_cluster.mon_manager.raw_cluster_cmd_result(
+ 'mon',
+ 'disable_stretch_mode',
+ self.STRETCH_CRUSH_RULE,
+ '--yes-i-really-mean-it',
+
+ ))
+ # Disable stretch mode without crush rule (expect success 0)
+ self.assertEqual(
+ 0,
+ self.mgr_cluster.mon_manager.raw_cluster_cmd_result(
+ 'mon',
+ 'disable_stretch_mode',
+ '--yes-i-really-mean-it'
+ ))
+ # Check if stretch mode is disabled correctly
+ self._stretch_mode_disabled_correctly()
+ # all PGs are active + clean
+ self.wait_until_true_and_hold(
+ lambda: self.mgr_cluster.mon_manager.pg_all_active_clean(),
+ timeout=self.RECOVERY_PERIOD,
+ success_hold_time=self.SUCCESS_HOLD_TIME
+ )
+ # write some data to the pool
+ self._write_some_data(self.WRITE_PERIOD)
+ # Enable stretch mode
+ self.assertEqual(
+ 0,
+ self.mgr_cluster.mon_manager.raw_cluster_cmd_result(
+ 'mon',
+ 'enable_stretch_mode',
+ self.TIEBREAKER_MON_NAME,
+ self.STRETCH_CRUSH_RULE,
+ self.STRETCH_BUCKET_TYPE
+ ))
+ self._stretch_mode_enabled_correctly()
+ # all PGs are active + clean
+ self.wait_until_true_and_hold(
+ lambda: self.mgr_cluster.mon_manager.pg_all_active_clean(),
+ timeout=self.RECOVERY_PERIOD,
+ success_hold_time=self.SUCCESS_HOLD_TIME
+ )
+ # write some data to the pool
+ # self._write_some_data(self.WRITE_PERIOD)
+ # Bring down dc1
+ self._fail_over_all_osds_in_dc('dc1')
+ self._fail_over_all_mons_in_dc('dc1')
+ # should be in degraded stretch mode
+ self.wait_until_true_and_hold(
+ lambda: self.mgr_cluster.mon_manager.is_degraded_stretch_mode(),
+ timeout=self.RECOVERY_PERIOD,
+ success_hold_time=self.SUCCESS_HOLD_TIME
+ )
+ # Disable stretch mode with valid crush rule (expect success 0)
+ self.assertEqual(
+ 0,
+ self.mgr_cluster.mon_manager.raw_cluster_cmd_result(
+ 'mon',
+ 'disable_stretch_mode',
+ self.DEFAULT_POOL_CRUSH_RULE,
+ '--yes-i-really-mean-it'
+ ))
+ # Check if stretch mode is disabled correctly
+ self._stretch_mode_disabled_correctly()
+ # all PGs are active
+ self.wait_until_true_and_hold(
+ lambda: self.mgr_cluster.mon_manager.pg_all_active(),
+ timeout=self.RECOVERY_PERIOD,
+ success_hold_time=self.SUCCESS_HOLD_TIME
+ )
diff --git a/qa/tasks/thrashosds-health.yaml b/qa/tasks/thrashosds-health.yaml
index b70583a75e1..dbde1ced0db 100644
--- a/qa/tasks/thrashosds-health.yaml
+++ b/qa/tasks/thrashosds-health.yaml
@@ -30,3 +30,4 @@ overrides:
- out of quorum
- noscrub
- nodeep-scrub
+ - is down
diff --git a/qa/tasks/vstart_runner.py b/qa/tasks/vstart_runner.py
index ca929ba05b4..2ed21431330 100644
--- a/qa/tasks/vstart_runner.py
+++ b/qa/tasks/vstart_runner.py
@@ -233,6 +233,11 @@ class LocalRemoteProcess(object):
else:
self.stderr.write(err)
+ def _handle_subprocess_output(self, output, stream):
+ if isinstance(stream, StringIO):
+ return rm_nonascii_chars(output)
+ return output
+
def wait(self, timeout=None):
# Null subproc.stdin so communicate() does not try flushing/closing it
# again.
@@ -250,7 +255,8 @@ class LocalRemoteProcess(object):
return
out, err = self.subproc.communicate(timeout=timeout)
- out, err = rm_nonascii_chars(out), rm_nonascii_chars(err)
+ out = self._handle_subprocess_output(out, self.stdout)
+ err = self._handle_subprocess_output(err, self.stderr)
self._write_stdout(out)
self._write_stderr(err)
diff --git a/qa/workunits/cephtool/test.sh b/qa/workunits/cephtool/test.sh
index cdfff17d837..ad5950367e9 100755
--- a/qa/workunits/cephtool/test.sh
+++ b/qa/workunits/cephtool/test.sh
@@ -63,7 +63,7 @@ function retry_eagain()
for count in $(seq 1 $max) ; do
status=0
"$@" > $tmpfile 2>&1 || status=$?
- if test $status = 0 ||
+ if test $status = 0 ||
! grep --quiet EAGAIN $tmpfile ; then
break
fi
@@ -108,7 +108,7 @@ function check_response()
exit 1
fi
- if ! grep --quiet -- "$expected_string" $TMPFILE ; then
+ if ! grep --quiet -- "$expected_string" $TMPFILE ; then
echo "Didn't find $expected_string in output" >&2
cat $TMPFILE >&2
exit 1
@@ -696,7 +696,7 @@ function test_auth_profiles()
ceph -n client.xx-profile-rd -k client.xx.keyring auth del client.xx-profile-ro
ceph -n client.xx-profile-rd -k client.xx.keyring auth del client.xx-profile-rw
-
+
# add a new role-definer with the existing role-definer
ceph -n client.xx-profile-rd -k client.xx.keyring \
auth add client.xx-profile-rd2 mon 'allow profile role-definer'
@@ -730,7 +730,7 @@ function test_mon_caps()
ceph-authtool -n client.bug --cap mon '' $TEMP_DIR/ceph.client.bug.keyring
ceph auth add client.bug -i $TEMP_DIR/ceph.client.bug.keyring
rados lspools --no-mon-config --keyring $TEMP_DIR/ceph.client.bug.keyring -n client.bug >& $TMPFILE || true
- check_response "Permission denied"
+ check_response "Permission denied"
}
function test_mon_misc()
@@ -780,7 +780,6 @@ function test_mon_misc()
ceph mgr dump
ceph mgr dump | jq -e '.active_clients[0].name'
ceph mgr module ls
- ceph mgr module enable restful
expect_false ceph mgr module enable foodne
ceph mgr module enable foodne --force
ceph mgr module disable foodne
@@ -1650,7 +1649,7 @@ function test_mon_osd()
dump_json=$(ceph osd dump --format=json | \
jq -cM '.osds[] | select(.osd == 0)')
[[ "${info_json}" == "${dump_json}" ]]
-
+
info_plain="$(ceph osd info)"
dump_plain="$(ceph osd dump | grep '^osd')"
[[ "${info_plain}" == "${dump_plain}" ]]
@@ -2244,7 +2243,7 @@ function test_mon_pg()
# tell osd version
#
ceph tell osd.0 version
- expect_false ceph tell osd.9999 version
+ expect_false ceph tell osd.9999 version
expect_false ceph tell osd.foo version
# back to pg stuff
@@ -2336,7 +2335,7 @@ function test_mon_osd_pool_set()
ceph osd pool get $TEST_POOL_GETSET deep_scrub_interval | expect_false grep '.'
ceph osd pool get $TEST_POOL_GETSET recovery_priority | expect_false grep '.'
- ceph osd pool set $TEST_POOL_GETSET recovery_priority 5
+ ceph osd pool set $TEST_POOL_GETSET recovery_priority 5
ceph osd pool get $TEST_POOL_GETSET recovery_priority | grep 'recovery_priority: 5'
ceph osd pool set $TEST_POOL_GETSET recovery_priority -5
ceph osd pool get $TEST_POOL_GETSET recovery_priority | grep 'recovery_priority: -5'
@@ -2346,13 +2345,13 @@ function test_mon_osd_pool_set()
expect_false ceph osd pool set $TEST_POOL_GETSET recovery_priority 11
ceph osd pool get $TEST_POOL_GETSET recovery_op_priority | expect_false grep '.'
- ceph osd pool set $TEST_POOL_GETSET recovery_op_priority 5
+ ceph osd pool set $TEST_POOL_GETSET recovery_op_priority 5
ceph osd pool get $TEST_POOL_GETSET recovery_op_priority | grep 'recovery_op_priority: 5'
ceph osd pool set $TEST_POOL_GETSET recovery_op_priority 0
ceph osd pool get $TEST_POOL_GETSET recovery_op_priority | expect_false grep '.'
ceph osd pool get $TEST_POOL_GETSET scrub_priority | expect_false grep '.'
- ceph osd pool set $TEST_POOL_GETSET scrub_priority 5
+ ceph osd pool set $TEST_POOL_GETSET scrub_priority 5
ceph osd pool get $TEST_POOL_GETSET scrub_priority | grep 'scrub_priority: 5'
ceph osd pool set $TEST_POOL_GETSET scrub_priority 0
ceph osd pool get $TEST_POOL_GETSET scrub_priority | expect_false grep '.'
@@ -2386,10 +2385,10 @@ function test_mon_osd_pool_set()
ceph osd pool set $TEST_POOL_GETSET size 2
wait_for_clean
ceph osd pool set $TEST_POOL_GETSET min_size 2
-
+
expect_false ceph osd pool set $TEST_POOL_GETSET hashpspool 0
ceph osd pool set $TEST_POOL_GETSET hashpspool 0 --yes-i-really-mean-it
-
+
expect_false ceph osd pool set $TEST_POOL_GETSET hashpspool 1
ceph osd pool set $TEST_POOL_GETSET hashpspool 1 --yes-i-really-mean-it
@@ -2587,7 +2586,7 @@ function test_mon_osd_misc()
ceph osd map 2>$TMPFILE; check_response 'pool' $? 22
# expect error about unused argument foo
- ceph osd ls foo 2>$TMPFILE; check_response 'unused' $? 22
+ ceph osd ls foo 2>$TMPFILE; check_response 'unused' $? 22
# expect "not in range" for invalid overload percentage
ceph osd reweight-by-utilization 80 2>$TMPFILE; check_response 'higher than 100' $? 22
diff --git a/qa/workunits/erasure-code/bench.sh b/qa/workunits/erasure-code/bench.sh
index fc75830dfd0..87e997c3500 100755
--- a/qa/workunits/erasure-code/bench.sh
+++ b/qa/workunits/erasure-code/bench.sh
@@ -17,7 +17,8 @@
#
# Test that it works from sources with:
#
-# CEPH_ERASURE_CODE_BENCHMARK=src/ceph_erasure_code_benchmark \
+# TOTAL_SIZE=$((4 * 1024 * 1024)) SIZE=4096 \
+# CEPH_ERASURE_CODE_BENCHMARK=build/bin/ceph_erasure_code_benchmark \
# PLUGIN_DIRECTORY=build/lib \
# qa/workunits/erasure-code/bench.sh fplot jerasure |
# tee qa/workunits/erasure-code/bench.js
@@ -34,10 +35,14 @@
# firefox qa/workunits/erasure-code/bench.html
#
# Once it is confirmed to work, it can be run with a more significant
-# volume of data so that the measures are more reliable:
+# volume of data so that the measures are more reliable. Ideally the size
+# of the buffers (SIZE) should be larger than the L3 cache to avoid cache hits.
+# The following example uses an 80MB (80 * 1024 * 1024) buffer.
+# A larger buffer with fewer iterations (iterations = TOTAL_SIZE / SIZE) should result in
+# more time spent encoding/decoding and less time allocating/aligning buffers:
#
-# TOTAL_SIZE=$((4 * 1024 * 1024 * 1024)) \
-# CEPH_ERASURE_CODE_BENCHMARK=src/ceph_erasure_code_benchmark \
+# TOTAL_SIZE=$((100 * 80 * 1024 * 1024)) SIZE=$((80 * 1024 * 1024)) \
+# CEPH_ERASURE_CODE_BENCHMARK=build/bin/ceph_erasure_code_benchmark \
# PLUGIN_DIRECTORY=build/lib \
# qa/workunits/erasure-code/bench.sh fplot jerasure |
# tee qa/workunits/erasure-code/bench.js
@@ -51,8 +56,8 @@ export PATH=/sbin:$PATH
: ${PLUGIN_DIRECTORY:=/usr/lib/ceph/erasure-code}
: ${PLUGINS:=isa jerasure}
: ${TECHNIQUES:=vandermonde cauchy liberation reed_sol_r6_op blaum_roth liber8tion}
-: ${TOTAL_SIZE:=$((1024 * 1024))}
-: ${SIZE:=4096}
+: ${TOTAL_SIZE:=$((100 * 80 * 1024 * 1024))} #TOTAL_SIZE / SIZE = number of encode or decode iterations to run
+: ${SIZE:=$((80 * 1024 * 1024))} #size of buffer to encode/decode
: ${PARAMETERS:=--parameter jerasure-per-chunk-alignment=true}
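A quick check of the iteration count implied by these defaults (just the arithmetic from the comment above, written out):

    TOTAL_SIZE = 100 * 80 * 1024 * 1024   # total bytes to process
    SIZE = 80 * 1024 * 1024               # one 80 MiB buffer per iteration
    iterations = TOTAL_SIZE // SIZE
    assert iterations == 100              # 100 encode/decode iterations per run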
declare -rA isa_techniques=(
diff --git a/qa/workunits/fs/misc/fallocate.sh b/qa/workunits/fs/misc/fallocate.sh
new file mode 100755
index 00000000000..253e6cb7a37
--- /dev/null
+++ b/qa/workunits/fs/misc/fallocate.sh
@@ -0,0 +1,17 @@
+#!/bin/sh -x
+
+# fallocate with mode 0 should fail with EOPNOTSUPP
+set -e
+mkdir -p testdir
+cd testdir
+
+expect_failure() {
+ if "$@"; then return 1; else return 0; fi
+}
+
+expect_failure fallocate -l 1M preallocated.txt
+rm -f preallocated.txt
+
+cd ..
+rmdir testdir
+echo OK
diff --git a/qa/workunits/fs/snaps/snaptest-git-ceph.sh b/qa/workunits/fs/snaps/snaptest-git-ceph.sh
index 2b38720c9a5..6079ba8945b 100755
--- a/qa/workunits/fs/snaps/snaptest-git-ceph.sh
+++ b/qa/workunits/fs/snaps/snaptest-git-ceph.sh
@@ -4,7 +4,14 @@ set -e
# increase the cache size
sudo git config --global http.sslVerify false
-sudo git config --global http.postBuffer 1048576000
+sudo git config --global http.postBuffer 1024MB # default is 1MB
+sudo git config --global http.maxRequestBuffer 100M # default is 10MB
+sudo git config --global core.compression 0
+
+# enable the debug logs for git clone
+export GIT_TRACE_PACKET=1
+export GIT_TRACE=1
+export GIT_CURL_VERBOSE=1
# try it again if the clone is slow and the second time
retried=false
@@ -19,6 +26,11 @@ timeout 1800 git clone https://git.ceph.com/ceph.git
trap - EXIT
cd ceph
+# disable the debug logs for git clone
+export GIT_TRACE_PACKET=0
+export GIT_TRACE=0
+export GIT_CURL_VERBOSE=0
+
versions=`seq 1 90`
for v in $versions
diff --git a/qa/workunits/mon/mon-stretch-mode-5-mons-8-osds.sh b/qa/workunits/mon/mon-stretch-mode-5-mons-8-osds.sh
new file mode 100755
index 00000000000..827fb0a0b13
--- /dev/null
+++ b/qa/workunits/mon/mon-stretch-mode-5-mons-8-osds.sh
@@ -0,0 +1,72 @@
+#!/bin/bash -ex
+
+# A bash script for setting up stretch mode with 5 monitors and 8 OSDs.
+
+NUM_OSDS_UP=$(ceph osd df | grep "up" | wc -l)
+
+if [ $NUM_OSDS_UP -lt 8 ]; then
+ echo "test requires at least 8 OSDs up and running"
+ exit 1
+fi
+
+# ensure election strategy is set to "connectivity"
+# See https://tracker.ceph.com/issues/69107
+ceph mon set election_strategy connectivity
+
+for dc in dc1 dc2
+ do
+ ceph osd crush add-bucket $dc datacenter
+ ceph osd crush move $dc root=default
+ done
+
+ceph osd crush add-bucket host01 host
+ceph osd crush add-bucket host02 host
+ceph osd crush add-bucket host03 host
+ceph osd crush add-bucket host04 host
+
+ceph osd crush move host01 datacenter=dc1
+ceph osd crush move host02 datacenter=dc1
+ceph osd crush move host03 datacenter=dc2
+ceph osd crush move host04 datacenter=dc2
+
+ceph osd crush move osd.0 host=host01
+ceph osd crush move osd.1 host=host01
+ceph osd crush move osd.2 host=host02
+ceph osd crush move osd.3 host=host02
+ceph osd crush move osd.4 host=host03
+ceph osd crush move osd.5 host=host03
+ceph osd crush move osd.6 host=host04
+ceph osd crush move osd.7 host=host04
+
+# set location for monitors
+ceph mon set_location a datacenter=dc1 host=host01
+ceph mon set_location b datacenter=dc1 host=host02
+ceph mon set_location c datacenter=dc2 host=host03
+ceph mon set_location d datacenter=dc2 host=host04
+
+# set location for tiebreaker monitor
+ceph mon set_location e datacenter=dc3 host=host05
+
+# remove the current host from crush map
+hostname=$(hostname -s)
+ceph osd crush remove $hostname
+# create a new crush rule with stretch rule
+ceph osd getcrushmap > crushmap
+crushtool --decompile crushmap > crushmap.txt
+sed 's/^# end crush map$//' crushmap.txt > crushmap_modified.txt
+cat >> crushmap_modified.txt << EOF
+rule stretch_rule {
+ id 2
+ type replicated
+ step take default
+ step choose firstn 2 type datacenter
+ step chooseleaf firstn 2 type host
+ step emit
+}
+# end crush map
+EOF
+
+crushtool --compile crushmap_modified.txt -o crushmap.bin
+ceph osd setcrushmap -i crushmap.bin
+
+ceph mon enable_stretch_mode e stretch_rule datacenter
diff --git a/qa/workunits/nvmeof/basic_tests.sh b/qa/workunits/nvmeof/basic_tests.sh
index dc6fd1669da..9e7a1f5134e 100755
--- a/qa/workunits/nvmeof/basic_tests.sh
+++ b/qa/workunits/nvmeof/basic_tests.sh
@@ -38,8 +38,10 @@ disconnect_all() {
connect_all() {
sudo nvme connect-all --traddr=$NVMEOF_DEFAULT_GATEWAY_IP_ADDRESS --transport=tcp -l 3600
sleep 5
- output=$(sudo nvme list --output-format=json)
- if ! echo "$output" | grep -q "$SPDK_CONTROLLER"; then
+ expected_devices_count=$1
+ actual_devices=$(sudo nvme list --output-format=json | jq -r ".Devices[].Subsystems[] | select(.Controllers | all(.ModelNumber == \"$SPDK_CONTROLLER\")) | .Namespaces[].NameSpace" | wc -l)
+ if [ "$actual_devices" -ne "$expected_devices_count" ]; then
+ sudo nvme list --output-format=json
return 1
fi
}
@@ -72,11 +74,13 @@ test_run connect
test_run list_subsys 1
test_run disconnect_all
test_run list_subsys 0
-test_run connect_all
+devices_count=$(( $NVMEOF_NAMESPACES_COUNT * $NVMEOF_SUBSYSTEMS_COUNT ))
+test_run connect_all $devices_count
gateways_count=$(( $(echo "$NVMEOF_GATEWAY_IP_ADDRESSES" | tr -cd ',' | wc -c) + 1 ))
multipath_count=$(( $gateways_count * $NVMEOF_SUBSYSTEMS_COUNT))
test_run list_subsys $multipath_count
+
echo "-------------Test Summary-------------"
echo "[nvmeof] All nvmeof basic tests passed!"
diff --git a/qa/workunits/nvmeof/fio_test.sh b/qa/workunits/nvmeof/fio_test.sh
index 57d355a6318..f7f783afc67 100755
--- a/qa/workunits/nvmeof/fio_test.sh
+++ b/qa/workunits/nvmeof/fio_test.sh
@@ -5,6 +5,7 @@ sudo yum -y install sysstat
namespace_range_start=
namespace_range_end=
+random_devices_count=
rbd_iostat=false
while [[ $# -gt 0 ]]; do
@@ -17,6 +18,10 @@ while [[ $# -gt 0 ]]; do
namespace_range_end=$2
shift 2
;;
+ --random_devices)
+ random_devices_count=$2
+ shift 2
+ ;;
--rbd_iostat)
rbd_iostat=true
shift
@@ -29,7 +34,7 @@ done
fio_file=$(mktemp -t nvmeof-fio-XXXX)
all_drives_list=$(sudo nvme list --output-format=json |
- jq -r '.Devices | sort_by(.NameSpace) | .[] | select(.ModelNumber == "Ceph bdev Controller") | .DevicePath')
+ jq -r '.Devices[].Subsystems[] | select(.Controllers | all(.ModelNumber == "Ceph bdev Controller")) | .Namespaces | sort_by(.NSID) | .[] | .NameSpace')
# When the script is passed --start_ns and --end_ns (example: `nvmeof_fio_test.sh --start_ns 1 --end_ns 3`),
# then fio runs on namespaces only in the defined range (which is 1 to 3 here).
@@ -37,6 +42,8 @@ all_drives_list=$(sudo nvme list --output-format=json |
# run on first 3 namespaces here.
if [ "$namespace_range_start" ] || [ "$namespace_range_end" ]; then
selected_drives=$(echo "${all_drives_list[@]}" | sed -n "${namespace_range_start},${namespace_range_end}p")
+elif [ "$random_devices_count" ]; then
+ selected_drives=$(echo "${all_drives_list[@]}" | shuf -n $random_devices_count)
else
selected_drives="${all_drives_list[@]}"
fi
diff --git a/qa/workunits/nvmeof/mtls_test.sh b/qa/workunits/nvmeof/mtls_test.sh
new file mode 100755
index 00000000000..e13ca530e8d
--- /dev/null
+++ b/qa/workunits/nvmeof/mtls_test.sh
@@ -0,0 +1,76 @@
+#!/bin/bash
+
+set -ex
+source /etc/ceph/nvmeof.env
+
+# install yq
+wget https://github.com/mikefarah/yq/releases/latest/download/yq_linux_amd64 -O /tmp/yq && chmod +x /tmp/yq
+
+subjectAltName=$(echo "$NVMEOF_GATEWAY_IP_ADDRESSES" | sed 's/,/,IP:/g')
+
+# create mtls spec files
+ceph orch ls nvmeof --export > /tmp/gw-conf-original.yaml
+sudo /tmp/yq ".spec.enable_auth=true | \
+ .spec.root_ca_cert=\"mountcert\" | \
+ .spec.client_cert = load_str(\"/etc/ceph/client.crt\") | \
+ .spec.client_key = load_str(\"/etc/ceph/client.key\") | \
+ .spec.server_cert = load_str(\"/etc/ceph/server.crt\") | \
+ .spec.server_key = load_str(\"/etc/ceph/server.key\")" /tmp/gw-conf-original.yaml > /tmp/gw-conf-with-mtls.yaml
+cp /tmp/gw-conf-original.yaml /tmp/gw-conf-without-mtls.yaml
+sudo /tmp/yq '.spec.enable_auth=false' -i /tmp/gw-conf-without-mtls.yaml
+
+wait_for_service() {
+ MAX_RETRIES=30
+ for ((RETRY_COUNT=1; RETRY_COUNT<=MAX_RETRIES; RETRY_COUNT++)); do
+
+ if ceph orch ls --refresh | grep -q "nvmeof"; then
+ echo "Found nvmeof in the output!"
+ break
+ fi
+ if [ $RETRY_COUNT -eq $MAX_RETRIES ]; then
+ echo "Reached maximum retries ($MAX_RETRIES). Exiting."
+ break
+ fi
+ sleep 5
+ done
+ ceph orch ps
+ ceph orch ls --refresh
+}
+
+# deploy mtls
+cat /tmp/gw-conf-with-mtls.yaml
+ceph orch apply -i /tmp/gw-conf-with-mtls.yaml
+ceph orch redeploy nvmeof.mypool.mygroup0
+sleep 100
+wait_for_service
+
+
+# test
+IFS=',' read -ra gateway_ips <<< "$NVMEOF_GATEWAY_IP_ADDRESSES"
+for i in "${!gateway_ips[@]}"
+do
+ ip="${gateway_ips[i]}"
+ sudo podman run -v /etc/ceph/server.crt:/server.crt:z -v /etc/ceph/client.crt:/client.crt:z \
+ -v /etc/ceph/client.key:/client.key:z \
+ -it $NVMEOF_CLI_IMAGE --server-address $ip --server-port $NVMEOF_SRPORT \
+ --client-key /client.key --client-cert /client.crt --server-cert /server.crt --format json subsystem list
+done
+
+
+# remove mtls
+cat /tmp/gw-conf-without-mtls.yaml
+ceph orch apply -i /tmp/gw-conf-without-mtls.yaml
+ceph orch redeploy nvmeof.mypool.mygroup0
+sleep 100
+wait_for_service
+
+
+# test
+IFS=',' read -ra gateway_ips <<< "$NVMEOF_GATEWAY_IP_ADDRESSES"
+for i in "${!gateway_ips[@]}"
+do
+ ip="${gateway_ips[i]}"
+ sudo podman run -it $NVMEOF_CLI_IMAGE --server-address $ip --server-port $NVMEOF_SRPORT \
+ --format json subsystem list
+done
+
diff --git a/qa/workunits/nvmeof/scalability_test.sh b/qa/workunits/nvmeof/scalability_test.sh
index 5a26b6284f7..8ede4b7eda2 100755
--- a/qa/workunits/nvmeof/scalability_test.sh
+++ b/qa/workunits/nvmeof/scalability_test.sh
@@ -3,37 +3,64 @@
GATEWAYS=$1 # example "nvmeof.a,nvmeof.b"
DELAY="${SCALING_DELAYS:-50}"
+POOL="${RBD_POOL:-mypool}"
+GROUP="${NVMEOF_GROUP:-mygroup0}"
+source /etc/ceph/nvmeof.env
if [ -z "$GATEWAYS" ]; then
echo "At least one gateway needs to be defined for scalability test"
exit 1
fi
-pip3 install yq
-
status_checks() {
- ceph nvme-gw show mypool ''
- ceph orch ls
- ceph orch ps
- ceph -s
+ expected_count=$1
+
+ output=$(ceph nvme-gw show $POOL $GROUP)
+ nvme_show=$(echo $output | grep -o '"AVAILABLE"' | wc -l)
+ if [ "$nvme_show" -ne "$expected_count" ]; then
+ return 1
+ fi
+
+ orch_ls=$(ceph orch ls)
+ if ! echo "$orch_ls" | grep -q "$expected_count/$expected_count"; then
+ return 1
+ fi
+
+ output=$(ceph orch ps --service-name nvmeof.$POOL.$GROUP)
+ orch_ps=$(echo $output | grep -o 'running' | wc -l)
+ if [ "$orch_ps" -ne "$expected_count" ]; then
+ return 1
+ fi
+
+ ceph_status=$(ceph -s)
+ if ! echo "$ceph_status" | grep -q "HEALTH_OK"; then
+ return 1
+ fi
}
+total_gateways_count=$(( $(echo "$NVMEOF_GATEWAY_IP_ADDRESSES" | tr -cd ',' | wc -c) + 1 ))
+scaled_down_gateways_count=$(( total_gateways_count - $(echo "$GATEWAYS" | tr -cd ',' | wc -c) - 1 ))
+
echo "[nvmeof.scale] Setting up config to remove gateways ${GATEWAYS}"
+ceph orch ls --service-name nvmeof.$POOL.$GROUP --export > /tmp/nvmeof-gw.yaml
ceph orch ls nvmeof --export > /tmp/nvmeof-gw.yaml
cat /tmp/nvmeof-gw.yaml
-yq "del(.placement.hosts[] | select(. | test(\".*($(echo $GATEWAYS | sed 's/,/|/g'))\")))" /tmp/nvmeof-gw.yaml > /tmp/nvmeof-gw-new.yaml
+
+pattern=$(echo $GATEWAYS | sed 's/,/\\|/g')
+sed "/$pattern/d" /tmp/nvmeof-gw.yaml > /tmp/nvmeof-gw-new.yaml
cat /tmp/nvmeof-gw-new.yaml
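The sed expression above just drops any placement line mentioning a gateway being scaled down; a minimal Python sketch of the same idea (the helper is hypothetical, shown only to make the intent explicit):

    def drop_gateway_lines(spec_text, gateways):
        """Remove every line of the exported spec that mentions one of the
        comma-separated gateway names -- the effect of sed '/pattern/d' above."""
        names = [g for g in gateways.split(',') if g]
        kept = [line for line in spec_text.splitlines()
                if not any(name in line for name in names)]
        return '\n'.join(kept) + '\n'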
echo "[nvmeof.scale] Starting scale testing by removing ${GATEWAYS}"
-status_checks
-ceph orch rm nvmeof.mypool && sleep 20 # temp workaround
+status_checks $total_gateways_count
ceph orch apply -i /tmp/nvmeof-gw-new.yaml # downscale
+ceph orch redeploy nvmeof.$POOL.$GROUP
sleep $DELAY
-status_checks
-ceph orch rm nvmeof.mypool && sleep 20 # temp workaround
+status_checks $scaled_down_gateways_count
+echo "[nvmeof.scale] Downscale complete - removed gateways (${GATEWAYS}); now scaling back up"
ceph orch apply -i /tmp/nvmeof-gw.yaml #upscale
+ceph orch redeploy nvmeof.$POOL.$GROUP
sleep $DELAY
-status_checks
+status_checks $total_gateways_count
echo "[nvmeof.scale] Scale testing passed for ${GATEWAYS}"
diff --git a/qa/workunits/nvmeof/setup_subsystem.sh b/qa/workunits/nvmeof/setup_subsystem.sh
index cc4024323eb..b573647b1e3 100755
--- a/qa/workunits/nvmeof/setup_subsystem.sh
+++ b/qa/workunits/nvmeof/setup_subsystem.sh
@@ -26,14 +26,21 @@ list_subsystems () {
done
}
+list_namespaces () {
+ for i in $(seq 1 $NVMEOF_SUBSYSTEMS_COUNT); do
+ subsystem_nqn="${NVMEOF_SUBSYSTEMS_PREFIX}${i}"
+ sudo podman run -it $NVMEOF_CLI_IMAGE --server-address $NVMEOF_DEFAULT_GATEWAY_IP_ADDRESS --server-port $NVMEOF_SRPORT --format plain namespace list --subsystem $subsystem_nqn
+ done
+}
+
+echo "[nvmeof] Starting subsystem setup..."
+
# add all subsystems
for i in $(seq 1 $NVMEOF_SUBSYSTEMS_COUNT); do
subsystem_nqn="${NVMEOF_SUBSYSTEMS_PREFIX}${i}"
sudo podman run -it $NVMEOF_CLI_IMAGE --server-address $NVMEOF_DEFAULT_GATEWAY_IP_ADDRESS --server-port $NVMEOF_SRPORT subsystem add --subsystem $subsystem_nqn --no-group-append
done
-list_subsystems
-
# add all gateway listeners
for i in "${!gateway_ips[@]}"
do
@@ -65,11 +72,5 @@ done
list_subsystems
-# list namespaces
-for i in $(seq 1 $NVMEOF_SUBSYSTEMS_COUNT); do
- subsystem_nqn="${NVMEOF_SUBSYSTEMS_PREFIX}${i}"
- sudo podman run -it $NVMEOF_CLI_IMAGE --server-address $NVMEOF_DEFAULT_GATEWAY_IP_ADDRESS --server-port $NVMEOF_SRPORT --format plain namespace list --subsystem $subsystem_nqn
-done
-
echo "[nvmeof] Subsystem setup done"
diff --git a/qa/workunits/rbd/cli_generic.sh b/qa/workunits/rbd/cli_generic.sh
index 2aa27d3d655..0ceb9ff54cf 100755
--- a/qa/workunits/rbd/cli_generic.sh
+++ b/qa/workunits/rbd/cli_generic.sh
@@ -914,6 +914,11 @@ test_namespace() {
rbd group create rbd/test1/group1
rbd group image add rbd/test1/group1 rbd/test1/image1
+ rbd group image add --group-pool rbd --group-namespace test1 --group group1 \
+ --image-pool rbd --image-namespace test1 --image image2
+ rbd group image rm --group-pool rbd --group-namespace test1 --group group1 \
+ --image-pool rbd --image-namespace test1 --image image1
+ rbd group image rm rbd/test1/group1 rbd/test1/image2
rbd group rm rbd/test1/group1
rbd trash move rbd/test1/image1
diff --git a/qa/workunits/rbd/cli_migration.sh b/qa/workunits/rbd/cli_migration.sh
index b044e747cbb..3af19420957 100755
--- a/qa/workunits/rbd/cli_migration.sh
+++ b/qa/workunits/rbd/cli_migration.sh
@@ -5,12 +5,16 @@ TEMPDIR=
IMAGE1=image1
IMAGE2=image2
IMAGE3=image3
-IMAGES="${IMAGE1} ${IMAGE2} ${IMAGE3}"
+NAMESPACE1=namespace1
+NAMESPACE2=namespace2
+NAMESPACES="${NAMESPACE1} ${NAMESPACE2}"
+IMAGES="${IMAGE1} ${IMAGE2} ${IMAGE3} rbd/${NAMESPACE1}/${IMAGE1} rbd/${NAMESPACE2}/${IMAGE2}"
cleanup() {
kill_nbd_server
cleanup_tempdir
remove_images
+ remove_namespaces
}
setup_tempdir() {
@@ -42,8 +46,11 @@ create_base_image() {
export_raw_image() {
local image=$1
- rm -rf "${TEMPDIR}/${image}"
- rbd export ${image} "${TEMPDIR}/${image}"
+ # Replace slashes (/) with underscores (_) for namespace images
+ local export_image="${image//\//_}"
+
+ rm -rf "${TEMPDIR}/${export_image}"
+ rbd export "${image}" "${TEMPDIR}/${export_image}"
}
export_base_image() {
@@ -69,6 +76,13 @@ remove_images() {
done
}
+remove_namespaces() {
+ for namespace in ${NAMESPACES}
+ do
+ rbd namespace remove rbd/${namespace} || true
+ done
+}
+
kill_nbd_server() {
pkill -9 qemu-nbd || true
}
@@ -90,6 +104,11 @@ compare_images() {
local ret=0
export_raw_image ${dst_image}
+
+ # Replace slashes (/) with underscores (_) for namespace images
+ src_image="${src_image//\//_}"
+ dst_image="${dst_image//\//_}"
+
if ! cmp "${TEMPDIR}/${src_image}" "${TEMPDIR}/${dst_image}"
then
show_diff "${TEMPDIR}/${src_image}" "${TEMPDIR}/${dst_image}"
@@ -99,18 +118,26 @@ compare_images() {
}
test_import_native_format() {
- local base_image=$1
- local dest_image=$2
+ local base_image_spec=$1
+ local dest_image_spec=$2
+
+ # if base image is from namespace
+ local base_namespace=""
+ local base_image=${base_image_spec}
+ if [[ "${base_image_spec}" == rbd/*/* ]]; then
+ base_namespace=$(basename "$(dirname "${base_image_spec}")")
+ base_image=$(basename "${base_image_spec}")
+ fi
- rbd migration prepare --import-only "rbd/${base_image}@2" ${dest_image}
- rbd migration abort ${dest_image}
+ rbd migration prepare --import-only "${base_image_spec}@2" ${dest_image_spec}
+ rbd migration abort ${dest_image_spec}
local pool_id=$(ceph osd pool ls detail --format xml | xmlstarlet sel -t -v "//pools/pool[pool_name='rbd']/pool_id")
cat > ${TEMPDIR}/spec.json <<EOF
{
"type": "native",
"pool_id": ${pool_id},
- "pool_namespace": "",
+ "pool_namespace": "${base_namespace}",
"image_name": "${base_image}",
"snap_name": "2"
}
@@ -118,85 +145,85 @@ EOF
cat ${TEMPDIR}/spec.json
rbd migration prepare --import-only \
- --source-spec-path ${TEMPDIR}/spec.json ${dest_image}
+ --source-spec-path ${TEMPDIR}/spec.json ${dest_image_spec}
- compare_images "${base_image}@1" "${dest_image}@1"
- compare_images "${base_image}@2" "${dest_image}@2"
+ compare_images "${base_image_spec}@1" "${dest_image_spec}@1"
+ compare_images "${base_image_spec}@2" "${dest_image_spec}@2"
- rbd migration abort ${dest_image}
+ rbd migration abort ${dest_image_spec}
rbd migration prepare --import-only \
- --source-spec-path ${TEMPDIR}/spec.json ${dest_image}
- rbd migration execute ${dest_image}
+ --source-spec-path ${TEMPDIR}/spec.json ${dest_image_spec}
+ rbd migration execute ${dest_image_spec}
- compare_images "${base_image}@1" "${dest_image}@1"
- compare_images "${base_image}@2" "${dest_image}@2"
+ compare_images "${base_image_spec}@1" "${dest_image_spec}@1"
+ compare_images "${base_image_spec}@2" "${dest_image_spec}@2"
- rbd migration abort ${dest_image}
+ rbd migration abort ${dest_image_spec}
# no snap name or snap id
expect_false rbd migration prepare --import-only \
- --source-spec "{\"type\": \"native\", \"pool_id\": ${pool_id}, \"image_name\": \"${base_image}\"}" \
- ${dest_image}
+ --source-spec "{\"type\": \"native\", \"pool_id\": ${pool_id}, \"pool_namespace\": \"${base_namespace}\", \"image_name\": \"${base_image}\"}" \
+ ${dest_image_spec}
# invalid source spec JSON
expect_false rbd migration prepare --import-only \
- --source-spec "{\"type\": \"native\", \"pool_id\": ${pool_id}, \"image_name\": \"${base_image}\", \"snap_name\": non-existing}" \
- ${dest_image}
+ --source-spec "{\"type\": \"native\", \"pool_id\": ${pool_id}, \"pool_namespace\": \"${base_namespace}\", \"image_name\": \"${base_image}\", \"snap_name\": non-existing}" \
+ ${dest_image_spec}
# non-existing snap name
expect_false rbd migration prepare --import-only \
- --source-spec "{\"type\": \"native\", \"pool_id\": ${pool_id}, \"image_name\": \"${base_image}\", \"snap_name\": \"non-existing\"}" \
- ${dest_image}
+ --source-spec "{\"type\": \"native\", \"pool_id\": ${pool_id}, \"pool_namespace\": \"${base_namespace}\", \"image_name\": \"${base_image}\", \"snap_name\": \"non-existing\"}" \
+ ${dest_image_spec}
# invalid snap name
expect_false rbd migration prepare --import-only \
- --source-spec "{\"type\": \"native\", \"pool_id\": ${pool_id}, \"image_name\": \"${base_image}\", \"snap_name\": 123456}" \
- ${dest_image}
+ --source-spec "{\"type\": \"native\", \"pool_id\": ${pool_id}, \"pool_namespace\": \"${base_namespace}\", \"image_name\": \"${base_image}\", \"snap_name\": 123456}" \
+ ${dest_image_spec}
# non-existing snap id passed as int
expect_false rbd migration prepare --import-only \
- --source-spec "{\"type\": \"native\", \"pool_id\": ${pool_id}, \"image_name\": \"${base_image}\", \"snap_id\": 123456}" \
- ${dest_image}
+ --source-spec "{\"type\": \"native\", \"pool_id\": ${pool_id}, \"pool_namespace\": \"${base_namespace}\", \"image_name\": \"${base_image}\", \"snap_id\": 123456}" \
+ ${dest_image_spec}
# non-existing snap id passed as string
expect_false rbd migration prepare --import-only \
- --source-spec "{\"type\": \"native\", \"pool_id\": ${pool_id}, \"image_name\": \"${base_image}\", \"snap_id\": \"123456\"}" \
- ${dest_image}
+ --source-spec "{\"type\": \"native\", \"pool_id\": ${pool_id}, \"pool_namespace\": \"${base_namespace}\", \"image_name\": \"${base_image}\", \"snap_id\": \"123456\"}" \
+ ${dest_image_spec}
# invalid snap id
expect_false rbd migration prepare --import-only \
- --source-spec "{\"type\": \"native\", \"pool_id\": ${pool_id}, \"image_name\": \"${base_image}\", \"snap_id\": \"foobar\"}" \
- ${dest_image}
+ --source-spec "{\"type\": \"native\", \"pool_id\": ${pool_id}, \"pool_namespace\": \"${base_namespace}\", \"image_name\": \"${base_image}\", \"snap_id\": \"foobar\"}" \
+ ${dest_image_spec}
# snap id passed as int
- local snap_id=$(rbd snap ls ${base_image} --format xml | xmlstarlet sel -t -v "//snapshots/snapshot[name='2']/id")
+ local snap_id=$(rbd snap ls ${base_image_spec} --format xml | xmlstarlet sel -t -v "//snapshots/snapshot[name='2']/id")
rbd migration prepare --import-only \
- --source-spec "{\"type\": \"native\", \"pool_id\": ${pool_id}, \"image_name\": \"${base_image}\", \"snap_id\": ${snap_id}}" \
- ${dest_image}
- rbd migration abort ${dest_image}
+ --source-spec "{\"type\": \"native\", \"pool_id\": ${pool_id}, \"pool_namespace\": \"${base_namespace}\", \"image_name\": \"${base_image}\", \"snap_id\": ${snap_id}}" \
+ ${dest_image_spec}
+ rbd migration abort ${dest_image_spec}
# snap id passed as string
rbd migration prepare --import-only \
- --source-spec "{\"type\": \"native\", \"pool_id\": ${pool_id}, \"image_name\": \"${base_image}\", \"snap_id\": \"${snap_id}\"}" \
- ${dest_image}
- rbd migration abort ${dest_image}
+ --source-spec "{\"type\": \"native\", \"pool_id\": ${pool_id}, \"pool_namespace\": \"${base_namespace}\", \"image_name\": \"${base_image}\", \"snap_id\": \"${snap_id}\"}" \
+ ${dest_image_spec}
+ rbd migration abort ${dest_image_spec}
rbd migration prepare --import-only \
- --source-spec "{\"type\": \"native\", \"pool_id\": ${pool_id}, \"image_name\": \"${base_image}\", \"snap_name\": \"2\"}" \
- ${dest_image}
- rbd migration abort ${dest_image}
+ --source-spec "{\"type\": \"native\", \"pool_id\": ${pool_id}, \"pool_namespace\": \"${base_namespace}\", \"image_name\": \"${base_image}\", \"snap_name\": \"2\"}" \
+ ${dest_image_spec}
+ rbd migration abort ${dest_image_spec}
rbd migration prepare --import-only \
- --source-spec "{\"type\": \"native\", \"pool_name\": \"rbd\", \"image_name\": \"${base_image}\", \"snap_name\": \"2\"}" \
- ${dest_image}
- rbd migration execute ${dest_image}
- rbd migration commit ${dest_image}
+ --source-spec "{\"type\": \"native\", \"pool_name\": \"rbd\", \"pool_namespace\": \"${base_namespace}\", \"image_name\": \"${base_image}\", \"snap_name\": \"2\"}" \
+ ${dest_image_spec}
+ rbd migration execute ${dest_image_spec}
+ rbd migration commit ${dest_image_spec}
- compare_images "${base_image}@1" "${dest_image}@1"
- compare_images "${base_image}@2" "${dest_image}@2"
+ compare_images "${base_image_spec}@1" "${dest_image_spec}@1"
+ compare_images "${base_image_spec}@2" "${dest_image_spec}@2"
- remove_image "${dest_image}"
+ remove_image "${dest_image_spec}"
}
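
For reference, a hedged sketch of the native source-spec shapes that the positive and negative cases above probe; the pool, namespace, image, and snapshot values are placeholders, not values taken from the test:

    # Hypothetical values for illustration only; only the field shapes matter.
    # Per the cases above: snap_name must be a JSON string, snap_id is accepted
    # as either an integer or a string, and omitting both is rejected.
    GOOD_BY_NAME='{"type": "native", "pool_name": "rbd", "pool_namespace": "ns-a", "image_name": "img", "snap_name": "snap1"}'
    GOOD_BY_ID='{"type": "native", "pool_id": 2, "pool_namespace": "ns-a", "image_name": "img", "snap_id": 4}'
    BAD_NO_SNAP='{"type": "native", "pool_id": 2, "pool_namespace": "ns-a", "image_name": "img"}'
    rbd migration prepare --import-only --source-spec "${GOOD_BY_NAME}" rbd/ns-b/dst
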
test_import_qcow_format() {
@@ -337,12 +364,12 @@ EOF
cat ${TEMPDIR}/spec.json
cat ${TEMPDIR}/spec.json | rbd migration prepare --import-only \
- --source-spec-path - ${dest_image}
+ --source-spec-path - ${dest_image}
compare_images ${base_image} ${dest_image}
rbd migration abort ${dest_image}
rbd migration prepare --import-only \
- --source-spec-path ${TEMPDIR}/spec.json ${dest_image}
+ --source-spec-path ${TEMPDIR}/spec.json ${dest_image}
rbd migration execute ${dest_image}
rbd migration commit ${dest_image}
@@ -587,4 +614,18 @@ test_import_nbd_stream_qcow2 ${IMAGE2} ${IMAGE3}
test_import_raw_format ${IMAGE1} ${IMAGE2}
test_import_nbd_stream_raw ${IMAGE1} ${IMAGE2}
+rbd namespace create rbd/${NAMESPACE1}
+rbd namespace create rbd/${NAMESPACE2}
+create_base_image rbd/${NAMESPACE1}/${IMAGE1}
+export_base_image rbd/${NAMESPACE1}/${IMAGE1}
+
+# Migration from namespace to namespace
+test_import_native_format rbd/${NAMESPACE1}/${IMAGE1} rbd/${NAMESPACE2}/${IMAGE2}
+
+# Migration from namespace to non-namespace
+test_import_native_format rbd/${NAMESPACE1}/${IMAGE1} ${IMAGE2}
+
+# Migration from non-namespace to namespace
+test_import_native_format ${IMAGE1} rbd/${NAMESPACE2}/${IMAGE2}
+
echo OK
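
A minimal sketch of the namespace-to-namespace flow that the new top-level calls drive end to end, assuming a pool named rbd; the namespace and image names below are placeholders rather than the test's NAMESPACE1/NAMESPACE2/IMAGE1/IMAGE2 values, and the real runs go through test_import_native_format:

    # Hypothetical names for illustration only.
    rbd namespace create rbd/ns-src
    rbd namespace create rbd/ns-dst
    rbd create --size 128M rbd/ns-src/img
    rbd snap create rbd/ns-src/img@snap1
    rbd migration prepare --import-only \
        --source-spec '{"type": "native", "pool_name": "rbd", "pool_namespace": "ns-src", "image_name": "img", "snap_name": "snap1"}' \
        rbd/ns-dst/img
    rbd migration execute rbd/ns-dst/img
    rbd migration commit rbd/ns-dst/img
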
diff --git a/qa/workunits/rest/test-restful.sh b/qa/workunits/rest/test-restful.sh
deleted file mode 100755
index fde0d107a0b..00000000000
--- a/qa/workunits/rest/test-restful.sh
+++ /dev/null
@@ -1,10 +0,0 @@
-#!/bin/sh -ex
-
-mydir=`dirname $0`
-
-secret=`ceph config-key get mgr/restful/keys/admin`
-url=$(ceph mgr dump|jq -r .services.restful|sed -e 's/\/$//')
-echo "url $url secret $secret"
-$mydir/test_mgr_rest_api.py $url $secret
-
-echo $0 OK