Diffstat (limited to 'qa/workunits')
-rwxr-xr-x | qa/workunits/mon/mon-stretch-mode-5-mons-8-osds.sh | 72
-rwxr-xr-x | qa/workunits/nvmeof/basic_tests.sh                 | 10
-rwxr-xr-x | qa/workunits/nvmeof/fio_test.sh                    |  9
-rwxr-xr-x | qa/workunits/nvmeof/scalability_test.sh            | 51
-rwxr-xr-x | qa/workunits/nvmeof/setup_subsystem.sh             | 17
-rwxr-xr-x | qa/workunits/rbd/cli_generic.sh                    |  5
6 files changed, 140 insertions, 24 deletions
diff --git a/qa/workunits/mon/mon-stretch-mode-5-mons-8-osds.sh b/qa/workunits/mon/mon-stretch-mode-5-mons-8-osds.sh
new file mode 100755
index 00000000000..827fb0a0b13
--- /dev/null
+++ b/qa/workunits/mon/mon-stretch-mode-5-mons-8-osds.sh
@@ -0,0 +1,72 @@
+#!/bin/bash -ex
+
+# A bash script for setting up stretch mode with 5 monitors and 8 OSDs.
+
+NUM_OSDS_UP=$(ceph osd df | grep "up" | wc -l)
+
+if [ $NUM_OSDS_UP -lt 8 ]; then
+    echo "test requires at least 8 OSDs up and running"
+    exit 1
+fi
+
+# ensure election strategy is set to "connectivity"
+# See https://tracker.ceph.com/issues/69107
+ceph mon set election_strategy connectivity
+
+for dc in dc1 dc2
+    do
+      ceph osd crush add-bucket $dc datacenter
+      ceph osd crush move $dc root=default
+    done
+
+ceph osd crush add-bucket host01 host
+ceph osd crush add-bucket host02 host
+ceph osd crush add-bucket host03 host
+ceph osd crush add-bucket host04 host
+
+ceph osd crush move host01 datacenter=dc1
+ceph osd crush move host02 datacenter=dc1
+ceph osd crush move host03 datacenter=dc2
+ceph osd crush move host04 datacenter=dc2
+
+ceph osd crush move osd.0 host=host01
+ceph osd crush move osd.1 host=host01
+ceph osd crush move osd.2 host=host02
+ceph osd crush move osd.3 host=host02
+ceph osd crush move osd.4 host=host03
+ceph osd crush move osd.5 host=host03
+ceph osd crush move osd.6 host=host04
+ceph osd crush move osd.7 host=host04
+
+# set location for monitors
+ceph mon set_location a datacenter=dc1 host=host01
+ceph mon set_location b datacenter=dc1 host=host02
+ceph mon set_location c datacenter=dc2 host=host03
+ceph mon set_location d datacenter=dc2 host=host04
+
+# set location for tiebreaker monitor
+ceph mon set_location e datacenter=dc3 host=host05
+
+# remove the current host from crush map
+hostname=$(hostname -s)
+ceph osd crush remove $hostname
+# create a new crush rule with stretch rule
+ceph osd getcrushmap > crushmap
+crushtool --decompile crushmap > crushmap.txt
+sed 's/^# end crush map$//' crushmap.txt > crushmap_modified.txt
+cat >> crushmap_modified.txt << EOF
+rule stretch_rule {
+        id 2
+        type replicated
+        step take default
+        step choose firstn 2 type datacenter
+        step chooseleaf firstn 2 type host
+        step emit
+}
+# end crush map
+EOF
+
+crushtool --compile crushmap_modified.txt -o crushmap.bin
+ceph osd setcrushmap -i crushmap.bin
+
+ceph mon enable_stretch_mode e stretch_rule datacenter
diff --git a/qa/workunits/nvmeof/basic_tests.sh b/qa/workunits/nvmeof/basic_tests.sh
index dc6fd1669da..9e7a1f5134e 100755
--- a/qa/workunits/nvmeof/basic_tests.sh
+++ b/qa/workunits/nvmeof/basic_tests.sh
@@ -38,8 +38,10 @@ disconnect_all() {
 connect_all() {
     sudo nvme connect-all --traddr=$NVMEOF_DEFAULT_GATEWAY_IP_ADDRESS --transport=tcp -l 3600
     sleep 5
-    output=$(sudo nvme list --output-format=json)
-    if ! echo "$output" | grep -q "$SPDK_CONTROLLER"; then
+    expected_devices_count=$1
+    actual_devices=$(sudo nvme list --output-format=json | jq -r ".Devices[].Subsystems[] | select(.Controllers | all(.ModelNumber == \"$SPDK_CONTROLLER\")) | .Namespaces[].NameSpace" | wc -l)
+    if [ "$actual_devices" -ne "$expected_devices_count" ]; then
+        sudo nvme list --output-format=json
         return 1
     fi
 }
@@ -72,11 +74,13 @@ test_run connect
 test_run list_subsys 1
 test_run disconnect_all
 test_run list_subsys 0
-test_run connect_all
+devices_count=$(( $NVMEOF_NAMESPACES_COUNT * $NVMEOF_SUBSYSTEMS_COUNT ))
+test_run connect_all $devices_count
 gateways_count=$(( $(echo "$NVMEOF_GATEWAY_IP_ADDRESSES" | tr -cd ',' | wc -c) + 1 ))
 multipath_count=$(( $gateways_count * $NVMEOF_SUBSYSTEMS_COUNT))
 test_run list_subsys $multipath_count
+
 
 echo "-------------Test Summary-------------"
 echo "[nvmeof] All nvmeof basic tests passed!"
diff --git a/qa/workunits/nvmeof/fio_test.sh b/qa/workunits/nvmeof/fio_test.sh
index 57d355a6318..f7f783afc67 100755
--- a/qa/workunits/nvmeof/fio_test.sh
+++ b/qa/workunits/nvmeof/fio_test.sh
@@ -5,6 +5,7 @@ sudo yum -y install sysstat
 
 namespace_range_start=
 namespace_range_end=
+random_devices_count=
 rbd_iostat=false
 
 while [[ $# -gt 0 ]]; do
@@ -17,6 +18,10 @@ while [[ $# -gt 0 ]]; do
             namespace_range_end=$2
             shift 2
             ;;
+        --random_devices)
+            random_devices_count=$2
+            shift 2
+            ;;
         --rbd_iostat)
             rbd_iostat=true
             shift
@@ -29,7 +34,7 @@ done
 
 fio_file=$(mktemp -t nvmeof-fio-XXXX)
 all_drives_list=$(sudo nvme list --output-format=json |
-    jq -r '.Devices | sort_by(.NameSpace) | .[] | select(.ModelNumber == "Ceph bdev Controller") | .DevicePath')
+    jq -r '.Devices[].Subsystems[] | select(.Controllers | all(.ModelNumber == "Ceph bdev Controller")) | .Namespaces | sort_by(.NSID) | .[] | .NameSpace')
 
 # When the script is passed --start_ns and --end_ns (example: `nvmeof_fio_test.sh --start_ns 1 --end_ns 3`),
 # then fio runs on namespaces only in the defined range (which is 1 to 3 here).
@@ -37,6 +42,8 @@ all_drives_list=$(sudo nvme list --output-format=json |
 # run on first 3 namespaces here.
 if [ "$namespace_range_start" ] || [ "$namespace_range_end" ]; then
     selected_drives=$(echo "${all_drives_list[@]}" | sed -n "${namespace_range_start},${namespace_range_end}p")
+elif [ "$random_devices_count" ]; then
+    selected_drives=$(echo "${all_drives_list[@]}" | shuf -n $random_devices_count)
 else
     selected_drives="${all_drives_list[@]}"
 fi
diff --git a/qa/workunits/nvmeof/scalability_test.sh b/qa/workunits/nvmeof/scalability_test.sh
index 5a26b6284f7..8ede4b7eda2 100755
--- a/qa/workunits/nvmeof/scalability_test.sh
+++ b/qa/workunits/nvmeof/scalability_test.sh
@@ -3,37 +3,64 @@
 
 GATEWAYS=$1 # exmaple "nvmeof.a,nvmeof.b"
 DELAY="${SCALING_DELAYS:-50}"
+POOL="${RBD_POOL:-mypool}"
+GROUP="${NVMEOF_GROUP:-mygroup0}"
+source /etc/ceph/nvmeof.env
 
 if [ -z "$GATEWAYS" ]; then
     echo "At least one gateway needs to be defined for scalability test"
     exit 1
 fi
 
-pip3 install yq
-
 status_checks() {
-    ceph nvme-gw show mypool ''
-    ceph orch ls
-    ceph orch ps
-    ceph -s
+    expected_count=$1
+
+    output=$(ceph nvme-gw show $POOL $GROUP)
+    nvme_show=$(echo $output | grep -o '"AVAILABLE"' | wc -l)
+    if [ "$nvme_show" -ne "$expected_count" ]; then
+        return 1
+    fi
+
+    orch_ls=$(ceph orch ls)
+    if ! echo "$orch_ls" | grep -q "$expected_count/$expected_count"; then
+        return 1
+    fi
+
+    output=$(ceph orch ps --service-name nvmeof.$POOL.$GROUP)
+    orch_ps=$(echo $output | grep -o 'running' | wc -l)
+    if [ "$orch_ps" -ne "$expected_count" ]; then
+        return 1
+    fi
+
+    ceph_status=$(ceph -s)
+    if ! echo "$ceph_status" | grep -q "HEALTH_OK"; then
+        return 1
+    fi
 }
 
+total_gateways_count=$(( $(echo "$NVMEOF_GATEWAY_IP_ADDRESSES" | tr -cd ',' | wc -c) + 1 ))
+scaled_down_gateways_count=$(( total_gateways_count - $(echo "$GATEWAYS" | tr -cd ',' | wc -c) - 1 ))
+
 echo "[nvmeof.scale] Setting up config to remove gateways ${GATEWAYS}"
+ceph orch ls --service-name nvmeof.$POOL.$GROUP --export > /tmp/nvmeof-gw.yaml
 ceph orch ls nvmeof --export > /tmp/nvmeof-gw.yaml
 cat /tmp/nvmeof-gw.yaml
-yq "del(.placement.hosts[] | select(. | test(\".*($(echo $GATEWAYS | sed 's/,/|/g'))\")))" /tmp/nvmeof-gw.yaml > /tmp/nvmeof-gw-new.yaml
+
+pattern=$(echo $GATEWAYS | sed 's/,/\\|/g')
+sed "/$pattern/d" /tmp/nvmeof-gw.yaml > /tmp/nvmeof-gw-new.yaml
 cat /tmp/nvmeof-gw-new.yaml
 
 echo "[nvmeof.scale] Starting scale testing by removing ${GATEWAYS}"
-status_checks
-ceph orch rm nvmeof.mypool && sleep 20 # temp workaround
+status_checks $total_gateways_count
 ceph orch apply -i /tmp/nvmeof-gw-new.yaml # downscale
+ceph orch redeploy nvmeof.$POOL.$GROUP
 sleep $DELAY
-status_checks
-ceph orch rm nvmeof.mypool && sleep 20 # temp workaround
+status_checks $scaled_down_gateways_count
+echo "[nvmeof.scale] Downscale complete - removed gateways (${GATEWAYS}); now scaling back up"
 ceph orch apply -i /tmp/nvmeof-gw.yaml #upscale
+ceph orch redeploy nvmeof.$POOL.$GROUP
 sleep $DELAY
-status_checks
+status_checks $total_gateways_count
 echo "[nvmeof.scale] Scale testing passed for ${GATEWAYS}"
diff --git a/qa/workunits/nvmeof/setup_subsystem.sh b/qa/workunits/nvmeof/setup_subsystem.sh
index cc4024323eb..b573647b1e3 100755
--- a/qa/workunits/nvmeof/setup_subsystem.sh
+++ b/qa/workunits/nvmeof/setup_subsystem.sh
@@ -26,14 +26,21 @@ list_subsystems () {
     done
 }
 
+list_namespaces () {
+    for i in $(seq 1 $NVMEOF_SUBSYSTEMS_COUNT); do
+        subsystem_nqn="${NVMEOF_SUBSYSTEMS_PREFIX}${i}"
+        sudo podman run -it $NVMEOF_CLI_IMAGE --server-address $NVMEOF_DEFAULT_GATEWAY_IP_ADDRESS --server-port $NVMEOF_SRPORT --format plain namespace list --subsystem $subsystem_nqn
+    done
+}
+
+echo "[nvmeof] Starting subsystem setup..."
+
 # add all subsystems
 for i in $(seq 1 $NVMEOF_SUBSYSTEMS_COUNT); do
     subsystem_nqn="${NVMEOF_SUBSYSTEMS_PREFIX}${i}"
     sudo podman run -it $NVMEOF_CLI_IMAGE --server-address $NVMEOF_DEFAULT_GATEWAY_IP_ADDRESS --server-port $NVMEOF_SRPORT subsystem add --subsystem $subsystem_nqn --no-group-append
 done
 
-list_subsystems
-
 # add all gateway listeners
 for i in "${!gateway_ips[@]}"
 do
@@ -65,11 +72,5 @@ done
 
 list_subsystems
 
-# list namespaces
-for i in $(seq 1 $NVMEOF_SUBSYSTEMS_COUNT); do
-    subsystem_nqn="${NVMEOF_SUBSYSTEMS_PREFIX}${i}"
-    sudo podman run -it $NVMEOF_CLI_IMAGE --server-address $NVMEOF_DEFAULT_GATEWAY_IP_ADDRESS --server-port $NVMEOF_SRPORT --format plain namespace list --subsystem $subsystem_nqn
-done
-
 echo "[nvmeof] Subsystem setup done"
diff --git a/qa/workunits/rbd/cli_generic.sh b/qa/workunits/rbd/cli_generic.sh
index 2aa27d3d655..0ceb9ff54cf 100755
--- a/qa/workunits/rbd/cli_generic.sh
+++ b/qa/workunits/rbd/cli_generic.sh
@@ -914,6 +914,11 @@ test_namespace() {
 
     rbd group create rbd/test1/group1
     rbd group image add rbd/test1/group1 rbd/test1/image1
+    rbd group image add --group-pool rbd --group-namespace test1 --group group1 \
+        --image-pool rbd --image-namespace test1 --image image2
+    rbd group image rm --group-pool rbd --group-namespace test1 --group group1 \
+        --image-pool rbd --image-namespace test1 --image image1
+    rbd group image rm rbd/test1/group1 rbd/test1/image2
     rbd group rm rbd/test1/group1
 
     rbd trash move rbd/test1/image1