Merge pull request #61289 from ronen-fr/wip-rf-catcher-fix

common: fix md_config_cacher_t

Reviewed-by: Radoslaw Zarzynski <rzarzyns@redhat.com>
author: Ronen Friedman <rfriedma@redhat.com> 2025-01-15 10:54:06 +0100
committer: GitHub <noreply@github.com> 2025-01-15 10:54:06 +0100
commit: 091c7c5400f49e0ca16e8462ede8754b81f17b96 (patch)
tree: 66cd84403778a21368cc1b57dd9413c8681c7ec2
parent: Merge pull request #61072 from pecastro/doc_and_test_fixes (diff)
parent: qa/scrub: more delay when waiting for noscrub to take effect (diff)
download: ceph-091c7c5400f49e0ca16e8462ede8754b81f17b96.tar.xz ceph-091c7c5400f49e0ca16e8462ede8754b81f17b96.zip
4 files changed, 58 insertions, 50 deletions
diff --git a/qa/standalone/scrub/osd-recovery-scrub.sh b/qa/standalone/scrub/osd-recovery-scrub.sh
index 843e9b9901b..7b77a60f35b 100755
--- a/qa/standalone/scrub/osd-recovery-scrub.sh
+++ b/qa/standalone/scrub/osd-recovery-scrub.sh
@@ -163,7 +163,7 @@ function wait_for_scrub_mod() {
         fi
         sleep 1
         # are we still the primary?
-        local current_primary=`bin/ceph pg $pgid query | jq '.acting[0]' `
+        local current_primary=`./bin/ceph pg $pgid query | jq '.acting[0]' `
         if [ $orig_primary != $current_primary ]; then
             echo $orig_primary no longer primary for $pgid
             return 0
@@ -194,7 +194,7 @@ function pg_scrub_mod() {
 
     local last_scrub=$(get_last_scrub_stamp $pgid)
     # locate the primary
-    local my_primary=`bin/ceph pg $pgid query | jq '.acting[0]' `
+    local my_primary=`./bin/ceph pg $pgid query | jq '.acting[0]' `
     local recovery=false
     ceph pg scrub $pgid
     #ceph --format json pg dump pgs | jq ".pg_stats | .[] | select(.pgid == \"$pgid\") | .state"
diff --git a/qa/standalone/scrub/osd-scrub-test.sh b/qa/standalone/scrub/osd-scrub-test.sh
index 85a45a421c1..385479258f2 100755
--- a/qa/standalone/scrub/osd-scrub-test.sh
+++ b/qa/standalone/scrub/osd-scrub-test.sh
@@ -603,17 +603,16 @@ function TEST_dump_scrub_schedule() {
     declare -A expct_dmp_duration=( ['dmp_last_duration']="0" ['dmp_last_duration_neg']="not0" )
     wait_any_cond $pgid 10 $saved_last_stamp expct_dmp_duration "WaitingAfterScrub_dmp " sched_data || return 1
 
-    sleep 2
-
     #
     # step 2: set noscrub and request a "periodic scrub". Watch for the change in the 'is the scrub
     #         scheduled for the future' value
     #
 
-    ceph tell osd.* config set osd_shallow_scrub_chunk_max "3" || return 1
-    ceph tell osd.* config set osd_scrub_sleep "2.0" || return 1
     ceph osd set noscrub || return 1
     sleep 2
+    ceph tell osd.* config set osd_shallow_scrub_chunk_max "3" || return 1
+    ceph tell osd.* config set osd_scrub_sleep "2.0" || return 1
+    sleep 8
     saved_last_stamp=${sched_data['query_last_stamp']}
 
     ceph tell $pgid schedule-scrub
@@ -692,28 +691,28 @@ function wait_initial_scrubs() {
 
     # set a long schedule for the periodic scrubs. Wait for the
     # initial 'no previous scrub is known' scrubs to finish for all PGs.
-    bin/ceph tell osd.* config set osd_scrub_min_interval 7200
-    bin/ceph tell osd.* config set osd_deep_scrub_interval 14400
-    bin/ceph tell osd.* config set osd_max_scrubs 32
-    bin/ceph tell osd.* config set osd_scrub_sleep 0
-    bin/ceph tell osd.* config set osd_shallow_scrub_chunk_max 10
-    bin/ceph tell osd.* config set osd_scrub_chunk_max 10
+    ceph tell osd.* config set osd_scrub_min_interval 7200
+    ceph tell osd.* config set osd_deep_scrub_interval 14400
+    ceph tell osd.* config set osd_max_scrubs 32
+    ceph tell osd.* config set osd_scrub_sleep 0
+    ceph tell osd.* config set osd_shallow_scrub_chunk_max 10
+    ceph tell osd.* config set osd_scrub_chunk_max 10
 
     for pg in "${!pg_to_prim_dict[@]}"; do
       (( extr_dbg >= 1 )) && echo "Scheduling initial scrub for $pg"
-      bin/ceph tell $pg scrub || return 1
+      ceph tell $pg scrub || return 1
     done
 
     sleep 1
-    (( extr_dbg >= 1 )) && bin/ceph pg dump pgs --format=json-pretty | \
+    (( extr_dbg >= 1 )) && ceph pg dump pgs --format=json-pretty | \
       jq '.pg_stats | map(select(.last_scrub_duration == 0)) | map({pgid: .pgid, last_scrub_duration: .last_scrub_duration})'
 
     tout=20
     while [ $tout -gt 0 ] ; do
       sleep 0.5
-      (( extr_dbg >= 2 )) && bin/ceph pg dump pgs --format=json-pretty | \
+      (( extr_dbg >= 2 )) && ceph pg dump pgs --format=json-pretty | \
         jq '.pg_stats | map(select(.last_scrub_duration == 0)) | map({pgid: .pgid, last_scrub_duration: .last_scrub_duration})'
-      not_done=$(bin/ceph pg dump pgs --format=json-pretty | \
+      not_done=$(ceph pg dump pgs --format=json-pretty | \
         jq '.pg_stats | map(select(.last_scrub_duration == 0)) | map({pgid: .pgid, last_scrub_duration: .last_scrub_duration})' | wc -l )
       # note that we should ignore a header line
       if [ "$not_done" -le 1 ]; then
@@ -782,14 +781,14 @@ function TEST_abort_periodic_for_operator() {
     wait_initial_scrubs pg_pr || return 1
 
     # limit all OSDs to one scrub at a time
-    bin/ceph tell osd.* config set osd_max_scrubs 1
-    bin/ceph tell osd.* config set osd_stats_update_period_not_scrubbing 1
+    ceph tell osd.* config set osd_max_scrubs 1
+    ceph tell osd.* config set osd_stats_update_period_not_scrubbing 1
 
     # configure for slow scrubs
-    bin/ceph tell osd.* config set osd_scrub_sleep 3
-    bin/ceph tell osd.* config set osd_shallow_scrub_chunk_max 2
-    bin/ceph tell osd.* config set osd_scrub_chunk_max 2
-    (( extr_dbg >= 2 )) && bin/ceph tell osd.2 dump_scrub_reservations --format=json-pretty
+    ceph tell osd.* config set osd_scrub_sleep 3
+    ceph tell osd.* config set osd_shallow_scrub_chunk_max 2
+    ceph tell osd.* config set osd_scrub_chunk_max 2
+    (( extr_dbg >= 2 )) && ceph tell osd.2 dump_scrub_reservations --format=json-pretty
 
     # the first PG to work with:
     local pg1="1.0"
@@ -812,7 +811,7 @@ function TEST_abort_periodic_for_operator() {
     fi
 
     # the common primary is allowed two concurrent scrubs
-    bin/ceph tell osd."${pg_pr[$pg1]}" config set osd_max_scrubs 2
+    ceph tell osd."${pg_pr[$pg1]}" config set osd_max_scrubs 2
     echo "The two PGs to manipulate are $pg1 and $pg2"
 
     set_query_debug "$pg1"
@@ -821,31 +820,31 @@ function TEST_abort_periodic_for_operator() {
     local is_act
     for i in $( seq 1 3 )
     do
-      is_act=$(bin/ceph pg "$pg1" query | jq '.scrubber.active')
+      is_act=$(ceph pg "$pg1" query | jq '.scrubber.active')
       if [[ "$is_act" = "false" ]]; then
           break
       fi
       echo "Still waiting for pg $pg1 to finish scrubbing"
       sleep 0.7
     done
-    bin/ceph pg dump pgs
+    ceph pg dump pgs
     if [[ "$is_act" != "false" ]]; then
-      bin/ceph pg "$pg1" query
+      ceph pg "$pg1" query
       echo "PG $pg1 appears to be still scrubbing"
       return 1
     fi
     sleep 0.5
 
     echo "Initiating a periodic scrub of $pg1"
-    (( extr_dbg >= 2 )) && bin/ceph pg "$pg1" query -f json-pretty | jq '.scrubber'
-    bin/ceph tell $pg1 schedule-deep-scrub || return 1
+    (( extr_dbg >= 2 )) && ceph pg "$pg1" query -f json-pretty | jq '.scrubber'
+    ceph tell $pg1 schedule-deep-scrub || return 1
     sleep 1
-    (( extr_dbg >= 2 )) && bin/ceph pg "$pg1" query -f json-pretty | jq '.scrubber'
+    (( extr_dbg >= 2 )) && ceph pg "$pg1" query -f json-pretty | jq '.scrubber'
 
     for i in $( seq 1 14 )
     do
       sleep 0.5
-      stt=$(bin/ceph pg "$pg1" query | jq '.scrubber')
+      stt=$(ceph pg "$pg1" query | jq '.scrubber')
       is_active=$(echo $stt | jq '.active')
       is_reserving_replicas=$(echo $stt | jq '.is_reserving_replicas')
       if [[ "$is_active" = "true" && "$is_reserving_replicas" = "false" ]]; then
@@ -854,49 +853,49 @@ function TEST_abort_periodic_for_operator() {
       echo "Still waiting for pg $pg1 to start scrubbing: $stt"
     done
     if [[ "$is_active" != "true" || "$is_reserving_replicas" != "false" ]]; then
-      bin/ceph pg "$pg1" query -f json-pretty | jq '.scrubber'
+      ceph pg "$pg1" query -f json-pretty | jq '.scrubber'
       echo "The scrub is not active or is reserving replicas"
       return 1
     fi
-    (( extr_dbg >= 2 )) && bin/ceph pg "$pg1" query -f json-pretty | jq '.scrubber'
+    (( extr_dbg >= 2 )) && ceph pg "$pg1" query -f json-pretty | jq '.scrubber'
 
 
     # PG 1 is scrubbing, and has reserved the replicas - soem of which are shared
     # by PG 2. As the max-scrubs was set to 1, that should prevent PG 2 from
     # reserving its replicas.
 
-    (( extr_dbg >= 1 )) && bin/ceph tell osd.* dump_scrub_reservations --format=json-pretty
+    (( extr_dbg >= 1 )) && ceph tell osd.* dump_scrub_reservations --format=json-pretty
 
     # now - the 2'nd scrub - which should be blocked on reserving
     set_query_debug "$pg2"
-    bin/ceph tell "$pg2" schedule-deep-scrub
+    ceph tell "$pg2" schedule-deep-scrub
     sleep 0.5
     (( extr_dbg >= 2 )) && echo "===================================================================================="
-    (( extr_dbg >= 2 )) && bin/ceph pg "$pg2" query -f json-pretty | jq '.scrubber'
-    (( extr_dbg >= 2 )) && bin/ceph pg "$pg1" query -f json-pretty | jq '.scrubber'
+    (( extr_dbg >= 2 )) && ceph pg "$pg2" query -f json-pretty | jq '.scrubber'
+    (( extr_dbg >= 2 )) && ceph pg "$pg1" query -f json-pretty | jq '.scrubber'
     sleep 1
     (( extr_dbg >= 2 )) && echo "===================================================================================="
-    (( extr_dbg >= 2 )) && bin/ceph pg "$pg2" query -f json-pretty | jq '.scrubber'
-    (( extr_dbg >= 2 )) && bin/ceph pg "$pg1" query -f json-pretty | jq '.scrubber'
+    (( extr_dbg >= 2 )) && ceph pg "$pg2" query -f json-pretty | jq '.scrubber'
+    (( extr_dbg >= 2 )) && ceph pg "$pg1" query -f json-pretty | jq '.scrubber'
 
     # make sure pg2 scrub is stuck in the reserving state
-    local stt2=$(bin/ceph pg "$pg2" query | jq '.scrubber')
+    local stt2=$(ceph pg "$pg2" query | jq '.scrubber')
     local pg2_is_reserving
     pg2_is_reserving=$(echo $stt2 | jq '.is_reserving_replicas')
     if [[ "$pg2_is_reserving" != "true" ]]; then
       echo "The scheduled scrub for $pg2 should have been stuck"
-      bin/ceph pg dump pgs
+      ceph pg dump pgs
       return 1
     fi
 
     # now - issue an operator-initiated scrub on pg2.
     # The periodic scrub should be aborted, and the operator-initiated scrub should start.
     echo "Instructing $pg2 to perform a high-priority scrub"
-    bin/ceph tell "$pg2" scrub
+    ceph tell "$pg2" scrub
     for i in $( seq 1 10 )
     do
       sleep 0.5
-      stt2=$(bin/ceph pg "$pg2" query | jq '.scrubber')
+      stt2=$(ceph pg "$pg2" query | jq '.scrubber')
       pg2_is_active=$(echo $stt2 | jq '.active')
       pg2_is_reserving=$(echo $stt2 | jq '.is_reserving_replicas')
       if [[ "$pg2_is_active" = "true" && "$pg2_is_reserving" != "true" ]]; then
diff --git a/qa/standalone/scrub/scrub-helpers.sh b/qa/standalone/scrub/scrub-helpers.sh
index 0b14d6028b6..dd37b643e08 100644
--- a/qa/standalone/scrub/scrub-helpers.sh
+++ b/qa/standalone/scrub/scrub-helpers.sh
@@ -320,7 +320,7 @@ function build_pg_dicts {
 
   # if the infile name is '-', fetch the dump directly from the ceph cluster
   if [[ $infile == "-" ]]; then
-    local -r ceph_cmd="bin/ceph pg dump pgs_brief -f=json-pretty"
+    local -r ceph_cmd="ceph pg dump pgs_brief -f=json-pretty"
     local -r ceph_cmd_out=$(eval $ceph_cmd)
     local -r ceph_cmd_rc=$?
     if [[ $ceph_cmd_rc -ne 0 ]]; then
diff --git a/src/common/config_cacher.h b/src/common/config_cacher.h
index 91b8152dde1..f23195955a1 100644
--- a/src/common/config_cacher.h
+++ b/src/common/config_cacher.h
@@ -18,21 +18,30 @@
 #include "common/config_obs.h"
 #include "common/config.h"
 
+/**
+ * A simple class to cache a single configuration value.
+ * Points to note:
+ * - as get_tracked_conf_keys() must return a pointer to a null-terminated
+ *   array of C-strings, 'keys' - an array - is used to hold the sole key
+ *   that this observer is interested in.
+ * - the const cast should be removed once we change the
+ *   get_tracked_conf_keys() to return const char* const * (or something
+ *   similar).
+ */
 template <typename ValueT>
 class md_config_cacher_t : public md_config_obs_t {
   ConfigProxy& conf;
-  const char* const option_name;
+  const char* keys[2];
   std::atomic<ValueT> value_cache;
 
   const char** get_tracked_conf_keys() const override {
-    const static char* keys[] = { option_name, nullptr };
-    return keys;
+    return const_cast<const char**>(keys);
   }
 
   void handle_conf_change(const ConfigProxy& conf,
                           const std::set<std::string>& changed) override {
-    if (changed.count(option_name)) {
-      value_cache.store(conf.get_val<ValueT>(option_name));
+    if (changed.contains(keys[0])) {
+      value_cache.store(conf.get_val<ValueT>(keys[0]));
     }
   }
 
@@ -40,10 +49,10 @@ public:
   md_config_cacher_t(ConfigProxy& conf,
                      const char* const option_name)
     : conf(conf),
-      option_name(option_name) {
+      keys{option_name, nullptr} {
     conf.add_observer(this);
     std::atomic_init(&value_cache,
-                     conf.get_val<ValueT>(option_name));
+                     conf.get_val<ValueT>(keys[0]));
   }
 
   ~md_config_cacher_t() {
author	Ronen Friedman <rfriedma@redhat.com>	2025-01-15 10:54:06 +0100
committer	GitHub <noreply@github.com>	2025-01-15 10:54:06 +0100
commit	091c7c5400f49e0ca16e8462ede8754b81f17b96 (patch)
tree	66cd84403778a21368cc1b57dd9413c8681c7ec2
parent	Merge pull request #61072 from pecastro/doc_and_test_fixes (diff)
parent	qa/scrub: more delay when waiting for noscrub to take effect (diff)
download	ceph-091c7c5400f49e0ca16e8462ede8754b81f17b96.tar.xz ceph-091c7c5400f49e0ca16e8462ede8754b81f17b96.zip