qa/standalone/scrub/scrub-helpers.sh


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418

#!/usr/bin/env bash
# @file   scrub-helpers.sh
# @brief  a collection of bash functions useful for scrub standalone tests
#

# extract_published_sch()
#
# Use the output from both 'ceph pg dump pgs' and 'ceph pg x.x query' commands to determine
# the published scrub scheduling status of a given PG.
#
# $1: pg id
# $2: 'current' time to compare to
# $3: an additional time-point to compare to
# $4: [out] dictionary
#
function extract_published_sch() {
  local pgn="$1"
  local -n dict=$4 # a ref to the in/out dictionary
  local current_time=$2
  local extra_time=$3
  local extr_dbg=2 # note: 3 and above leave some temp files around

  #turn off '-x' (but remember previous state)
  local saved_echo_flag=${-//[^x]/}
  set +x

  (( extr_dbg >= 3 )) && ceph pg dump pgs -f json-pretty >> /tmp/a_dmp$$
  (( extr_dbg >= 3 )) && ceph pg $1 query -f json-pretty >> /tmp/a_qry$$

  from_dmp=`ceph pg dump pgs -f json-pretty | jq -r --arg pgn "$pgn" --arg extra_dt "$extra_time" --arg current_dt "$current_time" '[
    [[.pg_stats[]] | group_by(.pg_stats)][0][0] | 
    [.[] |
    select(has("pgid") and .pgid == $pgn) |

        (.dmp_stat_part=(.scrub_schedule | if test(".*@.*") then (split(" @ ")|first) else . end)) |
        (.dmp_when_part=(.scrub_schedule | if test(".*@.*") then (split(" @ ")|last) else "0" end)) |

     [ {
       dmp_pg_state: .state,
       dmp_state_has_scrubbing: (.state | test(".*scrub.*";"i")),
       dmp_last_duration:.last_scrub_duration,
       dmp_schedule: .dmp_stat_part,
       dmp_schedule_at: .dmp_when_part,
       dmp_is_future: ( .dmp_when_part > $current_dt ),
       dmp_vs_date: ( .dmp_when_part > $extra_dt  ),
       dmp_reported_epoch: .reported_epoch,
       dmp_seq: .reported_seq
      }] ]][][][]'`

  (( extr_dbg >= 2 )) && echo "from pg dump pg: $from_dmp"
  (( extr_dbg >= 2 )) && echo "query output:"
  (( extr_dbg >= 2 )) && ceph pg $1 query -f json-pretty | awk -e '/scrubber/,/agent_state/ {print;}'

  # note: the query output for the schedule containas two dates: the first is the not-before, and
  # the second is the original target time (which is before or the same as the not-before)
  # the current line format looks like this:
  # "schedule": "scrub scheduled @ 2024-06-26T16:09:56.666 (2024-06-24T16:09:56.338)"
  from_qry=`ceph pg $1 query -f json-pretty | jq -r --arg extra_dt "$extra_time" --arg current_dt "$current_time"  --arg spt "'" '
    . |
        (.q_stat_part=((.scrubber.schedule// "-") | if test(".*@.*") then (split(" @ ")|first) else . end)) |
        (.q_when_part=((.scrubber.schedule// "0") | if test(".*@.*") then (split(" @ ")|last) else "0" end)) |
        (.q_target=((.scrubber.schedule// "0") | if test(".*@.*") then (split(" @ ")|last|split(" (")|last|split(")")|first) else "0" end)) |
        (.q_not_before=((.scrubber.schedule// "0") | if test(".*@.*") then (split(" @ ")|last|split(" (")|first) else "0" end)) |
	(.q_when_is_future=(.q_target > $current_dt)) |
	(.q_vs_date=(.q_when_part > $extra_dt)) |	
      {
        query_epoch: .epoch,
        query_seq: .info.stats.reported_seq,
        query_active: (.scrubber | if has("active") then .active else "bug" end),
        query_schedule: .q_stat_part,
        #query_schedule_at: .q_when_part,
        query_schedule_at: .q_not_before,
        query_target_at: .q_target,
        query_last_duration: .info.stats.last_scrub_duration,
        query_last_stamp: .info.history.last_scrub_stamp,
        query_last_scrub: (.info.history.last_scrub| sub($spt;"x") ),
        query_is_future: .q_when_is_future,
        query_vs_date: .q_vs_date,
        query_scrub_seq: .scrubber.test_sequence
      }
   '`
  (( extr_dbg >= 1 )) && echo $from_qry " " $from_dmp | jq -s -r 'add | "(",(to_entries | .[] | "["+(.key)+"]="+(.value|@sh)),")"'

  # note that using a ref to an associative array directly is tricky. Instead - we are copying:
  local -A dict_src=`echo $from_qry " " $from_dmp | jq -s -r 'add | "(",(to_entries | .[] | "["+(.key)+"]="+(.value|@sh)),")"'`
  dict=()
  for k in "${!dict_src[@]}"; do dict[$k]=${dict_src[$k]}; done

  if [[ -n "$saved_echo_flag" ]]; then set -x; fi
}

# query the PG, until any of the conditions in the 'expected' array are met
#
# A condition may be negated by an additional entry in the 'expected' array. Its
# form should be:
#  key: the original key, with a "_neg" suffix;
#  Value: not checked
#
# $1: pg id
# $2: max retries
# $3: a date to use in comparisons
# $4: set of K/V conditions
# $5: debug message
# $6: [out] the results array
function wait_any_cond() {
  local pgid="$1"
  local retries=$2
  local cmp_date=$3
  local -n ep=$4
  local -n out_array=$6
  local -A sc_data
  local extr_dbg=2

  #turn off '-x' (but remember previous state)
  local saved_echo_flag=${-//[^x]/}
  set +x

  local now_is=`date -I"ns"`
  (( extr_dbg >= 2 )) && echo "waiting for any condition ($5): pg:$pgid dt:$cmp_date ($retries retries)"

  for i in $(seq 1 $retries)
  do
    sleep 0.5
    extract_published_sch $pgid $now_is $cmp_date sc_data
    (( extr_dbg >= 4 )) && echo "${sc_data['dmp_last_duration']}"
    (( extr_dbg >= 4 )) && echo "----> loop:  $i  ~ ${sc_data['dmp_last_duration']}  / " ${sc_data['query_vs_date']} " /   ${sc_data['dmp_is_future']}"
    (( extr_dbg >= 2 )) && echo "--> loop:  $i ~ ${sc_data['query_active']} / ${sc_data['query_seq']} / ${sc_data['dmp_seq']} " \
                      "/ ${sc_data['query_is_future']} / ${sc_data['query_last_stamp']} / ${sc_data['query_schedule']} %%% ${!ep[@]}"

    # perform schedule_against_expected(), but with slightly different out-messages behaviour
    for k_ref in "${!ep[@]}"
    do
      (( extr_dbg >= 3 )) && echo "key is $k_ref"
      # is this a real key, or just a negation flag for another key??
      [[ $k_ref =~ "_neg" ]] && continue

      local act_val=${sc_data[$k_ref]}
      local exp_val=${ep[$k_ref]}

      # possible negation? look for a matching key
      local neg_key="${k_ref}_neg"
      (( extr_dbg >= 3 )) && echo "neg-key is $neg_key"
      if [ -v 'ep[$neg_key]' ]; then
        is_neg=1
      else
        is_neg=0
      fi

      (( extr_dbg >= 1 )) && echo "key is $k_ref: negation:$is_neg # expected: $exp_val # in actual: $act_val"
      is_eq=0
      [[ $exp_val == $act_val ]] && is_eq=1
      if (($is_eq ^ $is_neg))  
      then
        echo "$5 - '$k_ref' actual value ($act_val) matches expected ($exp_val) (negation: $is_neg)"
        for k in "${!sc_data[@]}"; do out_array[$k]=${sc_data[$k]}; done
        if [[ -n "$saved_echo_flag" ]]; then set -x; fi
        return 0
      fi
    done
  done

  echo "$5: wait_any_cond(): failure. Note: query-active=${sc_data['query_active']}"
  if [[ -n "$saved_echo_flag" ]]; then set -x; fi
  return 1
}


# schedule_against_expected()
#
# Compare the scrub scheduling state collected by extract_published_sch() to a set of expected values.
# All values are expected to match.
#
# $1: the published scheduling state
# $2: a set of conditions to verify
# $3: text to be echoed for a failed match
#
function schedule_against_expected() {
  local -n dict=$1 # a ref to the published state
  local -n ep=$2  # the expected results
  local extr_dbg=1

  # turn off '-x' (but remember previous state)
  local saved_echo_flag=${-//[^x]/}
  set +x

  (( extr_dbg >= 1 )) && echo "-- - comparing:"
  for k_ref in "${!ep[@]}"
  do
    local act_val=${dict[$k_ref]}
    local exp_val=${ep[$k_ref]}
    (( extr_dbg >= 1 )) && echo "key is " $k_ref "  expected: " $exp_val " in actual: " $act_val
    if [[ $exp_val != $act_val ]]
    then
      echo "$3 - '$k_ref' actual value ($act_val) differs from expected ($exp_val)"
      echo '####################################################^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^'

      if [[ -n "$saved_echo_flag" ]]; then set -x; fi
      return 1
    fi
  done

  if [[ -n "$saved_echo_flag" ]]; then set -x; fi
  return 0
}


# Start the cluster "nodes" and create a pool for testing.
#
# The OSDs are started with a set of parameters aimed in creating a repeatable
# and stable scrub sequence:
#  - no scrub randomizations/backoffs
#  - no autoscaler
#
# $1: the test directory
# $2: [in/out] an array of configuration values
#
# The function adds/updates the configuration dictionary with the name of the
# pool created, and its ID.
#
# Argument 2 might look like this:
#
#  declare -A test_conf=( 
#    ['osds_num']="3"
#    ['pgs_in_pool']="7"
#    ['extras']="--extra1 --extra2"
#    ['pool_name']="testpl"
#  )
function standard_scrub_cluster() {
    local dir=$1
    local -n args=$2

    local OSDS=${args['osds_num']:-"3"}
    local pg_num=${args['pgs_in_pool']:-"8"}
    local poolname="${args['pool_name']:-test}"
    args['pool_name']=$poolname
    local extra_pars=${args['extras']}
    local debug_msg=${args['msg']:-"dbg"}

    # turn off '-x' (but remember previous state)
    local saved_echo_flag=${-//[^x]/}
    set +x

    run_mon $dir a --osd_pool_default_size=3 || return 1
    run_mgr $dir x --mgr_stats_period=1 || return 1

    local ceph_osd_args="--osd_deep_scrub_randomize_ratio=0 \
            --osd_scrub_interval_randomize_ratio=0 \
            --osd_scrub_backoff_ratio=0.0 \
            --osd_pool_default_pg_autoscale_mode=off \
            --osd_pg_stat_report_interval_max_seconds=1 \
            --osd_pg_stat_report_interval_max_epochs=1 \
            --osd_stats_update_period_not_scrubbing=3 \
            --osd_stats_update_period_scrubbing=1 \
            --osd_scrub_retry_after_noscrub=5 \
            --osd_scrub_retry_pg_state=5 \
            --osd_scrub_retry_delay=3 \
            --osd_pool_default_size=3 \
            $extra_pars"

    for osd in $(seq 0 $(expr $OSDS - 1))
    do
      run_osd $dir $osd $(echo $ceph_osd_args) || return 1
    done

    create_pool $poolname $pg_num $pg_num
    wait_for_clean || return 1

    # update the in/out 'args' with the ID of the new pool
    sleep 1
    name_n_id=`ceph osd dump | awk '/^pool.*'$poolname'/ { gsub(/'"'"'/," ",$3); print $3," ", $2}'`
    echo "standard_scrub_cluster: $debug_msg: test pool is $name_n_id"
    args['pool_id']="${name_n_id##* }"
    args['osd_args']=$ceph_osd_args
    if [[ -n "$saved_echo_flag" ]]; then set -x; fi
}


# Start the cluster "nodes" and create a pool for testing - wpq version.
#
# A variant of standard_scrub_cluster() that selects the wpq scheduler and sets a value to
# osd_scrub_sleep. To be used when the test is attempting to "catch" the scrubber during an
# ongoing scrub.
#
# See standard_scrub_cluster() for more details.
#
# $1: the test directory
# $2: [in/out] an array of configuration values
# $3: osd_scrub_sleep
#
# The function adds/updates the configuration dictionary with the name of the
# pool created, and its ID.
function standard_scrub_wpq_cluster() {
    local dir=$1
    local -n conf=$2
    local osd_sleep=$3

    conf['extras']=" --osd_op_queue=wpq --osd_scrub_sleep=$osd_sleep ${conf['extras']}"

    standard_scrub_cluster $dir conf || return 1
}


# Parse the output of a 'pg dump pgs_brief' command and build a set of dictionaries:
# - pg_primary_dict: a dictionary of pgid -> acting_primary
# - pg_acting_dict: a dictionary of pgid -> acting set
# - pg_pool_dict: a dictionary of pgid -> pool
# If the input file is '-', the function will fetch the dump directly from the ceph cluster.
function build_pg_dicts {
  local dir=$1
  local -n pg_primary_dict=$2
  local -n pg_acting_dict=$3
  local -n pg_pool_dict=$4
  local infile=$5

  local extr_dbg=0 # note: 3 and above leave some temp files around

  #turn off '-x' (but remember previous state)
  local saved_echo_flag=${-//[^x]/}
  set +x

  # if the infile name is '-', fetch the dump directly from the ceph cluster
  if [[ $infile == "-" ]]; then
    local -r ceph_cmd="bin/ceph pg dump pgs_brief -f=json-pretty"
    local -r ceph_cmd_out=$(eval $ceph_cmd)
    local -r ceph_cmd_rc=$?
    if [[ $ceph_cmd_rc -ne 0 ]]; then
      echo "Error: the command '$ceph_cmd' failed with return code $ceph_cmd_rc"
    fi
    (( extr_dbg >= 3 )) && echo "$ceph_cmd_out" > /tmp/e2
    l0=`echo "$ceph_cmd_out" | jq '[.pg_stats | group_by(.pg_stats)[0] | map({pgid: .pgid, pool: (.pgid | split(".")[0]), acting: .acting, acting_primary: .acting_primary})] | .[]' `
  else
    l0=`jq '[.pg_stats | group_by(.pg_stats)[0] | map({pgid: .pgid, pool: (.pgid | split(".")[0]), acting: .acting, acting_primary: .acting_primary})] | .[]' $infile `
  fi
  (( extr_dbg >= 2 )) && echo "L0: $l0"

  mapfile -t l1 < <(echo "$l0" | jq -c '.[]')
  (( extr_dbg >= 2 )) && echo "L1: ${#l1[@]}"

  for item in "${l1[@]}"; do
    pgid=$(echo "$item" | jq -r '.pgid')
    acting=$(echo "$item" | jq -r '.acting | @sh')
    pg_acting_dict["$pgid"]=$acting
    acting_primary=$(echo "$item" | jq -r '.acting_primary')
    pg_primary_dict["$pgid"]=$acting_primary
    pool=$(echo "$item" | jq -r '.pool')
    pg_pool_dict["$pgid"]=$pool
  done

  if [[ -n "$saved_echo_flag" ]]; then set -x; fi
}


# a function that counts the number of common active-set elements between two PGs
# 1 - the first PG
# 2 - the second PG
# 3 - the dictionary of active sets
function count_common_active {
  local pg1=$1
  local pg2=$2
  local -n pg_acting_dict=$3
  local -n res=$4

  local -a a1=(${pg_acting_dict[$pg1]})
  local -a a2=(${pg_acting_dict[$pg2]})

  local -i cnt=0
  for i in "${a1[@]}"; do
    for j in "${a2[@]}"; do
      if [[ $i -eq $j ]]; then
        cnt=$((cnt+1))
      fi
    done
  done

  res=$cnt
}


# given a PG, find another one with a disjoint active set
# - but allow a possible common Primary
# 1 - the PG
# 2 - the dictionary of active sets
# 3 - [out] - the PG with a disjoint active set
function find_disjoint_but_primary {
  local pg=$1
  local -n ac_dict=$2
  local -n p_dict=$3
  local -n res=$4

  for cand in "${!ac_dict[@]}"; do
    if [[ "$cand" != "$pg" ]]; then
      local -i common=0
      count_common_active "$pg" "$cand" ac_dict common
      if [[ $common -eq 0 || ( $common -eq 1 && "${p_dict[$pg]}" == "${p_dict[$cand]}" )]]; then
        res=$cand
        return
      fi
    fi
  done
}


# A debug flag is set for the PG specified, causing the 'pg query' command to display
# an additional 'scrub sessions counter' field.
#
# $1: PG id
#
function set_query_debug() {
    local pgid=$1
    local prim_osd=`ceph pg dump pgs_brief | \
      awk -v pg="^$pgid" -n -e '$0 ~ pg { print(gensub(/[^0-9]*([0-9]+).*/,"\\\\1","g",$5)); }' `

    echo "Setting scrub debug data. Primary for $pgid is $prim_osd"
    CEPH_ARGS='' ceph --format=json daemon $(get_asok_path osd.$prim_osd) \
          scrubdebug $pgid set sessions
}