summaryrefslogtreecommitdiffstats
path: root/qa/standalone/scrub/osd-scrub-repair.sh
diff options
context:
space:
mode:
Diffstat (limited to 'qa/standalone/scrub/osd-scrub-repair.sh')
-rwxr-xr-xqa/standalone/scrub/osd-scrub-repair.sh256
1 files changed, 253 insertions, 3 deletions
diff --git a/qa/standalone/scrub/osd-scrub-repair.sh b/qa/standalone/scrub/osd-scrub-repair.sh
index b717026e191..6dd5b10ae8f 100755
--- a/qa/standalone/scrub/osd-scrub-repair.sh
+++ b/qa/standalone/scrub/osd-scrub-repair.sh
@@ -442,7 +442,6 @@ function TEST_auto_repair_bluestore_basic() {
['pool_name']="testpool"
['extras']=" --osd_scrub_auto_repair=true"
)
- local extr_dbg=3
standard_scrub_cluster $dir cluster_conf
local poolid=${cluster_conf['pool_id']}
local poolname=${cluster_conf['pool_name']}
@@ -5754,11 +5753,13 @@ function TEST_corrupt_scrub_erasure_overwrites() {
#
# Test to make sure that a periodic scrub won't cause deep-scrub info to be lost
+# Update 2024: this functionality was removed from the code. The test will be skipped.
#
function TEST_periodic_scrub_replicated() {
local dir=$1
local poolname=psr_pool
local objname=POBJ
+ return 0
run_mon $dir a --osd_pool_default_size=2 || return 1
run_mgr $dir x || return 1
@@ -5795,12 +5796,13 @@ function TEST_periodic_scrub_replicated() {
flush_pg_stats
local last_scrub=$(get_last_scrub_stamp $pg)
- # Fake a schedule scrub
+ # Fake a scheduled deep scrub
ceph tell $pg schedule-scrub || return 1
# Wait for schedule regular scrub
wait_for_scrub $pg "$last_scrub"
# It needed to be upgraded
+ # update 2024: the "upgrade" functionality has been removed
grep -q "Deep scrub errors, upgrading scrub to deep-scrub" $dir/osd.${primary}.log || return 1
# Bad object still known
@@ -5831,7 +5833,7 @@ function TEST_periodic_scrub_replicated() {
flush_pg_stats
# Request a regular scrub and it will be done
- pg_schedule_scrub $pg
+ pg_scrub $pg
grep -q "Regular scrub request, deep-scrub details will be lost" $dir/osd.${primary}.log || return 1
# deep-scrub error is no longer present
@@ -6249,6 +6251,254 @@ function TEST_request_scrub_priority() {
grep "log_channel.*scrub ok" $dir/osd.${primary}.log | grep -v purged_snaps | head -1 | sed 's/.*[[]DBG[]]//' | grep -q $pg || return 1
}
+#
+# Testing the "split scrub store" feature: shallow scrubs do not
+# purge deep errors from the store.
+#
+# Corrupt one copy of a replicated pool, creating both shallow and deep errors.
+# Then shallow-scrub the pool and verify that the deep errors are still present.
+#
+function TEST_dual_store_replicated_cluster() {
+ local dir=$1
+ local poolname=csr_pool
+ local total_objs=19
+ local extr_dbg=1 # note: 3 and above leave some temp files around
+
+ run_mon $dir a --osd_pool_default_size=2 || return 1
+ run_mgr $dir x --mgr_stats_period=1 || return 1
+ local ceph_osd_args="--osd-scrub-interval-randomize-ratio=0 --osd-deep-scrub-randomize-ratio=0 "
+ ceph_osd_args+="--osd_scrub_backoff_ratio=0 --osd_stats_update_period_not_scrubbing=3 "
+ ceph_osd_args+="--osd_stats_update_period_scrubbing=2 --osd_op_queue=wpq --osd_scrub_auto_repair=0 "
+ for osd in $(seq 0 1)
+ do
+ run_osd $dir $osd $ceph_osd_args || return 1
+ done
+
+ create_rbd_pool || return 1
+ wait_for_clean || return 1
+
+ create_pool foo 1 || return 1
+ create_pool $poolname 1 1 || return 1
+ wait_for_clean || return 1
+
+ ceph osd pool set $poolname noscrub 1
+ ceph osd pool set $poolname nodeep-scrub 1
+
+ for i in $(seq 1 $total_objs) ; do
+ objname=ROBJ${i}
+ add_something $dir $poolname $objname || return 1
+
+ rados --pool $poolname setomapheader $objname hdr-$objname || return 1
+ rados --pool $poolname setomapval $objname key-$objname val-$objname || return 1
+ done
+
+ # Increase file 1 MB + 1KB
+ dd if=/dev/zero of=$dir/new.ROBJ19 bs=1024 count=1025
+ rados --pool $poolname put $objname $dir/new.ROBJ19 || return 1
+ rm -f $dir/new.ROBJ19
+
+ local pg=$(get_pg $poolname ROBJ0)
+ local primary=$(get_primary $poolname ROBJ0)
+
+ # Compute an old omap digest and save oi
+ CEPH_ARGS='' ceph daemon $(get_asok_path osd.0) \
+ config set osd_deep_scrub_update_digest_min_age 0
+ CEPH_ARGS='' ceph daemon $(get_asok_path osd.1) \
+ config set osd_deep_scrub_update_digest_min_age 0
+ pg_deep_scrub $pg
+
+ for i in $(seq 1 $total_objs) ; do
+ objname=ROBJ${i}
+
+ # Alternate corruption between osd.0 and osd.1
+ local osd=$(expr $i % 2)
+
+ case $i in
+ 1)
+ # Size (deep scrub data_digest too)
+ local payload=UVWXYZZZ
+ echo $payload > $dir/CORRUPT
+ objectstore_tool $dir $osd $objname set-bytes $dir/CORRUPT || return 1
+ ;;
+
+ 2)
+ # digest (deep scrub only)
+ local payload=UVWXYZ
+ echo $payload > $dir/CORRUPT
+ objectstore_tool $dir $osd $objname set-bytes $dir/CORRUPT || return 1
+ ;;
+
+ 3)
+ # missing
+ objectstore_tool $dir $osd $objname remove || return 1
+ ;;
+
+ 4)
+ # Modify omap value (deep scrub only)
+ objectstore_tool $dir $osd $objname set-omap key-$objname $dir/CORRUPT || return 1
+ ;;
+
+ 5)
+ # Delete omap key (deep scrub only)
+ objectstore_tool $dir $osd $objname rm-omap key-$objname || return 1
+ ;;
+
+ 6)
+ # Add extra omap key (deep scrub only)
+ echo extra > $dir/extra-val
+ objectstore_tool $dir $osd $objname set-omap key2-$objname $dir/extra-val || return 1
+ rm $dir/extra-val
+ ;;
+
+ 7)
+ # Modify omap header (deep scrub only)
+ echo -n newheader > $dir/hdr
+ objectstore_tool $dir $osd $objname set-omaphdr $dir/hdr || return 1
+ rm $dir/hdr
+ ;;
+
+ 8)
+ rados --pool $poolname setxattr $objname key1-$objname val1-$objname || return 1
+ rados --pool $poolname setxattr $objname key2-$objname val2-$objname || return 1
+
+ # Break xattrs
+ echo -n bad-val > $dir/bad-val
+ objectstore_tool $dir $osd $objname set-attr _key1-$objname $dir/bad-val || return 1
+ objectstore_tool $dir $osd $objname rm-attr _key2-$objname || return 1
+ echo -n val3-$objname > $dir/newval
+ objectstore_tool $dir $osd $objname set-attr _key3-$objname $dir/newval || return 1
+ rm $dir/bad-val $dir/newval
+ ;;
+
+ 9)
+ objectstore_tool $dir $osd $objname get-attr _ > $dir/robj9-oi
+ echo -n D > $dir/change
+ rados --pool $poolname put $objname $dir/change
+ objectstore_tool $dir $osd $objname set-attr _ $dir/robj9-oi
+ rm $dir/oi $dir/change
+ ;;
+
+ # ROBJ10 must be handled after digests are re-computed by a deep scrub below
+ # ROBJ11 must be handled with config change before deep scrub
+ # ROBJ12 must be handled with config change before scrubs
+ # ROBJ13 must be handled before scrubs
+
+ 14)
+ echo -n bad-val > $dir/bad-val
+ objectstore_tool $dir 0 $objname set-attr _ $dir/bad-val || return 1
+ objectstore_tool $dir 1 $objname rm-attr _ || return 1
+ rm $dir/bad-val
+ ;;
+
+ 15)
+ objectstore_tool $dir $osd $objname rm-attr _ || return 1
+ ;;
+
+ 16)
+ objectstore_tool $dir 0 $objname rm-attr snapset || return 1
+ echo -n bad-val > $dir/bad-val
+ objectstore_tool $dir 1 $objname set-attr snapset $dir/bad-val || return 1
+ ;;
+
+ 17)
+ # Deep-scrub only (all replicas are different from the object info)
+ local payload=ROBJ17
+ echo $payload > $dir/new.ROBJ17
+ objectstore_tool $dir 0 $objname set-bytes $dir/new.ROBJ17 || return 1
+ objectstore_tool $dir 1 $objname set-bytes $dir/new.ROBJ17 || return 1
+ ;;
+
+ 18)
+ # Deep-scrub only (all replicas are different from the object info)
+ local payload=ROBJ18
+ echo $payload > $dir/new.ROBJ18
+ objectstore_tool $dir 0 $objname set-bytes $dir/new.ROBJ18 || return 1
+ objectstore_tool $dir 1 $objname set-bytes $dir/new.ROBJ18 || return 1
+ # Make one replica have a different object info, so a full repair must happen too
+ objectstore_tool $dir $osd $objname corrupt-info || return 1
+ ;;
+
+ 19)
+ # Set osd-max-object-size smaller than this object's size
+
+ esac
+ done
+
+ local pg=$(get_pg $poolname ROBJ0)
+
+ ceph tell osd.\* injectargs -- --osd-max-object-size=1048576
+
+ inject_eio rep data $poolname ROBJ11 $dir 0 || return 1 # shard 0 of [1, 0], osd.1
+ inject_eio rep mdata $poolname ROBJ12 $dir 1 || return 1 # shard 1 of [1, 0], osd.0
+ inject_eio rep data $poolname ROBJ13 $dir 0 || return 1 # shard 0 of [1, 0], osd.1
+
+ # first sequence: the final shallow scrub should not override any of the deep errors
+ pg_scrub $pg
+ (( extr_dbg >= 3 )) && rados list-inconsistent-obj $pg | python3 -c "$sortkeys" | jq '.' > /tmp/WQR_1.json
+ pg_scrub $pg
+ (( extr_dbg >= 3 )) && rados list-inconsistent-obj $pg | python3 -c "$sortkeys" | jq '.' > /tmp/WQR_1b.json
+ rados list-inconsistent-obj $pg | jq "$jqfilter" | jq '.inconsistents' | python3 -c "$sortkeys" > $dir/sh1_results.json
+ (( extr_dbg >= 3 )) && rados list-inconsistent-obj $pg | jq "$jqfilter" | jq '.inconsistents' | \
+ python3 -c "$sortkeys" > /tmp/WQR_1b_s.json
+
+ pg_deep_scrub $pg
+ (( extr_dbg >= 3 )) && rados list-inconsistent-obj $pg | python3 -c "$sortkeys" | jq '.' > /tmp/WQR_2.json
+ rados list-inconsistent-obj $pg | jq "$jqfilter" | jq '.inconsistents' | python3 -c "$sortkeys" > $dir/dp_results.json
+ (( extr_dbg >= 3 )) && rados list-inconsistent-obj $pg | jq "$jqfilter" | jq '.inconsistents' | \
+ python3 -c "$sortkeys" > /tmp/WQR_2s.json
+
+ pg_scrub $pg
+ (( extr_dbg >= 3 )) && rados list-inconsistent-obj $pg | python3 -c "$sortkeys" | jq '.' > /tmp/WQR_3.json
+ rados list-inconsistent-obj $pg | jq "$jqfilter" | jq '.inconsistents' | python3 -c "$sortkeys" > $dir/sh2_results.json
+ (( extr_dbg >= 3 )) && rados list-inconsistent-obj $pg | jq "$jqfilter" | jq '.inconsistents' | \
+ python3 -c "$sortkeys" > /tmp/WQR_3s.json
+
+ diff -u $dir/dp_results.json $dir/sh2_results.json || return 1
+
+ # inject a read error, which is a special case: the scrub encountering the read error
+ # would override the previously collected shard info.
+ inject_eio rep mdata $poolname ROBJ13 $dir 1 || return 1 # shard 1 of [1, 0], osd.0
+
+ pg_deep_scrub $pg
+
+ (( extr_dbg >= 3 )) && rados list-inconsistent-obj $pg | python3 -c "$sortkeys" | jq '.' > /tmp/WQR_4.json
+ (( extr_dbg >= 3 )) && rados list-inconsistent-obj $pg | jq "$jqfilter" | jq '.inconsistents' | \
+ python3 -c "$sortkeys" > /tmp/WQR_4s_w13.json
+ (( extr_dbg >= 3 )) && rados list-inconsistent-obj $pg | jq "$jqfilter" | \
+ jq 'del(.inconsistents[] | select(.object.name == "ROBJ13"))' | \
+ jq '.inconsistents' | python3 -c "$sortkeys" > /tmp/WQR_4s_wo13.json
+
+ rados list-inconsistent-obj $pg | jq "$jqfilter" | jq '.inconsistents' | \
+ python3 -c "$sortkeys" > $dir/dpPart2_w13_results.json
+ # Remove the entry with "name":"ROBJ13" from the $dir/d*_results.json
+ rados list-inconsistent-obj $pg | jq "$jqfilter" | jq 'del(.inconsistents[] | select(.object.name == "ROBJ13"))' | \
+ jq '.inconsistents' | python3 -c "$sortkeys" > $dir/dpPart2_wo13_results.json
+ (( extr_dbg >= 3 )) && rados list-inconsistent-obj $pg | jq "$jqfilter" | jq '.inconsistents' | \
+ python3 -c "$sortkeys" > /tmp/WQR_4s.json
+
+ pg_scrub $pg
+
+ (( extr_dbg >= 3 )) && rados list-inconsistent-obj $pg | python3 -c "$sortkeys" | jq '.' > /tmp/WQR_5.json
+ (( extr_dbg >= 3 )) && rados list-inconsistent-obj $pg | jq "$jqfilter" | jq '.inconsistents' | \
+ python3 -c "$sortkeys" > /tmp/WQR_5s_w13.json
+ (( extr_dbg >= 3 )) && rados list-inconsistent-obj $pg | jq "$jqfilter" | \
+ jq 'del(.inconsistents[] | select(.object.name == "ROBJ13"))' |\
+ jq '.inconsistents' | python3 -c "$sortkeys" > /tmp/WQR_5s_wo13.json
+
+ rados list-inconsistent-obj $pg | jq "$jqfilter" | jq '.inconsistents' | python3 -c "$sortkeys" > \
+ $dir/sh2Part2_w13_results.json
+ rados list-inconsistent-obj $pg | jq "$jqfilter" | jq 'del(.inconsistents[] | select(.object.name == "ROBJ13"))' |\
+ jq '.inconsistents' | python3 -c "$sortkeys" > $dir/shPart2_wo13_results.json
+
+ # the shallow scrub results should differ from the results of the deep
+ # scrub preceding it, but the difference should be limited to ROBJ13
+ diff -u $dir/dpPart2_w13_results.json $dir/sh2Part2_w13_results.json && return 1
+ diff -u $dir/dpPart2_wo13_results.json $dir/shPart2_wo13_results.json || return 1
+
+ ceph osd pool rm $poolname $poolname --yes-i-really-really-mean-it
+ return 0
+}
+
main osd-scrub-repair "$@"