summaryrefslogtreecommitdiffstats
path: root/qa/standalone
diff options
context:
space:
mode:
author DanWritesCode <github@dann.me> 2023-12-18 22:09:07 +0100
committer DanWritesCode <github@dann.me> 2024-03-04 22:08:48 +0100
commit 78d6bfe54c3b9b60fab36a640b1ce77c8f022fa9 (patch)
tree 895c45ce18afa98fbd2baf923a5deea50d802006 /qa/standalone
parent Merge pull request #52495 from adamemerson/wip-neorados-learning-from-experience (diff)
download ceph-78d6bfe54c3b9b60fab36a640b1ce77c8f022fa9.tar.xz
ceph-78d6bfe54c3b9b60fab36a640b1ce77c8f022fa9.zip
osd: add clear_shards_repaired command
This command allows us to clear the OSD_TOO_MANY_REPAIRS alert by setting the shard repair count to 0. This helps in cases where the alert was a false positive, or was caused by a condition that has since cleared at the disk level. Often, zeroing out the repair count is better than muting the alert or restarting the OSD.

Fixes: https://tracker.ceph.com/issues/54182
Co-authored-by: David Zafman <dzafman@redhat.com>
Signed-off-by: Daniel Radjenovic <dradjenovic@digitalocean.com>
Diffstat (limited to 'qa/standalone')
-rwxr-xr-x qa/standalone/osd/osd-rep-recov-eio.sh 14
1 files changed, 13 insertions, 1 deletions
diff --git a/qa/standalone/osd/osd-rep-recov-eio.sh b/qa/standalone/osd/osd-rep-recov-eio.sh
index 6fea441b3a9..a34f4a47189 100755
--- a/qa/standalone/osd/osd-rep-recov-eio.sh
+++ b/qa/standalone/osd/osd-rep-recov-eio.sh
@@ -219,6 +219,18 @@ function TEST_rados_repair_warning() {
ceph health | $(! grep -q "Too many repaired reads on 1 OSDs") || return 1
set +o pipefail
+ ceph health unmute OSD_TOO_MANY_REPAIRS
+ ceph tell osd.$primary clear_shards_repaired
+ sleep 10
+
+ set -o pipefail
+ # Should clear this
+ ceph health | $(! grep -q "Too many repaired reads on 1 OSDs") || return 1
+ set +o pipefail
+
+ ceph tell osd.$primary clear_shards_repaired $OBJS
+ sleep 10
+
for i in $(seq 1 $OBJS)
do
inject_$inject rep data $poolname ${objbase}-$i $dir 0 || return 1
@@ -235,7 +247,7 @@ function TEST_rados_repair_warning() {
COUNT=$(ceph pg dump --format=json-pretty | jq ".pg_map.osd_stats_sum.num_shards_repaired")
test "$COUNT" = "$(expr $OBJS \* 3)" || return 1
- # Give mon a chance to notice additional OSD and unmute
+ # Give mon a chance to notice additional OSD and reset num_shards_repaired
# The default tick time is 5 seconds
CHECKTIME=10
LOOPS=0