summaryrefslogtreecommitdiffstats
path: root/qa/standalone/osd/osd-recovery-space.sh
blob: 67fe2e91dfa2a07e87a7b985fb4f2e577a856d01 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
#!/usr/bin/env bash
#
# Copyright (C) 2018 Red Hat <contact@redhat.com>
#
# Author: David Zafman <dzafman@redhat.com>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU Library Public License as published by
# the Free Software Foundation; either version 2, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Library Public License for more details.
#

source $CEPH_ROOT/qa/standalone/ceph-helpers.sh

function run() {
    local dir=$1
    shift

    export CEPH_MON="127.0.0.1:7221" # git grep '\<7221\>' : there must be only one
    export CEPH_ARGS
    CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
    CEPH_ARGS+="--mon-host=$CEPH_MON "
    CEPH_ARGS+="--osd_max_backfills=10 "
    CEPH_ARGS+="--osd_mclock_override_recovery_settings=true "
    export objects=600
    export poolprefix=test

    local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
    for func in $funcs ; do
        setup $dir || return 1
        $func $dir || return 1
        teardown $dir || return 1
    done
}


function get_num_in_state() {
    local state=$1
    local expression
    expression+="select(contains(\"${state}\"))"
    ceph --format json pg dump pgs 2>/dev/null | \
        jq ".pg_stats | [.[] | .state | $expression] | length"
}


function wait_for_state() {
    local state=$1
    local cur_in_state
    local -a delays=($(get_timeout_delays $2 5))
    local -i loop=0

    flush_pg_stats || return 1
    while test $(get_num_pgs) == 0 ; do
	sleep 1
    done

    while true ; do
        cur_in_state=$(get_num_in_state ${state})
        test $cur_in_state -gt 0 && break
        if (( $loop >= ${#delays[*]} )) ; then
            ceph pg dump pgs
            return 1
        fi
        sleep ${delays[$loop]}
        loop+=1
    done
    return 0
}


function wait_for_recovery_toofull() {
    local timeout=$1
    wait_for_state recovery_toofull $timeout
    if [ $ret -ne 0 ]; then
      echo "Error: Recovery toofull timeout"
      return 1
    fi
}


# Create 1 pools with size 1
# set ful-ratio to 50%
# Write data 600 5K (3000K)
# Inject fake_statfs_for_testing to 3600K (83% full)
# Incresase the pool size to 2
# The pool shouldn't have room to recovery
function TEST_recovery_test_simple() {
    local dir=$1
    local pools=1
    local OSDS=2

    run_mon $dir a || return 1
    run_mgr $dir x || return 1
    export CEPH_ARGS

    for osd in $(seq 0 $(expr $OSDS - 1))
    do
      run_osd $dir $osd || return 1
    done

    ceph osd set-nearfull-ratio .40
    ceph osd set-backfillfull-ratio .45
    ceph osd set-full-ratio .50

    for p in $(seq 1 $pools)
    do
      create_pool "${poolprefix}$p" 1 1
      ceph osd pool set "${poolprefix}$p" size 1 --yes-i-really-mean-it
    done

    wait_for_clean || return 1

    dd if=/dev/urandom of=$dir/datafile bs=1024 count=5
    for o in $(seq 1 $objects)
    do
      rados -p "${poolprefix}$p" put obj$o $dir/datafile
    done

    for o in $(seq 0 $(expr $OSDS - 1))
    do
      ceph tell osd.$o injectargs '--fake_statfs_for_testing 3686400' || return 1
    done
    sleep 5

    ceph pg dump pgs

    for p in $(seq 1 $pools)
    do
      ceph osd pool set "${poolprefix}$p" size 2
    done

    # If this times out, we'll detected errors below
    wait_for_recovery_toofull 120
    if [ $? -ne 0 ]; then
      echo "Error: Recovery toofull timeout"
      return 1
    fi

    ERRORS=0
    if [ "$(ceph pg dump pgs | grep +recovery_toofull | wc -l)" != "1" ];
    then
      echo "One pool should have been in recovery_toofull"
      ERRORS="$(expr $ERRORS + 1)"
    fi

    ceph pg dump pgs
    ceph status
    ceph status --format=json-pretty > $dir/stat.json

    eval SEV=$(jq '.health.checks.PG_RECOVERY_FULL.severity' $dir/stat.json)
    if [ "$SEV" != "HEALTH_ERR" ]; then
      echo "PG_RECOVERY_FULL severity $SEV not HEALTH_ERR"
      ERRORS="$(expr $ERRORS + 1)"
    fi
    eval MSG=$(jq '.health.checks.PG_RECOVERY_FULL.summary.message' $dir/stat.json)
    if [ "$MSG" != "Full OSDs blocking recovery: 1 pg recovery_toofull" ]; then
      echo "PG_RECOVERY_FULL message '$MSG' mismatched"
      ERRORS="$(expr $ERRORS + 1)"
    fi
    rm -f $dir/stat.json

    if [ $ERRORS != "0" ];
    then
      return 1
    fi

    for i in $(seq 1 $pools)
    do
      delete_pool "${poolprefix}$i"
    done
    kill_daemons $dir || return 1
}


main osd-recovery-space "$@"

# Local Variables:
# compile-command: "make -j4 && ../qa/run-standalone.sh osd-recovery-space.sh"
# End: