summaryrefslogtreecommitdiffstats
path: root/qa/workunits/rgw/test_rgw_bucket_check.py
blob: 33936df2401f020a5e8dc75b02936c4ab49e5c89 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
#!/usr/bin/env python3

import logging as log
import json
import botocore
from common import exec_cmd, create_user, boto_connect, put_objects, create_unlinked_objects
from botocore.config import Config

"""
Tests behavior of radosgw-admin bucket check commands. 
"""
# The test cases in this file have been annotated for inventory.
# To extract the inventory (in csv format) use the command:
#
#   grep '^ *# TESTCASE' test_rgw_bucket_check.py | sed 's/^ *# TESTCASE //'
#
#

""" Constants """
# Identity and S3 credentials of the user that main() creates via
# create_user() and then connects as via boto_connect().
USER = 'check-tester'
DISPLAY_NAME = 'Check Testing'
ACCESS_KEY = 'OJODXSLNX4LUNHQG99PA'
SECRET_KEY = '3l6ffld34qaymfomuh832j94738aie2x4p2o8h6n'
# Name of the bucket that main() creates, exercises and finally deletes.
BUCKET_NAME = 'check-bucket'

def main():
    """
    execute bucket check commands

    Creates the test user and bucket, uploads objects before and after
    enabling versioning, adds the entries produced by
    create_unlinked_objects(), and asserts on the JSON output of
    `radosgw-admin bucket check`, `bucket check unlinked` and
    `bucket check olh` both before and after running them with `--fix`.
    The bucket is emptied and deleted at the end.

    Raises:
        AssertionError: if any checked condition does not hold.
        botocore.exceptions.ClientError: for any S3 error other than the
            ones explicitly expected (NoSuchBucket during pre-test cleanup,
            HTTP 404 on removed/unlinked entries).
    """
    create_user(USER, DISPLAY_NAME, ACCESS_KEY, SECRET_KEY)

    # a single attempt per request: the 404 checks below should see the
    # first response rather than the outcome of client-side retries
    connection = boto_connect(ACCESS_KEY, SECRET_KEY, Config(retries = {
        'total_max_attempts': 1,
    }))

    # pre-test cleanup
    try:
        bucket = connection.Bucket(BUCKET_NAME)
        bucket.objects.all().delete()
        bucket.object_versions.all().delete()
        bucket.delete()
    except botocore.exceptions.ClientError as e:
        # a missing bucket just means there is nothing to clean up
        if e.response['Error']['Code'] != 'NoSuchBucket':
            raise

    bucket = connection.create_bucket(Bucket=BUCKET_NAME)

    # uploaded before versioning is enabled; later addressed with the
    # version id 'null'
    null_version_keys = ['a', 'z']
    null_version_objs = put_objects(bucket, null_version_keys)

    connection.BucketVersioning(BUCKET_NAME).enable()

    ok_keys = ['a', 'b', 'c', 'd']
    unlinked_keys = ['c', 'd', 'e', 'f']
    ok_objs = put_objects(bucket, ok_keys)

    # TESTCASE 'recalculated bucket check stats are correct'
    log.debug('TEST: recalculated bucket check stats are correct\n')
    exec_cmd(f'radosgw-admin bucket check --fix --bucket {BUCKET_NAME}')
    out = exec_cmd(f'radosgw-admin bucket stats --bucket {BUCKET_NAME}')
    json_out = json.loads(out)
    log.debug(json_out['usage'])
    # 2 pre-versioning uploads + 4 versioned uploads
    assert json_out['usage']['rgw.main']['num_objects'] == 6

    # TESTCASE 'bucket check unlinked does not report normal entries'
    log.debug('TEST: bucket check unlinked does not report normal entries\n')
    out = exec_cmd(f'radosgw-admin bucket check unlinked --bucket {BUCKET_NAME} --min-age-hours 0 --dump-keys')
    json_out = json.loads(out)
    assert len(json_out) == 0

    unlinked_objs = create_unlinked_objects(connection, bucket, unlinked_keys)

    # TESTCASE 'bucket check unlinked finds unlistable entries'
    log.debug('TEST: bucket check unlinked finds unlistable entries\n')
    out = exec_cmd(f'radosgw-admin bucket check unlinked --bucket {BUCKET_NAME} --min-age-hours 0 --dump-keys')
    json_out = json.loads(out)
    assert len(json_out) == len(unlinked_keys)

    # TESTCASE 'unlinked entries are not listable'
    log.debug('TEST: unlinked entries are not listable\n')
    for ov in bucket.object_versions.all():
        assert (ov.key, ov.version_id) not in unlinked_objs, f'object "{ov.key}:{ov.version_id}" was found in bucket listing'

    # TESTCASE 'GET returns 404 for unlinked entry keys that have no other versions'
    log.debug('TEST: GET returns 404 for unlinked entry keys that have no other versions\n')
    noent_keys = set(unlinked_keys) - set(ok_keys)
    for key in noent_keys:
        try:
            bucket.Object(key).get()
        except botocore.exceptions.ClientError as e:
            assert e.response['ResponseMetadata']['HTTPStatusCode'] == 404
        else:
            assert False, f'GET did not return 404 for key={key} with no prior successful PUT'

    # TESTCASE 'bucket check unlinked fixes unlistable entries'
    log.debug('TEST: bucket check unlinked fixes unlistable entries\n')
    out = exec_cmd(f'radosgw-admin bucket check unlinked --bucket {BUCKET_NAME} --fix --min-age-hours 0 --rgw-olh-pending-timeout-sec 0 --dump-keys')
    json_out = json.loads(out)
    assert len(json_out) == len(unlinked_keys)
    for o in unlinked_objs:
        try:
            connection.ObjectVersion(bucket.name, o[0], o[1]).head()
        except botocore.exceptions.ClientError as e:
            assert e.response['ResponseMetadata']['HTTPStatusCode'] == 404
        else:
            assert False, f'head for unlistable object {o[0]}:{o[1]} succeeded after fix'

    # TESTCASE 'bucket check unlinked fix does not affect normal entries'
    log.debug('TEST: bucket check unlinked does not affect normal entries\n')
    all_listable = list(bucket.object_versions.all())
    assert len(all_listable) == len(ok_keys) + len(null_version_keys), 'some normal objects were not accounted for in object listing after unlinked fix'
    # build the (key, version_id) view once instead of once per object
    listable_ids = [(x.key, x.version_id) for x in all_listable]
    for o in ok_objs:
        assert o in listable_ids, "normal object not listable after fix"
        connection.ObjectVersion(bucket.name, o[0], o[1]).head()

    # TESTCASE 'bucket check unlinked does not find new unlistable entries after fix'
    log.debug('TEST: bucket check unlinked does not find new unlistable entries after fix\n')
    out = exec_cmd(f'radosgw-admin bucket check unlinked --bucket {BUCKET_NAME} --min-age-hours 0 --dump-keys')
    json_out = json.loads(out)
    assert len(json_out) == 0

    # for this set of keys we can produce leftover OLH object/entries by
    # deleting the normal object instance since we should already have a leftover
    # pending xattr on the OLH object due to the errors associated with the
    # prior unlinked entries that were created for the same keys
    leftover_pending_xattr_keys = set(ok_keys).intersection(unlinked_keys)
    objs_to_delete = [o for o in ok_objs if o[0] in leftover_pending_xattr_keys]

    for o in objs_to_delete:
        connection.ObjectVersion(bucket.name, o[0], o[1]).delete()

    for key in leftover_pending_xattr_keys:
        out = exec_cmd(f'radosgw-admin bi list --bucket {BUCKET_NAME} --object {key}')
        # presumably `bi list` output can contain raw 0x80 bytes that
        # json.loads cannot decode; substitute printable text first
        idx_entries = json.loads(out.replace(b'\x80', b'0x80'))
        assert len(idx_entries) > 0, f'failed to create leftover OLH entries for key {key}'

    # TESTCASE 'bucket check olh finds leftover OLH entries'
    log.debug('TEST: bucket check olh finds leftover OLH entries\n')
    out = exec_cmd(f'radosgw-admin bucket check olh --bucket {BUCKET_NAME} --dump-keys')
    json_out = json.loads(out)
    assert len(json_out) == len(leftover_pending_xattr_keys)

    # TESTCASE 'bucket check olh fixes leftover OLH entries'
    log.debug('TEST: bucket check olh fixes leftover OLH entries\n')
    out = exec_cmd(f'radosgw-admin bucket check olh --bucket {BUCKET_NAME} --fix --rgw-olh-pending-timeout-sec 0 --dump-keys')
    json_out = json.loads(out)
    assert len(json_out) == len(leftover_pending_xattr_keys)

    for key in leftover_pending_xattr_keys:
        out = exec_cmd(f'radosgw-admin bi list --bucket {BUCKET_NAME} --object {key}')
        idx_entries = json.loads(out.replace(b'\x80', b'0x80'))
        assert len(idx_entries) == 0, f'index entries still exist for key={key} after olh fix'

    # TESTCASE 'bucket check olh does not find new leftover OLH entries after fix'
    log.debug('TEST: bucket check olh does not find new leftover OLH entries after fix\n')
    out = exec_cmd(f'radosgw-admin bucket check olh --bucket {BUCKET_NAME} --dump-keys')
    json_out = json.loads(out)
    assert len(json_out) == 0

    # TESTCASE 'bucket check fixes do not affect null version objects'
    log.debug('TEST: verify that bucket check fixes do not affect null version objects\n')
    for o in null_version_objs:
        connection.ObjectVersion(bucket.name, o[0], 'null').head()

    all_versions = [(x.key, x.version_id) for x in bucket.object_versions.all()]
    for key in null_version_keys:
        assert (key, 'null') in all_versions

    # TESTCASE 'bucket check stats are correct in the presence of unlinked entries'
    log.debug('TEST: bucket check stats are correct in the presence of unlinked entries\n')
    # start over from an empty bucket and rebuild the same object layout
    bucket.object_versions.all().delete()
    null_version_objs = put_objects(bucket, null_version_keys)
    ok_objs = put_objects(bucket, ok_keys)
    unlinked_objs = create_unlinked_objects(connection, bucket, unlinked_keys)
    exec_cmd(f'radosgw-admin bucket check --fix --bucket {BUCKET_NAME}')
    out = exec_cmd(f'radosgw-admin bucket check unlinked --bucket {BUCKET_NAME} --fix --min-age-hours 0 --rgw-olh-pending-timeout-sec 0 --dump-keys')
    json_out = json.loads(out)
    log.info(f'"bucket check unlinked" returned {json_out}, expecting {unlinked_keys}')
    assert len(json_out) == len(unlinked_keys)
    bucket.object_versions.all().delete()
    out = exec_cmd(f'radosgw-admin bucket stats --bucket {BUCKET_NAME}')
    json_out = json.loads(out)
    log.debug(json_out['usage'])
    # with every version deleted, all usage counters must be back to zero
    assert json_out['usage']['rgw.main']['size'] == 0
    assert json_out['usage']['rgw.main']['num_objects'] == 0
    assert json_out['usage']['rgw.main']['size_actual'] == 0
    assert json_out['usage']['rgw.main']['size_kb'] == 0
    assert json_out['usage']['rgw.main']['size_kb_actual'] == 0
    assert json_out['usage']['rgw.main']['size_kb_utilized'] == 0

    # Clean up
    log.debug(f"Deleting bucket {BUCKET_NAME}")
    bucket.object_versions.all().delete()
    bucket.delete()

# Run the whole test sequence as soon as the script is executed. A failed
# assertion or unexpected exception in main() propagates from here, so the
# completion message below is only logged when every check passed.
main()
log.info("Completed bucket check tests")