src/rgw/rgw-restore-bucket-index


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404

#!/usr/bin/env bash

# version 2024-03-11

# rgw-restore-bucket-index is an EXPERIMENTAL tool to use in case
# bucket index entries for objects in the bucket are somehow lost. It
# is expected to be needed and used rarely. A bucket name is provided
# and the data pool for that bucket is scanned for all head objects
# matching the bucket's marker. The rgw object name is then extracted
# from the rados object name, and `radosgw-admin bucket reindex ...`
# is used to add the bucket index entry.
#
# Because this script must process json objects, the `jq` tool must be
# installed on the system.
#
# Usage: see the usage() function below for details
#
# This tool is designed to be interactive, allowing the user to
# examine the list of objects to be reindexed before
# proceeding. However, if the "--proceed" option is provided, the
# script will not prompt the user and simply proceed.

# When the top-level shell receives SIGTERM (the signal super_exit
# sends), remove the temporary files and exit with a failure status.
trap 'clean ; exit 1' TERM

# PID of the top-level shell, recorded so that super_exit can signal it
TOP_PID=$$
export TOP_PID

# IMPORTANT: affects order produced by 'sort' and 'ceph-diff-sorted'
# relies on this ordering
LC_ALL=C
export LC_ALL

# whether or not the temporary files are cleaned on completion
clean_temps=1
export clean_temps

# a single tab character; makes explicit tabs easier to see in code
TAB=$'\t'
export TAB


#
# helper functions
#

# Aborts the whole script from any nesting depth (function, pipeline
# stage, or command substitution) by delivering SIGTERM, which the
# top-level shell traps.
#
# The signal is first sent to the process group whose id is TOP_PID so
# that subshells are terminated too. That group only exists when the
# script is its own process-group leader; when it is not (for example
# when it was started by another non-interactive script) the group
# kill fails, so fall back to signalling the top-level shell directly.
#
# Globals: TOP_PID (read)
super_exit() {
    kill -s TERM -- "-${TOP_PID}" 2>/dev/null || kill -s TERM "${TOP_PID}"
}

# Prints the command-line synopsis and the description of every option
# to stderr, then aborts the script by calling super_exit.
usage() {
  >&2 cat << EOF

Usage: $0 -b <bucket-name> [-l <rados-ls-file>] [-r <realm-name>]
       [-g <zonegroup-name>] [-z <zone-name>] [-p <pool>] [-t <tmp-dir>] [-y] [-d]

where:
  -b <bucket-name>     Required - name of the bucket to operate on
  -l <rados-ls-file>   Optional - file containing the output of 'rados ls -p <pool>'
  -r <realm-name>      Optional - specify the realm if not applying to the default realm
  -g <zonegroup-name>  Optional - specify the zonegroup if not applying to the default zonegroup
  -z <zone-name>       Optional - specify the zone if not applying to the default zone
  -p <pool>            Optional - data pool; if not provided will be inferred from bucket and zone information
  -t <tmp-dir>         Optional - specify a directory for temporary files other than the default of /tmp
  -y                   Optional - proceed with restoring without confirming with the user
                       USE WITH CAUTION.
  -d                   Optional - run with debugging output
EOF
  super_exit
}

# Deletes the script's temporary files, but only while clean_temps is
# "1"; any other value leaves every file in place.
#
# Globals: clean_temps, bkt_entry, temp_file_list, zone_info,
#          olh_info_enc, olh_info_json (all read). The expansions are
#          intentionally unquoted so that temp_file_list splits into
#          one argument per file.
clean() {
  case "$clean_temps" in
    1)
      rm -f $bkt_entry $temp_file_list $zone_info $olh_info_enc $olh_info_json
      ;;
  esac
}

# Aborts the script (via super_exit) when the filesystem holding the
# temporary directory has run out of space or inodes, i.e. when df
# reports 100% for either of them.
#
# Globals: temp_dir (read)
test_temp_space() {
    local usage_line pcent ipcent

    # a single df call reports both percentages; only the last output
    # line (the one after the header) is of interest
    usage_line=$(df -k --output=pcent,ipcent -- "$temp_dir" | tail -1)

    # drop the percent signs and split on whitespace, leaving two
    # numbers from 0 to 100
    read -r pcent ipcent <<<"${usage_line//%/}"

    # some filesystems report "-" for inode usage, and df prints
    # nothing if it fails; anything that is not a number is treated as
    # "not full" rather than fed to an arithmetic test
    [[ "$pcent" =~ ^[0-9]+$ ]] || pcent=0
    [[ "$ipcent" =~ ^[0-9]+$ ]] || ipcent=0

    if [ "$pcent" -eq 100 ] || [ "$ipcent" -eq 100 ] ;then
        >&2 echo "ERROR: the temporary directory's partition is full, preventing continuation."
        >&2 echo "    NOTE: the temporary directory is \"${temp_dir}\"."
        >&2 df -k -h --output="target,used,avail,pcent,iused,iavail,ipcent" -- "$temp_dir"
        >&2 echo "    NOTE: cleaning temporary files before exiting...."
        super_exit
    fi
}

# number of seconds for a bucket index pending op to be completed via
# dir_suggest mechanism
export pending_op_secs=120

#
# sanity checks
#

export exit_code=0

# Every external tool the script relies on must be on $PATH. All of
# them are checked before exiting so that every missing tool is
# reported in one run. The shell builtin `command -v` is used for the
# lookup so the check itself needs no external program.
tool_list="radosgw-admin ceph-dencoder jq"
for t in $tool_list ;do
    if ! command -v "$t" >/dev/null 2>&1 ;then
        >&2 echo "ERROR: must have tool \`$t\` installed and on \$PATH for operation."
        exit_code=1
    fi
done
if [ "$exit_code" -ne 0 ] ;then
    exit $exit_code
fi

# ceph-dencoder must list every type the script asks it to decode
dencode_list="RGWOLHInfo"
for t in $dencode_list ;do
    if ! ceph-dencoder list_types | grep -q -- "$t" ;then
        >&2 echo "ERROR: ceph-dencoder lacking module to decode ${t}."
        exit_code=1
    fi
done
if [ "$exit_code" -ne 0 ] ;then
    exit $exit_code
fi

# Determines the name of the bucket's data pool and prints it on
# stdout; meant to be called as $(get_pool). The caller only invokes
# this when no pool was given on the command line. The
# "explicit_placement" in the bucket instance data has the highest
# priority, followed by the "placement_rule" in the bucket instance
# data, which is resolved against the zone's placement pools.
#
# Globals: bkt_inst, zone_info, multisite_spec (read)
# Outputs: the pool name on stdout; diagnostics go to stderr so they
#          do not end up in the caller's captured pool name
# Returns: 0 on success; on failure the script is aborted via
#          super_exit
get_pool() {
  local expl_pool plmt_rule plmt_pool plmt_class pool

  # explicit_placement ("// empty" keeps jq from printing the literal
  # string "null" when the field is absent)
  expl_pool=$(jq -r '.data.bucket_info.bucket.explicit_placement.data_pool // empty' "$bkt_inst")
  if [ -n "$expl_pool" ] ;then
    echo "$expl_pool"
    return 0
  fi

  # placement_rule has the form "<placement-id>[/<storage-class>]"
  plmt_rule=$(jq -r '.data.bucket_info.placement_rule // empty' "$bkt_inst")
  plmt_pool=${plmt_rule%%/*}
  plmt_class=""
  case "$plmt_rule" in
    */*)
      plmt_class=${plmt_rule#*/}
      plmt_class=${plmt_class%%/*}
      ;;
  esac
  if [ -z "$plmt_class" ] ;then
    plmt_class=STANDARD
  fi

  # multisite_spec is deliberately unquoted: it holds zero or more
  # separate command-line options
  radosgw-admin zone get $multisite_spec >"$zone_info" 2>/dev/null
  test_temp_space

  # The placement id and storage class are handed to jq as variables
  # rather than spliced into the program, so names containing
  # characters such as "-" cannot break the jq syntax. An exact match
  # on the placement id is preferred; only when no placement has
  # exactly that id is the previous substring match used.
  pool=$(jq -r --arg placement "$plmt_pool" --arg class "$plmt_class" '
    (.placement_pools // []) as $pools
    | [ $pools[] | select(.key == $placement) ] as $exact
    | (if ($exact | length) > 0
       then $exact
       else [ $pools[] | select(.key | contains($placement)) ]
       end)
    | .[]
    | .val.storage_classes[$class].data_pool // empty
  ' "$zone_info")

  if [ -z "$pool" ] ;then
      >&2 echo "ERROR: unable to determine pool."
      super_exit
      return 1
  fi
  echo "$pool"
}

# option defaults
export bucket=""
export temp_dir=/tmp
pool=""
multisite_spec=""
lsoutput=""
debug=0
# Must start out empty: a non-empty value skips the interactive
# confirmation, so a "proceed" variable inherited from the caller's
# environment must not be honored; only -y may set it.
proceed=""

while getopts "b:l:p:r:g:z:ydt:" o; do
    case "${o}" in
	b)
	    bucket="${OPTARG}"
	    ;;
	l)
	    if [ -e "${OPTARG}" ]; then
		lsoutput="${OPTARG}"
	    else
		echo
		echo "ERROR: Provided 'rados ls' output file name does not exist. ${OPTARG}"
		exit 1
	    fi
	    ;;
	p)
	    pool="${OPTARG}"
	    ;;
	r)
	    # the multisite options accumulate in one string that is
	    # later word-split onto the radosgw-admin command lines
	    multisite_spec="$multisite_spec --rgw-realm=${OPTARG}"
	    ;;
	g)
	    multisite_spec="$multisite_spec --rgw-zonegroup=${OPTARG}"
	    ;;
	z)
	    multisite_spec="$multisite_spec --rgw-zone=${OPTARG}"
	    ;;
	y)
	    echo "NOTICE: This tool is currently considered EXPERIMENTAL."
	    proceed=1
	    ;;
	d)
	    # debugging also keeps the temporary files for inspection
	    echo Debugging On
	    debug=1
	    clean_temps=0
	    ;;
	t)
	    temp_dir="${OPTARG}"
	    ;;
	*)
	    echo
	    usage
	    exit 1 # usage should exit also
	    ;;
    esac
done
shift $((OPTIND-1))

# text that is appended to radosgw-admin command lines which are run
# through eval: either extra debug options or a redirection that hides
# stderr
if [ "$debug" == 1 ] ;then
    export debugging_rgwadmin=" --debug-rgw=20 --debug-ms=20 "
else
    export debugging_rgwadmin=" 2>/dev/null "
fi

if [ ! -d "$temp_dir" ] ;then
    echo "ERROR: temporary directory $temp_dir is not a directory"
    exit 1
fi

# temporary files; the names are made unique per run with the PID
# NOTE(review): these names are predictable; if the temporary
# directory is writable by other users consider pointing -t at a
# private directory.
export bkt_entry=${temp_dir}/rgwrbi-bkt-entry.$$
export bkt_inst=${temp_dir}/rgwrbi-bkt-inst.$$
export marker_ls=${temp_dir}/rgwrbi-marker-ls.$$
export obj_list=${temp_dir}/rgwrbi-object-list.$$
export obj_list_ver=${temp_dir}/rgwrbi-object-list-ver.$$
export zone_info=${temp_dir}/rgwrbi-zone-info.$$
export olh_info_enc=${temp_dir}/rgwrbi-olh-info-enc.$$
export olh_info_json=${temp_dir}/rgwrbi-olh-info-json.$$
export debug_log=${temp_dir}/rgwrbi-debug-log.$$

# everything clean() removes; the debug log is not part of this list
export temp_file_list="$bkt_entry $bkt_inst $marker_ls $obj_list $obj_list_ver $zone_info $olh_info_enc $olh_info_json"

# Prints $1 with every extended-regular-expression metacharacter
# backslash-escaped, so that the result matches $1 literally when used
# inside a `grep -E` / `sed -E` pattern.
_rgwrbi_ere_escape() {
    printf '%s\n' "$1" | sed -e 's#[][\\.^$*+?(){}|]#\\&#g'
}

# special code path for versioned buckets
#
# For every object name in $obj_list this writes to $obj_list_ver one
# line per version found in $marker_ls, formatted as
# "<name><TAB><instance>" (or just "<name>" when there is no
# instance). Versions are ordered by the mtime that `rados stat2`
# reports, except that the instance recorded in the object's
# user.rgw.olh.info xattr is always written last.
#
# Object names and the marker are matched literally (they are escaped
# before being placed in patterns) and the patterns are anchored to
# the whole rados object name, so names containing "/", spaces or
# regex metacharacters work and one name cannot pick up entries of
# another name that merely ends with it.
#
# Globals: obj_list, marker_ls, marker, pool, temp_dir, TAB, debug,
#          debug_log, olh_info_enc, olh_info_json (read);
#          obj_list_ver (file written)
handle_versioned() {
    local o o_re o_sed o_repl marker_re marker_sed olh_key_re
    local olh_line olh_obj olh_loc last_instance filter_out_last_instance
    local obj loc

    marker_re=$(_rgwrbi_ere_escape "$marker")
    marker_sed=${marker_re//\//\\/}       # additionally escape the sed delimiter

    while IFS= read -r o ;do

        o_re=$(_rgwrbi_ere_escape "$o")
        o_sed=${o_re//\//\\/}             # for use inside sed s/.../
        o_repl=$(printf '%s\n' "$o" | sed -e 's#[\\&/]#\\&#g') # for a sed replacement

        # The rados name of the OLH object is "<marker>_<name>"; a
        # name that starts with an underscore has that underscore
        # quoted with one more (the inverse of how $obj_list was
        # produced).
        case "$o" in
            _*) olh_key_re="${marker_re}__${o_re}" ;;
            *)  olh_key_re="${marker_re}_${o_re}" ;;
        esac

        # determine object and locator for OLH
        olh_line=$(grep -E -e "^${olh_key_re}(${TAB}.*)?\$" "$marker_ls")
        olh_obj=$(printf '%s\n' "$olh_line" | sed 's/\t.*//') # obj everything before tab
        olh_loc=$(printf '%s\n' "$olh_line" | sed 's/^.*\t\(.*\)/\1/') # locator everything after tab

        # process OLH object; determine final instance or delete-marker
        rados -p "$pool" getxattr "$olh_obj" user.rgw.olh.info --object-locator "$olh_loc" >"$olh_info_enc"
        test_temp_space
        ceph-dencoder import "$olh_info_enc" type RGWOLHInfo decode dump_json >"$olh_info_json"
        test_temp_space
        # "// empty" keeps a missing instance from turning into the
        # literal string "null"
        last_instance=$(jq -r '.target.key.instance // empty' "$olh_info_json")
        if [ -z "$last_instance" ] ;then
            # filters out entry without an instance
            filter_out_last_instance="${marker_re}_[^_]"
        else
            # filters out entry with an instance
            filter_out_last_instance=$(_rgwrbi_ere_escape "$last_instance")
        fi

        # the log is only opened when debugging, so no stray log file
        # is left behind otherwise
        if [ "$debug" == 1 ] ;then
            {
                echo "working on versioned $o"
                echo "last instance is $last_instance"
                echo "filter_out_last_instance is $filter_out_last_instance"
            } >>"$debug_log"
        fi
        test_temp_space

        # we currently don't need the delete marker, but we can have access to it
        # delete_marker=$(jq -r ".removed" $olh_info_json) # true or false

        # select the OLH entry plus every "<marker>__:<instance>_<name>"
        # entry, each optionally followed by a tab and a locator
        grep -E -e "^(${olh_key_re}|${marker_re}__:[^_]+_${o_re})(${TAB}.*)?\$" "$marker_ls" | # versioned head objects
            while IFS="$TAB" read -r obj loc ;do
                if [ "$debug" == 1 ] ;then
                    echo "obj=$obj ; loc=$loc" >>"$debug_log"
                fi
                test_temp_space
                rados -p "$pool" stat2 "$obj" --object-locator "$loc"
            done | # output of stat2, which includes mtime
            sort -T "$temp_dir" -k 3 | # stat2 but sorted by mtime earlier to later
            grep -v -e "$filter_out_last_instance" | # remove the final instance in case it's not last

            # sed 1) removes pool and marker, 2) removes indicator of
            # version id, 3) removes obj name including escaped
            # leading underscores, and 4) inserts object name and tab
            # at front of line. Note: after 3) the line will be empty
            # or contain a version id; when it is empty 4) leaves a
            # trailing tab, which the sed after the loop removes
            sed -E \
                -e "s/.*${marker_sed}//" \
                -e 's/^__://' \
                -e "s/_+${o_sed}.*//" \
                -e "s/^/${o_repl}${TAB}/"
        printf '%s\t%s\n' "$o" "$last_instance" # now add the final instance; could be delete marker
    done <"$obj_list" 2>/dev/null | sed 's/\t$//' >"$obj_list_ver"
    test_temp_space

} # handle_versioned

if [ -z "$bucket" ]; then
  echo
  echo "ERROR: Bucket option ( -b ) is required."
  usage
  exit 1 # usage should exit also
fi

# The radosgw-admin command lines are run through eval because
# $debugging_rgwadmin may hold a redirection. Values that are spliced
# into those command lines are shell-quoted first so that unusual
# characters in them are passed through literally instead of being
# interpreted by eval.
bucket_q=$(printf '%q' "$bucket")

# read bucket entry metadata
eval "radosgw-admin metadata get bucket:${bucket_q} $debugging_rgwadmin $multisite_spec >$(printf '%q' "$bkt_entry")"
test_temp_space
# "// empty" makes jq print nothing, rather than the string "null",
# when a field is missing, so the emptiness checks below are effective
marker=$(jq -r '.data.bucket.marker // empty' "$bkt_entry")
bucket_id=$(jq -r '.data.bucket.bucket_id // empty' "$bkt_entry")
export marker bucket_id
if [ -z "$marker" ] || [ -z "$bucket_id" ] ;then
    echo "ERROR: Unable to read entry-point metadata for bucket \"$bucket\"."
    echo "       Please make sure that <bucket-name> is correct and, if not using"
    echo "       the defaults, that <realm-name>, <zonegroup-name>, and/or"
    echo "       <zone-name> are correctly specified."
    clean
    usage
    exit 1
fi

echo "marker is $marker"
echo "bucket_id is $bucket_id"

# read bucket instance metadata
eval "radosgw-admin metadata get bucket.instance:${bucket_q}:$(printf '%q' "$bucket_id") $multisite_spec $debugging_rgwadmin >$(printf '%q' "$bkt_inst")"
test_temp_space

# the flags are needed later to recognize a versioned bucket; failing
# to read them means the instance metadata is unusable, so stop before
# anything else is derived from it
bkt_flags=$(jq '.data.bucket_info.flags // empty' "$bkt_inst")
export bkt_flags
if [ -z "$bkt_flags" ] ;then
    echo "ERROR: unable to read instance metadata for bucket \"$bucket\"."
    clean
    exit 1
fi

# examine number of bucket index shards
num_shards=$(jq ".data.bucket_info.num_shards" "$bkt_inst")
echo "number of bucket index shards is $num_shards"

# determine data pool; a pool given on the command line takes
# precedence over what the metadata says
if [ -z "$pool" ]; then
  pool=$(get_pool)
fi
echo "data pool is $pool"

# search the data pool for all of the head objects that begin with the
# marker that are not in namespaces (indicated by an extra underscore
# and colon) and then strip away all but the rgw object name,
# including optional locator that follows a tab. Initial underscores
# are quoted with an underscore, so swap the first double with a
# single.
if [ -z "$lsoutput" ]; then
  rados -p "$pool" ls 2>/dev/null | grep "^${marker}_" >"$marker_ls" 2>/dev/null
  ls_status=${PIPESTATUS[0]}
  test_temp_space
  # a failed listing must not be mistaken for an empty bucket
  if [ "$ls_status" -ne 0 ] ;then
    >&2 echo "ERROR: unable to list the objects in pool \"$pool\" (\`rados ls\` exited with status $ls_status)."
    clean
    exit 1
  fi
else
  ( grep "^${marker}_" "${lsoutput}" >"$marker_ls" ) 2>/dev/null
  test_temp_space
fi

# 1) drop the locator, 2) drop namespaced entries, 3) drop the marker
# prefix, 4) un-quote a leading underscore
( sed -E 's/\t.*//' "$marker_ls" |
    grep -v -E "^${marker}__[^_]+_" |
    sed -E "s/^${marker}_(.*)/\1/" |
    sed 's/^__/_/' >"$obj_list" ) 2>/dev/null
test_temp_space

# mask bit indicating it's a versioned bucket
export is_versioned=$(( bkt_flags & 2 ))
export is_suspended=$(( bkt_flags & 4 ))
if [ "$is_versioned" -ne 0 ] ;then
    echo "INFO: this bucket appears to be versioned."
    handle_versioned
else
    # no additional versioned handling, so just hard link
    ln -- "$obj_list" "$obj_list_ver"
fi

# handle the case where the resulting object list file is empty
if [ ! -s "$obj_list_ver" ] ;then
    echo "NOTICE: No head objects for bucket \"$bucket\" were found in pool \"$pool\", so nothing was recovered."
    clean
    exit 0
fi

if [ -z "$proceed" ] ;then
    # warn user and get permission to proceed
    echo "NOTICE: This tool is currently considered EXPERIMENTAL."
    echo "The list of objects that we will attempt to restore can be found in \"$obj_list_ver\"."
    echo "Please review the object names in that file (either below or in another window/terminal) before proceeding."
    while true ; do
	# read fails at end of input (e.g. stdin is closed or is an
	# exhausted file); without this check the loop would spin
	# forever on an empty response
	if ! read -r -p "Type \"proceed!\" to proceed, \"view\" to view object list, or \"q\" to quit: " action ;then
	    echo
	    >&2 echo "ERROR: no response could be read from standard input; nothing was restored."
	    clean
	    exit 1
	fi
	if [ "$action" == "q" ] ;then
	    echo "Exiting..."
	    clean
	    exit 0
	elif [ "$action" == "view" ] ;then
	    echo "Viewing..."
	    less "$obj_list_ver"
	elif [ "$action" == "proceed!" ] ;then
	    echo "Proceeding..."
	    break
	else
	    echo "Error: response \"$action\" is not understood."
	fi
    done
fi

# The command line is run through eval because $debugging_rgwadmin may
# hold a redirection; the bucket name and file name are shell-quoted
# first so eval passes them through literally.
eval "radosgw-admin object reindex --bucket=$(printf '%q' "$bucket") --objects-file=$(printf '%q' "$obj_list_ver") $multisite_spec --yes-i-really-mean-it $debugging_rgwadmin"
reindex_status=$?

clean

# report a failed reindex instead of claiming success
if [ "$reindex_status" -ne 0 ] ;then
    >&2 echo "ERROR: \`radosgw-admin object reindex\` failed with exit status $reindex_status."
    exit "$reindex_status"
fi
echo Done