#!/usr/bin/env bash

# version 2024-03-11

# rgw-restore-bucket-index is an EXPERIMENTAL tool to use in case
# bucket index entries for objects in the bucket are somehow lost. It
# is expected to be needed and used rarely. A bucket name is provided
# and the data pool for that bucket is scanned for all head objects
# matching the bucket's marker. The rgw object name is then extracted
# from the rados object name, and `radosgw-admin object reindex ...`
# is used to add the bucket index entry.
#
# Because this script must process json objects, the `jq` tool must be
# installed on the system.
#
# Usage: see the usage() function below for details
#
# This tool is designed to be interactive, allowing the user to
# examine the list of objects to be reindexed before
# proceeding. However, if the "-y" option is provided, the
# script will not prompt the user and simply proceed.

# super_exit() delivers TERM to this script; on receipt remove the
# temporary files and exit with a failure status.
trap "clean ; exit 1" TERM
export TOP_PID=$$

# IMPORTANT: affects order produced by 'sort' and 'ceph-diff-sorted'
# relies on this ordering
export LC_ALL=C

# whether or not the temporary files are cleaned on completion
export clean_temps=1

# make explicit tabs easier to see in code; ANSI-C quoting keeps the
# value a real tab even if an editor or formatter rewrites whitespace
export TAB=$'\t'

#
# helper functions
#

# Terminates the whole script, including when called from a subshell
# or from one stage of a pipeline, by sending TERM to the top-level
# shell (whose TERM trap cleans up and exits 1).
super_exit() {
  # Preferred: signal the entire process group so that the other
  # stages of a running pipeline stop as well. That only works when
  # this script is the process-group leader; if it is not (e.g. it was
  # started from another script) fall back to signalling just the
  # top-level shell rather than silently carrying on.
  kill -s TERM -- "-${TOP_PID}" 2>/dev/null || kill -s TERM "${TOP_PID}"
}

# Prints the command-line help to stderr and terminates the script.
usage() {
  >&2 cat << EOF

Usage: $0 -b BUCKET [-l RADOS_LS_FILE] [-p POOL] [-y]

where:
  -b BUCKET         Required - name of the bucket to operate on
  -l RADOS_LS_FILE  Optional - file containing the output of 'rados ls -p POOL'
  -r REALM          Optional - specify the realm if not applying to the default realm
  -g ZONEGROUP      Optional - specify the zonegroup if not applying to the default zonegroup
  -z ZONE           Optional - specify the zone if not applying to the default zone
  -p POOL           Optional - data pool; if not provided will be inferred from bucket and zone information
  -t TEMP_DIR       Optional - specify a directory for temporary files other than the default of /tmp
  -y                Optional - proceed with restoring without confirming with the user
                    USE WITH CAUTION.
  -d                Optional - run with debugging output
EOF
  super_exit
}

# cleans all temporary files (unless clean_temps was cleared, which
# the -d option does so the files can be inspected afterwards)
clean() {
  if [[ "$clean_temps" == 1 ]]; then
    # Deliberately unquoted: temp_file_list is a space-separated list,
    # and any of these may still be unset if we are called early.
    rm -f $bkt_entry $temp_file_list \
      $zone_info $olh_info_enc $olh_info_json
  fi
}

# Aborts the script (via super_exit) when the partition holding
# $temp_dir has run out of blocks or inodes, since every intermediate
# result is written there.
test_temp_space() {
  local pcent ipcent

  # use df to determine percentage of data and inodes used; strip
  # out spaces and percent signs from the output, so we just have a
  # number from 0 to 100
  pcent=$(df -k "$temp_dir" --output=pcent | tail -1 | sed 's/[ %]//g')
  ipcent=$(df -k "$temp_dir" --output=ipcent | tail -1 | sed 's/[ %]//g')

  # Compare as strings: df reports "-" for file systems that do not
  # account inodes (and prints nothing on error); a numeric test would
  # emit "integer expression expected" on every call for those.
  if [[ "$pcent" == 100 || "$ipcent" == 100 ]]; then
    >&2 echo "ERROR: the temporary directory's partition is full, preventing continuation."
    >&2 echo " NOTE: the temporary directory is \"${temp_dir}\"."
    >&2 df -k "$temp_dir" -h --output="target,used,avail,pcent,iused,iavail,ipcent"
    >&2 echo " NOTE: cleaning temporary files before exiting...."
    super_exit
  fi
}

# number of seconds for a bucket index pending op to be completed via
# dir_suggest mechanism
export pending_op_secs=120

#
# sanity checks
#

export exit_code=0

# every external tool must be present; report all that are missing
# before giving up
tool_list="radosgw-admin ceph-dencoder jq"
for t in $tool_list; do
  if ! command -v "$t" >/dev/null 2>&1; then
    >&2 echo "ERROR: must have tool \`$t\` installed and on \$PATH for operation."
    exit_code=1
  fi
done
if [[ "$exit_code" -ne 0 ]]; then
  exit "$exit_code"
fi

# ceph-dencoder must know how to decode the types we feed it
dencode_list="RGWOLHInfo"
for t in $dencode_list; do
  if ! ceph-dencoder list_types | grep -q -F -- "$t"; then
    >&2 echo "ERROR: ceph-dencoder lacking module to decode ${t}."
    exit_code=1
  fi
done
if [[ "$exit_code" -ne 0 ]]; then
  exit "$exit_code"
fi

# Determines the name of the data pool; only consulted when no pool
# was given on the command line, which therefore has the highest
# priority. Next comes the "explicit_placement" in the bucket instance
# data, and finally the "placement_rule" in the bucket instance data.
get_pool() {
  local expl_pool plmt_rule plmt_pool plmt_class pool

  # explicit_placement ("// empty" keeps a missing field from showing
  # up as the literal string "null")
  expl_pool=$(jq -r '.data.bucket_info.bucket.explicit_placement.data_pool // empty' "$bkt_inst")
  if [[ -n "$expl_pool" ]]; then
    echo "$expl_pool"
    return 0
  fi

  # placement_rule has the form "<placement-target>[/<storage-class>]"
  plmt_rule=$(jq -r '.data.bucket_info.placement_rule' "$bkt_inst")
  IFS=/ read -r plmt_pool plmt_class _ <<<"$plmt_rule"
  if [[ -z "$plmt_class" ]]; then
    plmt_class=STANDARD
  fi

  # multisite_spec is a space-separated option list; splitting is intended
  radosgw-admin zone get $multisite_spec >"$zone_info" 2>/dev/null
  test_temp_space

  # Prefer the placement target whose key equals the rule exactly and
  # only fall back to the historical substring match when there is no
  # exact hit. The values are passed with --arg so that names with
  # dashes or other punctuation cannot alter the jq program.
  pool=$(jq -r --arg target "$plmt_pool" --arg class "$plmt_class" '
    .placement_pools
    | (map(select(.key == $target))) as $exact
    | (if ($exact | length) > 0 then $exact
       else map(select(.key | contains($target))) end)
    | .[]
    | .val.storage_classes[$class].data_pool // empty
  ' "$zone_info")

  # an empty result or more than one candidate cannot be used
  if [[ -z "$pool" || "$pool" == *$'\n'* ]]; then
    # stderr, because stdout is captured by the caller's $(get_pool)
    >&2 echo "ERROR: unable to determine pool."
    super_exit
    return 1
  fi
  echo "$pool"
}

# Prints $1 with every character that is special in a POSIX extended
# regular expression backslash-escaped, so the result matches $1
# literally inside a `grep -E` pattern.
ere_escape() {
  printf '%s' "$1" | sed -e 's#[\.*^$+?()[{|]#\\&#g'
}

# Like ere_escape, but additionally escapes "/" so the result can be
# embedded in a `sed -E 's/.../.../'` pattern.
sed_ere_escape() {
  printf '%s' "$1" | sed -e 's#[\.*^$+?()[{|/]#\\&#g'
}

export bucket=""
export temp_dir=/tmp
pool=""
multisite_spec=""
lsoutput=""
debug=0
proceed="" # only -y may skip the confirmation, never a stray environment variable

while getopts "b:l:p:r:g:z:ydt:" o; do
  case "${o}" in
    b)
      bucket="${OPTARG}"
      ;;
    l)
      if [[ -e "${OPTARG}" ]]; then
        lsoutput="${OPTARG}"
      else
        >&2 echo
        >&2 echo "ERROR: Provided 'rados ls' output file name does not exist. ${OPTARG}"
        exit 1
      fi
      ;;
    p)
      pool="${OPTARG}"
      ;;
    r)
      multisite_spec="$multisite_spec --rgw-realm=${OPTARG}"
      ;;
    g)
      multisite_spec="$multisite_spec --rgw-zonegroup=${OPTARG}"
      ;;
    z)
      multisite_spec="$multisite_spec --rgw-zone=${OPTARG}"
      ;;
    y)
      echo "NOTICE: This tool is currently considered EXPERIMENTAL."
      proceed=1
      ;;
    d)
      echo Debugging On
      debug=1
      clean_temps=0
      ;;
    t)
      temp_dir="${OPTARG}"
      ;;
    *)
      echo
      usage
      exit 1 # usage should exit also
      ;;
  esac
done
shift $((OPTIND - 1))

# Kept (and exported) for compatibility; the script itself now goes
# through run_rgwadmin below instead of eval-ing this string.
if [[ "$debug" == 1 ]]; then
  export debugging_rgwadmin=" --debug-rgw=20 --debug-ms=20 "
else
  export debugging_rgwadmin=" 2>/dev/null "
fi

# Runs radosgw-admin with the given arguments, adding verbose logging
# in debug mode and discarding stderr otherwise. Using a function
# rather than eval means the bucket name is never re-parsed as shell
# code.
run_rgwadmin() {
  if [[ "$debug" == 1 ]]; then
    radosgw-admin "$@" --debug-rgw=20 --debug-ms=20
  else
    radosgw-admin "$@" 2>/dev/null
  fi
}

if [[ ! -d "$temp_dir" ]]; then
  >&2 echo "ERROR: temporary directory $temp_dir is not a directory"
  exit 1
fi

# temporary files
export bkt_entry=${temp_dir}/rgwrbi-bkt-entry.$$
export bkt_inst=${temp_dir}/rgwrbi-bkt-inst.$$
export marker_ls=${temp_dir}/rgwrbi-marker-ls.$$
export obj_list=${temp_dir}/rgwrbi-object-list.$$
export obj_list_ver=${temp_dir}/rgwrbi-object-list-ver.$$
export zone_info=${temp_dir}/rgwrbi-zone-info.$$
export olh_info_enc=${temp_dir}/rgwrbi-olh-info-enc.$$
export olh_info_json=${temp_dir}/rgwrbi-olh-info-json.$$
export debug_log=${temp_dir}/rgwrbi-debug-log.$$

export temp_file_list="$bkt_entry $bkt_inst $marker_ls $obj_list $obj_list_ver $zone_info $olh_info_enc $olh_info_json"

# special code path for versioned buckets
#
# Reads the rgw object names in $obj_list and, for each of them, writes
# "<name><TAB><instance>" lines to $obj_list_ver: first the instances
# that are not the current one, ordered by mtime, then the instance the
# OLH points at. A trailing tab (empty instance) is stripped.
#
# Globals read: marker, pool, marker_ls, obj_list, obj_list_ver,
#   olh_info_enc, olh_info_json, temp_dir, debug, debug_log
handle_versioned() {
  local tab=$'\t'
  local o head_oid head_re o_re o_sed marker_re marker_sed
  local olh_line olh_obj olh_loc last_instance filter_out_last_instance
  local obj loc inst
  local -a loc_args filter_opts

  # the object names and the marker end up inside regular expressions,
  # so they must be escaped to match literally
  marker_re=$(ere_escape "$marker")
  marker_sed=$(sed_ere_escape "$marker")

  while IFS= read -r o; do
    o_re=$(ere_escape "$o")
    o_sed=$(sed_ere_escape "$o")

    # A leading underscore of a plain head object's name is quoted with
    # a second underscore in the rados object name.
    if [[ "$o" == _* ]]; then
      head_oid="_${o}"
    else
      head_oid="$o"
    fi
    head_re=$(ere_escape "$head_oid")

    # determine object and locator for OLH; this is an exact string
    # comparison on the rados object name (the part before the tab), so
    # neither regex characters in the name nor other objects that
    # merely end in the same text can interfere
    olh_line=$(head_name="${marker}_${head_oid}" \
      awk -F'\t' '($1 "") == (ENVIRON["head_name"] "") { print; exit }' "$marker_ls")
    olh_obj=${olh_line%%"$tab"*} # obj everything before tab
    if [[ "$olh_line" == *"$tab"* ]]; then
      olh_loc=${olh_line##*"$tab"} # locator everything after tab
    else
      olh_loc=""
    fi
    loc_args=()
    if [[ -n "$olh_loc" ]]; then
      loc_args=(--object-locator "$olh_loc")
    fi

    # process OLH object; determine final instance or delete-marker
    rados -p "$pool" getxattr "$olh_obj" user.rgw.olh.info "${loc_args[@]}" >"$olh_info_enc"
    test_temp_space
    ceph-dencoder import "$olh_info_enc" type RGWOLHInfo decode dump_json >"$olh_info_json"
    test_temp_space
    last_instance=$(jq -r '.target.key.instance // empty' "$olh_info_json")
    if [[ -z "$last_instance" ]]; then
      # filters out entry without an instance
      filter_out_last_instance="${marker_re}_[^_]"
      filter_opts=(-E)
    else
      # filters out entry with an instance; matched as a fixed string
      filter_out_last_instance="$last_instance"
      filter_opts=(-F)
    fi

    if [[ "$debug" == 1 ]]; then
      echo "working on versioned $o"
      echo "last instance is $last_instance"
      echo "filter_out_last_instance is $filter_out_last_instance"
    fi >>"$debug_log"
    test_temp_space

    # we currently don't need the delete marker, but we can have access to it
    # delete_marker=$(jq -r ".removed" $olh_info_json) # true or false

    # versioned head objects: either the plain head or
    # "<marker>__:<instance>_<name>", optionally followed by a tab and
    # a locator. A real tab is used because grep -E does not treat
    # "\t" as a tab.
    grep -E -- "^${marker_re}_(_:[^_]+_${o_re}|${head_re})(${tab}.*)?\$" "$marker_ls" |
      while IFS=$tab read -r obj loc; do
        if [[ "$debug" == 1 ]]; then
          echo "obj=$obj ; loc=$loc" >>"$debug_log"
        fi
        test_temp_space
        loc_args=()
        if [[ -n "$loc" ]]; then
          loc_args=(--object-locator "$loc")
        fi
        rados -p "$pool" stat2 "$obj" "${loc_args[@]}"
      done |                                                   # output of stat2, which includes mtime
      sort -T "$temp_dir" -k 3 |                               # stat2 but sorted by mtime earlier to later
      grep -v "${filter_opts[@]}" -e "$filter_out_last_instance" | # remove the final instance in case it's not last
      # sed 1) removes pool and marker, 2) removes indicator of
      # version id, 3) removes obj name including escaped
      # leading underscores. Afterwards the line is empty or
      # contains a version id.
      sed -E \
        -e "s/.*${marker_sed}//" \
        -e 's/^__://' \
        -e "s/_+${o_sed}.*//" |
      # put the object name and a tab in front of every line; done
      # with printf so the name is never interpreted as a sed
      # replacement
      while IFS= read -r inst; do
        printf '%s\t%s\n' "$o" "$inst"
      done
    printf '%s\t%s\n' "$o" "$last_instance" # now add the final instance; could be delete marker
  done <"$obj_list" 2>/dev/null | sed "s/${tab}\$//" >"$obj_list_ver"
  test_temp_space

} # handle_versioned

if [[ -z "$bucket" ]]; then
  >&2 echo
  >&2 echo "ERROR: Bucket option ( -b ) is required."
  usage
  exit 1
fi

# read bucket entry metadata
# (multisite_spec is a space-separated option list; splitting is intended)
run_rgwadmin metadata get "bucket:$bucket" $multisite_spec >"$bkt_entry"
test_temp_space
marker=$(jq -r '.data.bucket.marker // empty' "$bkt_entry")
bucket_id=$(jq -r '.data.bucket.bucket_id // empty' "$bkt_entry")
export marker bucket_id
if [[ -z "$marker" || -z "$bucket_id" ]]; then
  >&2 echo "ERROR: Unable to read entry-point metadata for bucket \"$bucket\"."
  >&2 echo " Please make sure that the bucket name is correct and, if not using"
  >&2 echo " the defaults, that the realm, zonegroup, and/or zone"
  >&2 echo " are correctly specified."
  clean
  usage
  exit 1
fi

echo "marker is $marker"
echo "bucket_id is $bucket_id"

# read bucket instance metadata
run_rgwadmin metadata get "bucket.instance:${bucket}:$bucket_id" $multisite_spec >"$bkt_inst"
test_temp_space

# examine number of bucket index shards
num_shards=$(jq ".data.bucket_info.num_shards" "$bkt_inst")
echo "number of bucket index shards is $num_shards"

# determine data pool
if [[ -z "$pool" ]]; then
  pool=$(get_pool)
fi
if [[ -z "$pool" ]]; then
  >&2 echo "ERROR: unable to determine the data pool for bucket \"$bucket\"."
  clean
  exit 1
fi
echo "data pool is $pool"

# handle versioned buckets
bkt_flags=$(jq '.data.bucket_info.flags // empty' "$bkt_inst")
export bkt_flags
if [[ -z "$bkt_flags" ]]; then
  >&2 echo "ERROR: unable to read instance metadata for bucket \"$bucket\"."
  clean
  exit 1
fi

# search the data pool for all of the head objects that begin with the
# marker that are not in namespaces (indicated by an extra underscore
# and colon) and then strip away all but the rgw object name,
# including optional locator that follows a tab. Initial underscores
# are quoted with an underscore, so swap the first double with a
# single.
marker_re=$(ere_escape "$marker")
marker_sed=$(sed_ere_escape "$marker")

if [[ -z "$lsoutput" ]]; then
  (rados -p "$pool" ls | grep -E -- "^${marker_re}_" >"$marker_ls") 2>/dev/null
  test_temp_space
else
  (grep -E -- "^${marker_re}_" "${lsoutput}" >"$marker_ls") 2>/dev/null
  test_temp_space
fi

(sed -E $'s/\t.*//' "$marker_ls" |
  grep -v -E -- "^${marker_re}__[^_]+_" |
  sed -E "s/^${marker_sed}_(.*)/\1/" |
  sed 's/^__/_/' >"$obj_list") 2>/dev/null
test_temp_space

# mask bit indicating it's a versioned bucket
export is_versioned=$((bkt_flags & 2))
export is_suspended=$((bkt_flags & 4))
if [[ "$is_versioned" -ne 0 ]]; then
  echo "INFO: this bucket appears to be versioned."
  handle_versioned
else
  # no additional versioned handling, so just hard link
  ln "$obj_list" "$obj_list_ver"
fi

# handle the case where the resulting object list file is empty
if [[ ! -s "$obj_list_ver" ]]; then
  echo "NOTICE: No head objects for bucket \"$bucket\" were found in pool \"$pool\", so nothing was recovered."
  clean
  exit 0
fi

if [[ -z "$proceed" ]]; then
  # warn user and get permission to proceed
  echo "NOTICE: This tool is currently considered EXPERIMENTAL."
  echo "The list of objects that we will attempt to restore can be found in \"$obj_list_ver\"."
  echo "Please review the object names in that file (either below or in another window/terminal) before proceeding."
  while true; do
    # a failed read means stdin hit end-of-file; without this check the
    # loop would spin forever when run non-interactively without -y
    if ! read -r -p "Type \"proceed!\" to proceed, \"view\" to view object list, or \"q\" to quit: " action; then
      echo
      >&2 echo "ERROR: no response could be read; not proceeding."
      clean
      exit 1
    fi
    if [[ "$action" == "q" ]]; then
      echo "Exiting..."
      clean
      exit 0
    elif [[ "$action" == "view" ]]; then
      echo "Viewing..."
      less "$obj_list_ver"
    elif [[ "$action" == "proceed!" ]]; then
      echo "Proceeding..."
      break
    else
      echo "Error: response \"$action\" is not understood."
    fi
  done
fi

run_rgwadmin object reindex --bucket="$bucket" --objects-file="$obj_list_ver" \
  $multisite_spec --yes-i-really-mean-it
reindex_status=$?

clean

# stderr of radosgw-admin is discarded outside debug mode, so a failure
# would otherwise go completely unnoticed
if ((reindex_status != 0)); then
  >&2 echo "ERROR: \`radosgw-admin object reindex\` failed with exit status ${reindex_status}."
  exit "$reindex_status"
fi
echo Done