summaryrefslogtreecommitdiffstats
path: root/src/rgw/rgw-gap-list
blob: 983e8930577905794fcca20f3085d6d1475dc21e (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
#!/usr/bin/env bash

# Last revision 2023-01-13

# NOTE: This script is based on rgw-orphan-list but performs the
# reverse calculation.

# NOTE: The awk included in this script replaces the 'ceph-diff-sorted'
# utility but duplicates its functionality.  This was done to minimize
# the number of times the massive data set must be iterated to complete
# the task.

# IMPORTANT: Affects order produced by 'sort'.
export LC_ALL=C

trap "exit 1" TERM
TOP_PID=$$

out_dir="$PWD"
timestamp=$(date -u +%Y%m%d%H%M)
lspools_err="${out_dir}/lspools-${timestamp}.error"
rados_out="${out_dir}/rados-${timestamp}.intermediate"
rados_err="${out_dir}/rados-${timestamp}.error"
rgwadmin_out="${out_dir}/radosgw-admin-${timestamp}.intermediate"
rgwadmin_err="${out_dir}/radosgw-admin-${timestamp}.error"
gap_out="${out_dir}/gap-list-${timestamp}.gap"


# field separator
# contains ascii 0xFE, designed to be a  character that won't appear
# in normal output, can only be a single character due to use in the 
# sort command
fs=$(echo -e "\xFE")

log() {
  echo $(date +%F\ %T) $(hostname -s) "$1"
}

#
# checkReturn RETURNCODE MESSAGE TERMINATE
#  RETURNCODE - ( usually $? ) of previous command
#  MESSAGE    - Message to print on non-zero return code
#  TERMINATE  - non-empty == terminate the script on non-zero return code
#
checkReturn() {
  if [ $1 -ne 0 ]; then
    error_addon=""
    if [ ! -z "$3" ]; then
      error_addon="; Terminating"
    fi
    log "ERROR: ${2} failed: returned ${1}${error_addon}"
    if [ ! -z "$3" ]; then
      >&2 echo
      >&2 echo '***'
      >&2 echo '*** WARNING: The results are incomplete. Do not use! ***'
      >&2 echo '***'
      kill -s TERM $TOP_PID
    fi
  fi
}

prompt_pool() {
  # note: all prompts go to stderr so stdout contains just the result
  rados lspools >"$temp_file" 2>"$lspools_err"
  checkReturn $? "Listing pools" 1

  >&2 echo ""
  >&2 echo "Available pools:"
  >&2 sed 's/^/    /' "$temp_file" # list pools and indent
  >&2 echo ""
  >&2 echo "Which Rados Gateway Data pool do you want to search for gaps? "
  >&2 echo ""
  >&2 echo "NOTE: If your installation has multiple bucket data pools using "
  >&2 echo "      bucket placement policies, please enter a space separated "
  >&2 echo "      list of bucket data pools to enumerate."
  >&2 echo ""
  local mypool
  read mypool
  echo $mypool
}

radosgw_radoslist() {
  log "Running 'radosgw-admin bucket radoslist'."
  rm -f "$rgwadmin_flag" &> /dev/null
  radosgw-admin bucket radoslist --rgw-obj-fs="$fs" >"$rgwadmin_out" 2>"$rgwadmin_err"
  RETVAL=$?
  if [ "$RETVAL" -ne 0 ] ;then
    touch "$rgwadmin_flag"
  fi
  checkReturn $RETVAL "radosgw-admin radoslist" 1
  log "Completed 'radosgw-admin bucket radoslist'."

  log "Sorting 'radosgw-admin bucket radoslist' output."
  sort -T ${temp_prefix} --field-separator="$fs" -k1,1 -u "$rgwadmin_out" > "$rgwadmin_temp"
  checkReturn $? "Sorting 'radosgw-admin bucket radoslist' output" 1
  log "Completed sorting 'radosgw-admin bucket radoslist'."

  log "Moving 'radosgw-admin bucket radoslist' output."
  mv -f "$rgwadmin_temp" "$rgwadmin_out"
  checkReturn $? "Moving 'radosgw-admin bucket radoslist' output" 1
  log "Completed moving 'radosgw-admin bucket radoslist' output."
}

rados_ls() {
  log "Starting 'rados ls' function."
  rm -f "$rados_flag" &> /dev/null
  rm -f "$rados_out" &> /dev/null
  local mypool
  for mypool in $pool; do
    log "Running 'rados ls' on pool ${mypool}."
    rados ls --pool="$mypool" >>"$rados_out" 2>"$rados_err"
    RETVAL=$?
    if [ "$RETVAL" -ne 0 ] ;then
      touch "$rados_flag"
    fi
    checkReturn $RETVAL "'rados ls' on pool ${mypool}" 1
    log "Completed 'rados ls' on pool ${mypool}."
  done
  if [ ! -e "$rados_flag" ]; then
    log "Sorting 'rados ls' output(s)."
    sort -T ${temp_prefix} -u "$rados_out" >"$rados_temp"
    checkReturn $? "Sorting 'rados ls' output(s)" 1

    log "Moving sorted output(s)."
    mv -f "$rados_temp" "$rados_out"
    checkReturn $? "Moving temp file to output file" 1
    log "Sorting 'rados ls' output(s) complete."
  fi
}

usage() {
  >&2 cat << EOF

WARNING   WARNING   WARNING   WARNING   WARNING   WARNING   WARNING
WARNING: 
WARNING: Command option format has changed.  Please check closely.
WARNING: 
WARNING   WARNING   WARNING   WARNING   WARNING   WARNING   WARNING

Usage: $0 [-m] [-p <pool>] [-t <temp_dir>]

Where:
  -m               Optionally, run the two listings in multiple threads.
                   --See NOTE below--

  -p <pool>        The RGW bucket data pool name, if omitted, pool name
                   will be prompted for during execution.
                   Multiple pools can be supplied as a space separated
                   double quoted list.

  -t <temp_dir>    Optionally, set the directory to use for temp space.
                   This may be required if /tmp is low on space.

NOTE: This tool is currently considered to be EXPERIMENTAL.

NOTE: False positives are possible. False positives would likely
      appear as objects that were never deleted and are fully
      intact. All results should therefore be verified.

NOTE: Multithread listing may increase performance but may also increase
      the risk of false positives when the cluster is undergoing
      modifications during the listing processes. In addition to the
      above, false positives might also include objects that were
      intentionally deleted.

EOF
  exit 1
}

multithread=0
error=0
temp_prefix="/tmp"
while getopts ":mp:t:" o; do
  case "${o}" in
    m)
      multithread=1
    ;;
    p)
      pool=${OPTARG}
    ;;
    t)
      if [ -d ${OPTARG} ]; then
        temp_prefix=${OPTARG}
      else
        echo
        echo "ERROR: Temporary directory does not exist: ${OPTARG}"
        error=1
      fi
    ;;
    *)
      echo
      echo "ERROR: Unrecognized argument: ${o}"
      error=1
    ;;
  esac
done
shift $((OPTIND-1))

temp_file=${temp_prefix}/gap-tmp.$$
rados_temp=${temp_prefix}/rados-tmp.$$
rgwadmin_temp=${temp_prefix}/radosgw-admin-tmp.$$
rados_flag=${temp_prefix}/rados-flag.$$
rgwadmin_flag=${temp_prefix}/radosgw-admin-flag.$$
incremental_grep_awk="${temp_prefix}/ig-${$}.awk"

if [ $error -gt 0 ]; then
  usage
fi

if [ -z "$pool" ]; then
  pool="$(prompt_pool)"
fi

error=0
rados ${CEPH_ARGS} lspools > ${temp_file}
checkReturn $? "rados lspools" 1
for mypool in $pool; do
  if [ $(grep -c "^${mypool}$" "${temp_file}") -eq 0 ]; then
      echo
      echo "ERROR: Supplied pool does not exist: ${mypool}"
      error=1
  fi
done

if [ $error -gt 0 ]; then
  exit 1
fi

log "Pool is \"$pool\"."
log "Note: output files produced will be tagged with the current timestamp -- ${timestamp}."

if [ $multithread -eq 1 ] ;then
  startsecs=$(date +%s)
  log "Starting multithread tasks..."
  rados_ls &
  radosgw_radoslist &
  jobs &> /dev/null  # without this, the myjobs count always equals 1 (confused)
  myjobs=$(jobs | wc -l)
  while [ $myjobs -gt 0 ]; do
    # provide minutely status update
    if [ $(( ($(date +%s)-$startsecs) % 60 )) -eq 0 ]; then
      echo
      deltasecs=$(( $(date +%s)-$startsecs ))
      log "Waiting for listing tasks to complete. Running ${myjobs} tasks for ${deltasecs} seconds."
    fi
    sleep 1
    echo -n .
    if [ -e "$rgw_admin_flag" ]; then
      exit 1
    fi
    if [ -e "$rados_flag" ]; then
      exit 2
    fi
    jobs &> /dev/null  # without this, the myjobs count always equals 1 (confused)
    myjobs=$(jobs | wc -l)
  done
  echo
else
  rados_ls
  radosgw_radoslist
fi

if [ -e "$rgw_admin_flag" ]; then
  exit 1
fi

if [ -e "$rados_flag" ]; then
  exit 2
fi

for myfile in $rados_out $rgwadmin_out; do
  if [ ! -s "${myfile}" ]; then
    log "ERROR: Empty file detected: ${myfile}"
    log "ERROR: RESULTS ARE INCOMPLETE - DO NOT USE"
    exit 1
  fi 
done

# Create an awk script in a file for parsing the two command outputs.
log "Creating awk script for comparing outputs: ${incremental_grep_awk}"

cat <<"EOF" >$incremental_grep_awk
# This awk script is used by rgw-gap-list and will sequence through
# each line in $rados_out and $rgwadmin_out exactly once.
#
# During this iteration:
#  * The 1st column of $rgwadmin_out is compared to the line of
#    $rados_out.
#  * If they are equal, the next line of $rados_out is read in and the
#    next line of $rgwadmin_out is provided via normal awk iteration.
#  * If a value appears in $rgwadmin_out, but not $rados_out, this 
#    indicates a possible deleted tail object and the accompanying
#    bucket / user object name is output, assuming it had not been
#    previously identified.
#    - A map of outputted bucket / user object is maintained in memory
#  * If a value appears in $rados_out, but not in $rgwadmin_out, the
#    $rados_out file is iterated until the $rados_out line is equal
#    or > (alphabetically) the value from the $rgwadmin_out file.

function usage() {
  print "Example Usage:">>"/dev/stderr"
  print "   # limit $fs to single char that will not appear in either output">>"/dev/stderr"
  print "   # The below is Octal 376, or Hex 0xFE">>"/dev/stderr"
  print "">>"/dev/stderr"
  print "   $ fs=$(echo -e \"\\0376\")  ">>"/dev/stderr"
  print "   $ rados ls -p default.rgw.buckets.data > rados_out.txt">>"/dev/stderr"
  print "   $ radosgw-admin bucket radoslist --rgw-obj-fs=\"$fs\" \\">>"/dev/stderr"
  print "       | sort --field-separator=\"$fs\" -k 1,1 > rgwadmin_out.txt">>"/dev/stderr"
  print " ">>"/dev/stderr"
  print "   $ awk -F \"$fs\" \\">>"/dev/stderr"
  print "         -v filetwo=rados_out.txt \\">>"/dev/stderr"
  print "         -v map_out=MappedOutput.txt \\">>"/dev/stderr"
  print "         -f ig_awk \\">>"/dev/stderr"
  print "         rgwadmin_out.txt">>"/dev/stderr"
  print "">>"/dev/stderr"
  print "   Result will be provided in the 'MappedOutput.txt' file in this">>"/dev/stderr"
  print "   example.  If you'd prefer the output to be sorted, you can run">>"/dev/stderr"
  print "   $ sort MappedOutput.txt > SortedMappedOutput.txt">>"/dev/stderr"
  print "">>"/dev/stderr"
  print "">>"/dev/stderr"
  exit 1
}

function get_date_time() {
  dtstr="date +%F\\ %T"
  dtstr | getline mydt
  close(dtstr)
  return mydt
}

function status_out() {
  printf("%s % 17d\t% 17d\t% 12d\n",get_date_time(),f1_count,f2_count,lineoutCount)>>"/dev/stderr"
}

function advance_f2() {
  if ((getline f2line<filetwo) <= 0) {
    f2_eof=1
  } else {
    f2_count++
    bcount=split(f2line,b,FS)
  }
}

function test_lines() {
  if ($1==b[1]) {
    advance_f2()
    return 0
  } else if ($1<b[1]) {
    line_out()
    return 1
  } else {
    return 2
  }
}

function findnul(myfield) {
  for(i=1;i<=split(myfield,a,"");i++) {
    if(ord[a[i]]==0) {
      return 1
    }
  }
  return 0
}

function line_out() {
  if(findnul($1)) {
    # If the RADOS object name has a NUL character, skip output
    return
  }
  # Note: Intentionally using $2 and $NF below
  # Use of $NF eliminates risk of exhausting input field count
  if ($2" "$NF!=lastline) {
    # Only output a given bucket/Obj combination once
    printf("Bucket: \"%s\"  Object: \"%s\"\n", $2, $NF)>>map_out
    lastline=$2" "$NF
    lineoutCount++
  }
}

BEGIN {
  if(filetwo==""||map_out=="") {
     print "">>"/dev/stderr"
     print "">>"/dev/stderr"
     print "Missing parameter."
     print "">>"/dev/stderr"
     print "">>"/dev/stderr"
     usage()
  }
  status_delta=100000
  f1_count=0
  f2_count=0
  advance_f2()
  printf("%s File 1 Line Count\tFile 2 Line Count\tPotentially Impacted Objects\n",get_date_time())>>"/dev/stderr"
  for(n=0;n<256;n++) {
    ord[sprintf("%c",n)]=n
  }
}

{
  f1_count++
  if(f2_eof==0) {
    if(test_lines()==2) {
      while ($1>b[1]) {
        advance_f2()
      }
      test_lines()
    }
  } else {
    # If EOF hit, dump all remaining lines since they're missing
    # from filetwo
    line_out()
  }
  if((f1_count % status_delta)==0) {
    status_out()
  }
}

END {
  if(f1_count>0) {
    status_out()
  }
}

EOF


log "Begin identifying potentially impacted user object names."

echo -n > "$temp_file" # Ensure the file is empty
awk -F "$fs" -v filetwo=$rados_out -v map_out=$temp_file -f $incremental_grep_awk $rgwadmin_out
checkReturn $? "Identifying potentially impacted user object names" 1

log "Begin sorting results."
sort -T ${temp_prefix} "$temp_file" > "$gap_out"
checkReturn $? "sorting results" 1
rm -f "$temp_file"

found=$(wc -l < "$gap_out")
mydate=$(date +%F\ %T)

log "Done."

cat << EOF

Found $found *possible* gaps.
The results can be found in "${gap_out}".

Intermediate files: "${rados_out}" and "${rgwadmin_out}".

***
*** WARNING: This is EXPERIMENTAL code and the results should be used
***          with CAUTION and VERIFIED. Not everything listed is an
***          actual gap. EXPECT false positives. Every result
***          produced should be verified.
***
EOF