From de9ce1e27322cdf5c1572a0b8eff4a33547757c0 Mon Sep 17 00:00:00 2001 From: "lu.shasha" Date: Mon, 2 Dec 2024 17:10:23 +0800 Subject: rgw: fix stale entries in bucket indexes If rados_osd_op_timeout is set and the primary osd is slow, the rgw_rados_operate for deleting the rgw head obj may return -ETIMEDOUT. rgw can't determine whether or not the delete succeeded, so we shouldn't be calling index_op.complete_del() or cancel(). Instead, we should leave that pending entry in the index so that bucket listing can recover with check_disk_state() and cls_rgw_suggest_changes(). When raced with another delete op, deleting the rgw head obj may return -ENOENT; in that case, call index_op.complete_del() instead of index_op.cancel(). Fixes: https://tracker.ceph.com/issues/58965 Signed-off-by: Shasha Lu --- src/rgw/driver/rados/rgw_rados.cc | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'src/rgw') diff --git a/src/rgw/driver/rados/rgw_rados.cc b/src/rgw/driver/rados/rgw_rados.cc index a133b54dc59..37b6dd37465 100644 --- a/src/rgw/driver/rados/rgw_rados.cc +++ b/src/rgw/driver/rados/rgw_rados.cc @@ -6105,7 +6105,11 @@ int RGWRados::Object::Delete::delete_obj(optional_yield y, const DoutPrefixProvi const bool need_invalidate = (r == -ECANCELED); int64_t poolid = ioctx.get_id(); - if (r >= 0) { + if (r == -ETIMEDOUT) { + // rgw can't determine whether or not the delete succeeded, shouldn't be calling either of complete_del() or cancel() + // leaving that pending entry in the index so that bucket listing can recover with check_disk_state() and cls_rgw_suggest_changes() + ldpp_dout(dpp, 0) << "ERROR: rgw_rados_operate returned r=" << r << dendl; + } else if (r >= 0 || r == -ENOENT) { tombstone_cache_t *obj_tombstone_cache = store->get_tombstone_cache(); if (obj_tombstone_cache) { tombstone_entry entry{*state}; -- cgit v1.2.3