summaryrefslogtreecommitdiffstats
path: root/src/rgw/driver/rados/rgw_rados.cc
diff options
context:
space:
mode:
Diffstat (limited to 'src/rgw/driver/rados/rgw_rados.cc')
-rw-r--r--src/rgw/driver/rados/rgw_rados.cc105
1 files changed, 76 insertions, 29 deletions
diff --git a/src/rgw/driver/rados/rgw_rados.cc b/src/rgw/driver/rados/rgw_rados.cc
index a133b54dc59..69075c506f1 100644
--- a/src/rgw/driver/rados/rgw_rados.cc
+++ b/src/rgw/driver/rados/rgw_rados.cc
@@ -1930,11 +1930,58 @@ int RGWRados::Bucket::List::list_objects_ordered(
": finished due to getting past requested namespace \"" <<
params.ns << "\"" << dendl;
goto done;
- }
+ } else if (!obj.ns.empty()) {
+ // We're in the namespace range and we're enforcing an empty
+ // namespace, therefore we can skip past a contiguous chunk
+ // of namespaced entries. Namespaces are demarcated in the
+ // index key by underscores before and after the namespace
+ // name (e.g., "_somenamespace_somekey"). Also, regular
+ // entries might begin with an underscore, in which case
+ // they're escaped with another underscore (e.g., "_foobar"
+ // is encoded as "__foobar"). We also have to account for
+ // the fact that in lexical ordering there are characters
+ // both before underscore (e.g., uppercase letters) and
+ // after (e.g., lowercase letters). So that means there can
+ // be five distinct and meaningful regions in the lexical
+ // ordering of entries, which we'll use examples to help
+ // illustrate:
+
+ // 1. FOOBAR (regular pre-underscore)
+ // 2. _BAZ_foobar (namespaced, with namespace pre-underscore)
+ // 3. __foobar (regular with escaped underscore)
+ // 4. _baz_foobar (namespaced, with namespace post-underscore)
+ // 5. foobar (regular, post-underscore)
+
+ // So if we're skipping namespaces and recognize we're in
+ // region 2, we must skip to region 3. And if we recognize
+ // we're in region 4, we skip to region 5.
+ rgw_obj_index_key potential_marker;
+ if (obj.ns[0] < '_') {
+ // We're in region 2, so need to skip to region 3. The
+ // caret (^) is the ASCII character that precedes
+ // underscore, so we'll set the marker to the
+ // caret/circumflex followed by 0xFF, so the key after can
+ // be in the double underscore range.
+ potential_marker = rgw_obj_index_key("_^\xFF");
+ } else {
+ // we're past the escaped underscore region (i.e.,
+ // starting with two underscores), so we can skip past the
+ // underscore region
+ potential_marker = rgw_obj_index_key("_\xFF");
+ }
+
+ if (cur_marker < potential_marker) {
+ ldpp_dout(dpp, 20) << __func__ <<
+ ": skipping past region of namespaced entries, starting with \"" <<
+ entry.key << "\"" << dendl;
+ cur_marker = potential_marker;
+ break; // leave inner loop (for) and allow another cls call
+ }
+ }
- /* we're skipping past namespaced objects */
+ // we're skipping past namespaced objects
ldpp_dout(dpp, 20) << __func__ <<
- ": skipping past namespaced objects, including \"" << entry.key <<
+ ": skipping past individual namespaced entry \"" << entry.key <<
"\"" << dendl;
continue;
}
@@ -1955,7 +2002,7 @@ int RGWRados::Bucket::List::list_objects_ordered(
if (params.access_list_filter &&
!params.access_list_filter(obj.name, index_key.name)) {
ldpp_dout(dpp, 20) << __func__ <<
- ": skipping past namespaced objects, including \"" << entry.key <<
+ ": skipping past filtered out entry \"" << entry.key <<
"\"" << dendl;
continue;
}
@@ -3343,12 +3390,17 @@ int RGWRados::Object::Write::_do_write_meta(uint64_t size, uint64_t accounted_si
return 0;
done_cancel:
- int ret = index_op->cancel(rctx.dpp, meta.remove_objs, rctx.y, log_op);
- if (ret < 0) {
- ldpp_dout(rctx.dpp, 0) << "ERROR: index_op.cancel() returned ret=" << ret << dendl;
- }
+ // if r == -ETIMEDOUT, rgw can't determine whether or not the rados op succeeded
+ // we shouldn't be calling index_op->cancel() in this case
+ // Instead, we should leave that pending entry in the index so that bucket listing can recover with check_disk_state() and cls_rgw_suggest_changes()
+ if (r != -ETIMEDOUT) {
+ int ret = index_op->cancel(rctx.dpp, meta.remove_objs, rctx.y, log_op);
+ if (ret < 0) {
+ ldpp_dout(rctx.dpp, 0) << "ERROR: index_op.cancel() returned ret=" << ret << dendl;
+ }
- meta.canceled = true;
+ meta.canceled = true;
+ }
/* we lost in a race. There are a few options:
* - existing object was rewritten (ECANCELED)
@@ -5252,13 +5304,7 @@ int RGWRados::restore_obj_from_cloud(RGWLCCloudTierCtx& tier_ctx,
ceph::real_time restore_time = real_clock::now();
{
- char buf[32];
- utime_t ut(restore_time);
- snprintf(buf, sizeof(buf), "%lld.%09lld",
- (long long)ut.sec(),
- (long long)ut.nsec());
bufferlist bl;
- bl.append(buf, 32);
encode(restore_time, bl);
attrs[RGW_ATTR_RESTORE_TIME] = std::move(bl);
}
@@ -5278,13 +5324,7 @@ int RGWRados::restore_obj_from_cloud(RGWLCCloudTierCtx& tier_ctx,
delete_at = expiration_date;
{
- char buf[32];
- utime_t ut(expiration_date);
- snprintf(buf, sizeof(buf), "%lld.%09lld",
- (long long)ut.sec(),
- (long long)ut.nsec());
bufferlist bl;
- bl.append(buf, 32);
encode(expiration_date, bl);
attrs[RGW_ATTR_RESTORE_EXPIRY_DATE] = std::move(bl);
}
@@ -5445,7 +5485,7 @@ int RGWRados::delete_bucket(RGWBucketInfo& bucket_info, RGWObjVersionTracker& ob
}
/* if the bucket is not synced we can remove the meta file */
- if (!svc.zone->is_syncing_bucket_meta(bucket)) {
+ if (!svc.zone->is_syncing_bucket_meta()) {
RGWObjVersionTracker objv_tracker;
r = ctl.bucket->remove_bucket_instance_info(bucket, bucket_info, y, dpp);
if (r < 0) {
@@ -6105,7 +6145,11 @@ int RGWRados::Object::Delete::delete_obj(optional_yield y, const DoutPrefixProvi
const bool need_invalidate = (r == -ECANCELED);
int64_t poolid = ioctx.get_id();
- if (r >= 0) {
+ if (r == -ETIMEDOUT) {
+ // rgw can't determine whether or not the delete succeeded, shouldn't be calling either of complete_del() or cancel()
+ // leaving that pending entry in the index so that bucket listing can recover with check_disk_state() and cls_rgw_suggest_changes()
+ ldpp_dout(dpp, 0) << "ERROR: rgw_rados_operate returned r=" << r << dendl;
+ } else if (r >= 0 || r == -ENOENT) {
tombstone_cache_t *obj_tombstone_cache = store->get_tombstone_cache();
if (obj_tombstone_cache) {
tombstone_entry entry{*state};
@@ -6918,13 +6962,13 @@ int RGWRados::set_attrs(const DoutPrefixProvider *dpp, RGWObjectCtx* octx, RGWBu
}
return 0;
-}
+} /* RGWRados::set_attrs() */
-static int get_part_obj_state(const DoutPrefixProvider* dpp, optional_yield y,
- RGWRados* store, RGWBucketInfo& bucket_info,
- RGWObjectCtx* rctx, RGWObjManifest* manifest,
- int part_num, int* parts_count, bool prefetch,
- RGWObjState** pstate, RGWObjManifest** pmanifest)
+int RGWRados::get_part_obj_state(const DoutPrefixProvider* dpp, optional_yield y,
+ RGWRados* store, RGWBucketInfo& bucket_info,
+ RGWObjectCtx* rctx, RGWObjManifest* manifest,
+ int part_num, int* parts_count, bool prefetch,
+ RGWObjState** pstate, RGWObjManifest** pmanifest)
{
if (!manifest) {
return -ERR_INVALID_PART;
@@ -7003,6 +7047,9 @@ static int get_part_obj_state(const DoutPrefixProvider* dpp, optional_yield y,
// update the object size
sm->state.size = part_manifest.get_obj_size();
+ if (!sm->state.attrset.count(RGW_ATTR_COMPRESSION)) {
+ sm->state.accounted_size = sm->state.size;
+ }
*pmanifest = &part_manifest;
return 0;