diff options
Diffstat (limited to 'src/rgw/driver/rados/rgw_rados.cc')
-rw-r--r-- | src/rgw/driver/rados/rgw_rados.cc | 105 |
1 files changed, 76 insertions, 29 deletions
diff --git a/src/rgw/driver/rados/rgw_rados.cc b/src/rgw/driver/rados/rgw_rados.cc index 2ee5f545326..a183feabe2a 100644 --- a/src/rgw/driver/rados/rgw_rados.cc +++ b/src/rgw/driver/rados/rgw_rados.cc @@ -1930,11 +1930,58 @@ int RGWRados::Bucket::List::list_objects_ordered( ": finished due to getting past requested namespace \"" << params.ns << "\"" << dendl; goto done; - } + } else if (!obj.ns.empty()) { + // We're in the namespace range and we're enforcing an empty + // namespace, therefore we can skip past a contiguous chunk + // of namespaced entries. Namespaces are demarcated in the + // index key by underscores before and after the namespace + // name (e.g., "_somenamespace_somekey"). Also, regular + // entries might begin with an underscore, in which case + // they're escaped with another underscore (e.g., "_foobar" + // is encoded as "__foobar"). We also have to account for + // the fact that in lexical ordering there are characters + // both before underscore (e.g., uppercase letters) and + // after (e.g., lowercase letters). So that means there can + // be five distinct and meaningful regions in the lexical + // ordering of entries, which we'll use examples to help + // illustrate: + + // 1. FOOBAR (regular pre-underscore) + // 2. _BAZ_foobar (namespaced, with namespace pre-underscore) + // 3. __foobar (regular with escaped underscore) + // 4. _baz_foobar (namespaced, with namespace post-underscore) + // 5. foobar (regular, post-underscore) + + // So if we're skipping namespaces and recognize we're in + // region 2, we must skip to region 3. And if we recognize + // we're in region 4, we skip to region 5. + rgw_obj_index_key potential_marker; + if (obj.ns[0] < '_') { + // We're in region 2, so need to skip to region 3. The + // caret (^) is the ASCII character that precedes + // underscore, so we'll set the marker to an underscore, the + // caret/circumflex, and then 0xFF, so the key after can + // be in the double underscore range. 
+ potential_marker = rgw_obj_index_key("_^\xFF"); + } else { + // we're past the escaped underscore region (i.e., + // starting with two underscores), so we can skip past the + // underscore region + potential_marker = rgw_obj_index_key("_\xFF"); + } + + if (cur_marker < potential_marker) { + ldpp_dout(dpp, 20) << __func__ << + ": skipping past region of namespaced entries, starting with \"" << + entry.key << "\"" << dendl; + cur_marker = potential_marker; + break; // leave inner loop (for) and allow another cls call + } + } - /* we're skipping past namespaced objects */ + // we're skipping past namespaced objects ldpp_dout(dpp, 20) << __func__ << - ": skipping past namespaced objects, including \"" << entry.key << + ": skipping past individual namespaced entry \"" << entry.key << "\"" << dendl; continue; } @@ -1955,7 +2002,7 @@ int RGWRados::Bucket::List::list_objects_ordered( if (params.access_list_filter && !params.access_list_filter(obj.name, index_key.name)) { ldpp_dout(dpp, 20) << __func__ << - ": skipping past namespaced objects, including \"" << entry.key << + ": skipping past filtered out entry \"" << entry.key << "\"" << dendl; continue; } @@ -3343,12 +3390,17 @@ int RGWRados::Object::Write::_do_write_meta(uint64_t size, uint64_t accounted_si return 0; done_cancel: - int ret = index_op->cancel(rctx.dpp, meta.remove_objs, rctx.y, log_op); - if (ret < 0) { - ldpp_dout(rctx.dpp, 0) << "ERROR: index_op.cancel() returned ret=" << ret << dendl; - } + // if r == -ETIMEDOUT, rgw can't determine whether or not the rados op succeeded + // we shouldn't be calling index_op->cancel() in this case + // Instead, we should leave that pending entry in the index so that bucket listing can recover with check_disk_state() and cls_rgw_suggest_changes() + if (r != -ETIMEDOUT) { + int ret = index_op->cancel(rctx.dpp, meta.remove_objs, rctx.y, log_op); + if (ret < 0) { + ldpp_dout(rctx.dpp, 0) << "ERROR: index_op.cancel() returned ret=" << ret << dendl; + } - meta.canceled = 
true; + meta.canceled = true; + } /* we lost in a race. There are a few options: * - existing object was rewritten (ECANCELED) @@ -5252,13 +5304,7 @@ int RGWRados::restore_obj_from_cloud(RGWLCCloudTierCtx& tier_ctx, ceph::real_time restore_time = real_clock::now(); { - char buf[32]; - utime_t ut(restore_time); - snprintf(buf, sizeof(buf), "%lld.%09lld", - (long long)ut.sec(), - (long long)ut.nsec()); bufferlist bl; - bl.append(buf, 32); encode(restore_time, bl); attrs[RGW_ATTR_RESTORE_TIME] = std::move(bl); } @@ -5278,13 +5324,7 @@ int RGWRados::restore_obj_from_cloud(RGWLCCloudTierCtx& tier_ctx, delete_at = expiration_date; { - char buf[32]; - utime_t ut(expiration_date); - snprintf(buf, sizeof(buf), "%lld.%09lld", - (long long)ut.sec(), - (long long)ut.nsec()); bufferlist bl; - bl.append(buf, 32); encode(expiration_date, bl); attrs[RGW_ATTR_RESTORE_EXPIRY_DATE] = std::move(bl); } @@ -5445,7 +5485,7 @@ int RGWRados::delete_bucket(RGWBucketInfo& bucket_info, RGWObjVersionTracker& ob } /* if the bucket is not synced we can remove the meta file */ - if (!svc.zone->is_syncing_bucket_meta(bucket)) { + if (!svc.zone->is_syncing_bucket_meta()) { RGWObjVersionTracker objv_tracker; r = ctl.bucket->remove_bucket_instance_info(bucket, bucket_info, y, dpp); if (r < 0) { @@ -6105,7 +6145,11 @@ int RGWRados::Object::Delete::delete_obj(optional_yield y, const DoutPrefixProvi const bool need_invalidate = (r == -ECANCELED); int64_t poolid = ioctx.get_id(); - if (r >= 0) { + if (r == -ETIMEDOUT) { + // rgw can't determine whether or not the delete succeeded, shouldn't be calling either of complete_del() or cancel() + // leaving that pending entry in the index so that bucket listing can recover with check_disk_state() and cls_rgw_suggest_changes() + ldpp_dout(dpp, 0) << "ERROR: rgw_rados_operate returned r=" << r << dendl; + } else if (r >= 0 || r == -ENOENT) { tombstone_cache_t *obj_tombstone_cache = store->get_tombstone_cache(); if (obj_tombstone_cache) { tombstone_entry 
entry{*state}; @@ -6918,13 +6962,13 @@ int RGWRados::set_attrs(const DoutPrefixProvider *dpp, RGWObjectCtx* octx, RGWBu } return 0; -} +} /* RGWRados::set_attrs() */ -static int get_part_obj_state(const DoutPrefixProvider* dpp, optional_yield y, - RGWRados* store, RGWBucketInfo& bucket_info, - RGWObjectCtx* rctx, RGWObjManifest* manifest, - int part_num, int* parts_count, bool prefetch, - RGWObjState** pstate, RGWObjManifest** pmanifest) +int RGWRados::get_part_obj_state(const DoutPrefixProvider* dpp, optional_yield y, + RGWRados* store, RGWBucketInfo& bucket_info, + RGWObjectCtx* rctx, RGWObjManifest* manifest, + int part_num, int* parts_count, bool prefetch, + RGWObjState** pstate, RGWObjManifest** pmanifest) { if (!manifest) { return -ERR_INVALID_PART; @@ -7003,6 +7047,9 @@ static int get_part_obj_state(const DoutPrefixProvider* dpp, optional_yield y, // update the object size sm->state.size = part_manifest.get_obj_size(); + if (!sm->state.attrset.count(RGW_ATTR_COMPRESSION)) { + sm->state.accounted_size = sm->state.size; + } *pmanifest = &part_manifest; return 0; |