diff options
author | Matt Benjamin <mbenjamin@redhat.com> | 2024-04-10 23:58:15 +0200 |
---|---|---|
committer | Matt Benjamin <mbenjamin@redhat.com> | 2024-07-03 20:38:14 +0200 |
commit | 0b9bbcbd60e7b5a3f05fadc08dd6353412c169f8 (patch) | |
tree | c9b4df286bcc399929f10581d7a0186318ca2ba8 | |
parent | rgw_cksum: test round-trip constructor (armored) (diff) | |
download | ceph-0b9bbcbd60e7b5a3f05fadc08dd6353412c169f8.tar.xz ceph-0b9bbcbd60e7b5a3f05fadc08dd6353412c169f8.zip |
rgw_cksum: multipart upload checksums
includes commits:
* fixes init-multipart header return
* introduce checksum to SAL MultipartPart interface
* thread optional checksum through DataProcessor
* code complete multipart checksum verify
* fix formatter
* fix checksum format for multipart objects in GET/HEAD ops
* always return parts_count from ReadOp::prepare() if applicable
This behavior is used when returning the checksum of a multipart
upload object.
* tweak conditional multipart_parts_count
* add checksum output to ListMultipart
* fix nil-return from GetHeaderCksumResult
* re-arm truncated if re-entering list-parts
* complete-multipart w/list-parts
* validate supplied checksum in CompleteMultipart
* verify checksum type against initial checksum algorithm
* rgw_op: suppress more x-amz headers
* final fixes and cleanups
* remove unused t0
Signed-off-by: Matt Benjamin <mbenjamin@redhat.com>
29 files changed, 549 insertions, 146 deletions
diff --git a/src/rgw/driver/d4n/rgw_sal_d4n.cc b/src/rgw/driver/d4n/rgw_sal_d4n.cc index 0f3c0ddb224..712cebbfc05 100644 --- a/src/rgw/driver/d4n/rgw_sal_d4n.cc +++ b/src/rgw/driver/d4n/rgw_sal_d4n.cc @@ -811,6 +811,7 @@ int D4NFilterWriter::process(bufferlist&& data, uint64_t offset) int D4NFilterWriter::complete(size_t accounted_size, const std::string& etag, ceph::real_time *mtime, ceph::real_time set_mtime, std::map<std::string, bufferlist>& attrs, + const std::optional<rgw::cksum::Cksum>& cksum, ceph::real_time delete_at, const char *if_match, const char *if_nomatch, const std::string *user_data, @@ -830,7 +831,7 @@ int D4NFilterWriter::complete(size_t accounted_size, const std::string& etag, ldpp_dout(save_dpp, 10) << "D4NFilterWriter::" << __func__ << "(): ObjectDirectory set method failed." << dendl; /* Retrieve complete set of attrs */ - int ret = next->complete(accounted_size, etag, mtime, set_mtime, attrs, + int ret = next->complete(accounted_size, etag, mtime, set_mtime, attrs, cksum, delete_at, if_match, if_nomatch, user_data, zones_trace, canceled, rctx, flags); obj->get_obj_attrs(rctx.y, save_dpp, NULL); diff --git a/src/rgw/driver/d4n/rgw_sal_d4n.h b/src/rgw/driver/d4n/rgw_sal_d4n.h index 5c71582406f..e7d041d2a19 100644 --- a/src/rgw/driver/d4n/rgw_sal_d4n.h +++ b/src/rgw/driver/d4n/rgw_sal_d4n.h @@ -219,14 +219,15 @@ class D4NFilterWriter : public FilterWriter { virtual int prepare(optional_yield y); virtual int process(bufferlist&& data, uint64_t offset) override; virtual int complete(size_t accounted_size, const std::string& etag, - ceph::real_time *mtime, ceph::real_time set_mtime, - std::map<std::string, bufferlist>& attrs, - ceph::real_time delete_at, - const char *if_match, const char *if_nomatch, - const std::string *user_data, - rgw_zone_set *zones_trace, bool *canceled, - const req_context& rctx, - uint32_t flags) override; + ceph::real_time *mtime, ceph::real_time set_mtime, + std::map<std::string, bufferlist>& attrs, + const 
std::optional<rgw::cksum::Cksum>& cksum, + ceph::real_time delete_at, + const char *if_match, const char *if_nomatch, + const std::string *user_data, + rgw_zone_set *zones_trace, bool *canceled, + const req_context& rctx, + uint32_t flags) override; bool is_atomic() { return atomic; }; const DoutPrefixProvider* dpp() { return save_dpp; } }; diff --git a/src/rgw/driver/daos/rgw_sal_daos.cc b/src/rgw/driver/daos/rgw_sal_daos.cc index 703e8dd77bf..6d83c7d159a 100644 --- a/src/rgw/driver/daos/rgw_sal_daos.cc +++ b/src/rgw/driver/daos/rgw_sal_daos.cc @@ -1596,6 +1596,7 @@ int DaosMultipartUpload::init(const DoutPrefixProvider* dpp, optional_yield y, multipart_upload_info upload_info; upload_info.dest_placement = dest_placement; + upload_info.cksum_type = cksum_type; ent.encode(bl); encode(attrs, bl); @@ -1968,6 +1969,7 @@ int DaosMultipartUpload::get_info(const DoutPrefixProvider* dpp, // Now decode the placement rule decode(upload_info, iter); + cksum_type = upload_info.cksum_type; placement = upload_info.dest_placement; *rule = &placement; diff --git a/src/rgw/driver/daos/rgw_sal_daos.h b/src/rgw/driver/daos/rgw_sal_daos.h index 39749a0d33c..8cff96088ee 100644 --- a/src/rgw/driver/daos/rgw_sal_daos.h +++ b/src/rgw/driver/daos/rgw_sal_daos.h @@ -742,6 +742,7 @@ class DaosAtomicWriter : public StoreWriter { virtual int complete(size_t accounted_size, const std::string& etag, ceph::real_time* mtime, ceph::real_time set_mtime, std::map<std::string, bufferlist>& attrs, + const std::optional<rgw::cksum::Cksum>& cksum, ceph::real_time delete_at, const char* if_match, const char* if_nomatch, const std::string* user_data, rgw_zone_set* zones_trace, bool* canceled, @@ -788,6 +789,7 @@ class DaosMultipartWriter : public StoreWriter { virtual int complete(size_t accounted_size, const std::string& etag, ceph::real_time* mtime, ceph::real_time set_mtime, std::map<std::string, bufferlist>& attrs, + const std::optional<rgw::cksum::Cksum>& cksum, ceph::real_time delete_at, const char* 
if_match, const char* if_nomatch, const std::string* user_data, rgw_zone_set* zones_trace, bool* canceled, @@ -810,6 +812,10 @@ class DaosMultipartPart : public StoreMultipartPart { virtual const std::string& get_etag() { return info.etag; } virtual ceph::real_time& get_mtime() { return info.modified; } + virtual const std::optional<rgw::cksum::Cksum>& get_cksum() { + return info.cksum; + } + friend class DaosMultipartUpload; }; diff --git a/src/rgw/driver/motr/rgw_sal_motr.h b/src/rgw/driver/motr/rgw_sal_motr.h index ac1af5376f3..4d9c189993b 100644 --- a/src/rgw/driver/motr/rgw_sal_motr.h +++ b/src/rgw/driver/motr/rgw_sal_motr.h @@ -791,6 +791,7 @@ class MotrAtomicWriter : public StoreWriter { virtual int complete(size_t accounted_size, const std::string& etag, ceph::real_time *mtime, ceph::real_time set_mtime, std::map<std::string, bufferlist>& attrs, + const std::optional<rgw::cksum::Cksum>& cksum, ceph::real_time delete_at, const char *if_match, const char *if_nomatch, const std::string *user_data, @@ -839,6 +840,7 @@ public: virtual int complete(size_t accounted_size, const std::string& etag, ceph::real_time *mtime, ceph::real_time set_mtime, std::map<std::string, bufferlist>& attrs, + const std::optional<rgw::cksum::Cksum>& cksum, ceph::real_time delete_at, const char *if_match, const char *if_nomatch, const std::string *user_data, @@ -888,6 +890,9 @@ public: virtual uint64_t get_size() { return info.accounted_size; } virtual const std::string& get_etag() { return info.etag; } virtual ceph::real_time& get_mtime() { return info.modified; } + virtual const std::optional<rgw::cksum::Cksum>& get_cksum() { + return info.cksum; + } RGWObjManifest& get_manifest() { return info.manifest; } diff --git a/src/rgw/driver/posix/rgw_sal_posix.cc b/src/rgw/driver/posix/rgw_sal_posix.cc index 6f5289f58bf..145a5789653 100644 --- a/src/rgw/driver/posix/rgw_sal_posix.cc +++ b/src/rgw/driver/posix/rgw_sal_posix.cc @@ -2509,6 +2509,7 @@ int POSIXMultipartUpload::init(const 
DoutPrefixProvider *dpp, optional_yield y, meta_obj = get_meta_obj(); + mp_obj.upload_info.cksum_type = cksum_type; mp_obj.upload_info.dest_placement = dest_placement; bufferlist bl; @@ -2796,9 +2797,12 @@ int POSIXMultipartWriter::process(bufferlist&& data, uint64_t offset) return obj->write(offset, data, dpp, null_yield); } -int POSIXMultipartWriter::complete(size_t accounted_size, const std::string& etag, +int POSIXMultipartWriter::complete( + size_t accounted_size, + const std::string& etag, ceph::real_time *mtime, ceph::real_time set_mtime, std::map<std::string, bufferlist>& attrs, + const std::optional<rgw::cksum::Cksum>& cksum, ceph::real_time delete_at, const char *if_match, const char *if_nomatch, const std::string *user_data, @@ -2828,6 +2832,7 @@ int POSIXMultipartWriter::complete(size_t accounted_size, const std::string& eta info.num = part_num; info.etag = etag; + info.cksum = cksum; info.mtime = set_mtime; bufferlist bl; @@ -2867,6 +2872,7 @@ int POSIXAtomicWriter::process(bufferlist&& data, uint64_t offset) int POSIXAtomicWriter::complete(size_t accounted_size, const std::string& etag, ceph::real_time *mtime, ceph::real_time set_mtime, std::map<std::string, bufferlist>& attrs, + const std::optional<rgw::cksum::Cksum>& cksum, ceph::real_time delete_at, const char *if_match, const char *if_nomatch, const std::string *user_data, diff --git a/src/rgw/driver/posix/rgw_sal_posix.h b/src/rgw/driver/posix/rgw_sal_posix.h index d2ea90b0109..7483139da33 100644 --- a/src/rgw/driver/posix/rgw_sal_posix.h +++ b/src/rgw/driver/posix/rgw_sal_posix.h @@ -479,19 +479,24 @@ struct POSIXUploadPartInfo { uint32_t num{0}; std::string etag; ceph::real_time mtime; + std::optional<rgw::cksum::Cksum> cksum; void encode(bufferlist& bl) const { - ENCODE_START(1, 1, bl); + ENCODE_START(2, 1, bl); encode(num, bl); encode(etag, bl); encode(mtime, bl); + encode(cksum, bl); ENCODE_FINISH(bl); } void decode(bufferlist::const_iterator& bl) { - DECODE_START(1, bl); + 
DECODE_START_LEGACY_COMPAT_LEN(2, 1, 1, bl); decode(num, bl); decode(etag, bl); decode(mtime, bl); + if (struct_v > 1) { + decode(cksum, bl); + } DECODE_FINISH(bl); } }; @@ -514,8 +519,12 @@ public: virtual uint64_t get_size() { return shadow->get_size(); } virtual const std::string& get_etag() { return info.etag; } virtual ceph::real_time& get_mtime() { return info.mtime; } + virtual const std::optional<rgw::cksum::Cksum>& get_cksum() { + return info.cksum; + } - int load(const DoutPrefixProvider* dpp, optional_yield y, POSIXDriver* driver, rgw_obj_key& key); + int load(const DoutPrefixProvider* dpp, optional_yield y, POSIXDriver* driver, + rgw_obj_key& key); friend class POSIXMultipartUpload; }; @@ -605,6 +614,7 @@ public: virtual int complete(size_t accounted_size, const std::string& etag, ceph::real_time *mtime, ceph::real_time set_mtime, std::map<std::string, bufferlist>& attrs, + const std::optional<rgw::cksum::Cksum>& cksum, ceph::real_time delete_at, const char *if_match, const char *if_nomatch, const std::string *user_data, @@ -645,6 +655,7 @@ public: virtual int complete(size_t accounted_size, const std::string& etag, ceph::real_time *mtime, ceph::real_time set_mtime, std::map<std::string, bufferlist>& attrs, + const std::optional<rgw::cksum::Cksum>& cksum, ceph::real_time delete_at, const char *if_match, const char *if_nomatch, const std::string *user_data, diff --git a/src/rgw/driver/rados/rgw_putobj_processor.cc b/src/rgw/driver/rados/rgw_putobj_processor.cc index d41678cdb06..79be7b3209b 100644 --- a/src/rgw/driver/rados/rgw_putobj_processor.cc +++ b/src/rgw/driver/rados/rgw_putobj_processor.cc @@ -338,19 +338,21 @@ int AtomicObjectProcessor::prepare(optional_yield y) return 0; } -int AtomicObjectProcessor::complete(size_t accounted_size, - const std::string& etag, - ceph::real_time *mtime, - ceph::real_time set_mtime, - rgw::sal::Attrs& attrs, - ceph::real_time delete_at, - const char *if_match, - const char *if_nomatch, - const std::string 
*user_data, - rgw_zone_set *zones_trace, - bool *pcanceled, - const req_context& rctx, - uint32_t flags) +int AtomicObjectProcessor::complete( + size_t accounted_size, + const std::string& etag, + ceph::real_time *mtime, + ceph::real_time set_mtime, + rgw::sal::Attrs& attrs, + const std::optional<rgw::cksum::Cksum>& cksum, + ceph::real_time delete_at, + const char *if_match, + const char *if_nomatch, + const std::string *user_data, + rgw_zone_set *zones_trace, + bool *pcanceled, + const req_context& rctx, + uint32_t flags) { int r = writer.drain(); if (r < 0) { @@ -486,19 +488,21 @@ int MultipartObjectProcessor::prepare(optional_yield y) return prepare_head(); } -int MultipartObjectProcessor::complete(size_t accounted_size, - const std::string& etag, - ceph::real_time *mtime, - ceph::real_time set_mtime, - std::map<std::string, bufferlist>& attrs, - ceph::real_time delete_at, - const char *if_match, - const char *if_nomatch, - const std::string *user_data, - rgw_zone_set *zones_trace, - bool *pcanceled, - const req_context& rctx, - uint32_t flags) +int MultipartObjectProcessor::complete( + size_t accounted_size, + const std::string& etag, + ceph::real_time *mtime, + ceph::real_time set_mtime, + std::map<std::string, bufferlist>& attrs, + const std::optional<rgw::cksum::Cksum>& cksum, + ceph::real_time delete_at, + const char *if_match, + const char *if_nomatch, + const std::string *user_data, + rgw_zone_set *zones_trace, + bool *pcanceled, + const req_context& rctx, + uint32_t flags) { int r = writer.drain(); if (r < 0) { @@ -541,6 +545,7 @@ int MultipartObjectProcessor::complete(size_t accounted_size, } info.num = part_num; info.etag = etag; + info.cksum = cksum; info.size = actual_size; info.accounted_size = accounted_size; info.modified = real_clock::now(); @@ -701,11 +706,16 @@ int AppendObjectProcessor::prepare(optional_yield y) return 0; } -int AppendObjectProcessor::complete(size_t accounted_size, const string &etag, ceph::real_time *mtime, - ceph::real_time 
set_mtime, rgw::sal::Attrs& attrs, - ceph::real_time delete_at, const char *if_match, const char *if_nomatch, - const string *user_data, rgw_zone_set *zones_trace, bool *pcanceled, - const req_context& rctx, uint32_t flags) +int AppendObjectProcessor::complete( + size_t accounted_size, + const string &etag, ceph::real_time *mtime, + ceph::real_time set_mtime, rgw::sal::Attrs& attrs, + const std::optional<rgw::cksum::Cksum>& cksum, + ceph::real_time delete_at, const char *if_match, + const char *if_nomatch, + const string *user_data, rgw_zone_set *zones_trace, + bool *pcanceled, + const req_context& rctx, uint32_t flags) { int r = writer.drain(); if (r < 0) @@ -764,7 +774,8 @@ int AppendObjectProcessor::complete(size_t accounted_size, const string &etag, c } r = obj_op.write_meta(actual_size + cur_size, accounted_size + *cur_accounted_size, - attrs, rctx, writer.get_trace(), flags & rgw::sal::FLAG_LOG_OP); + attrs, rctx, writer.get_trace(), + flags & rgw::sal::FLAG_LOG_OP); if (r < 0) { return r; } diff --git a/src/rgw/driver/rados/rgw_putobj_processor.h b/src/rgw/driver/rados/rgw_putobj_processor.h index 655428f83e9..3f3b0b31fcf 100644 --- a/src/rgw/driver/rados/rgw_putobj_processor.h +++ b/src/rgw/driver/rados/rgw_putobj_processor.h @@ -193,6 +193,7 @@ class AtomicObjectProcessor : public ManifestObjectProcessor { int complete(size_t accounted_size, const std::string& etag, ceph::real_time *mtime, ceph::real_time set_mtime, std::map<std::string, bufferlist>& attrs, + const std::optional<rgw::cksum::Cksum>& cksum, ceph::real_time delete_at, const char *if_match, const char *if_nomatch, const std::string *user_data, @@ -241,6 +242,7 @@ class MultipartObjectProcessor : public ManifestObjectProcessor { int complete(size_t accounted_size, const std::string& etag, ceph::real_time *mtime, ceph::real_time set_mtime, std::map<std::string, bufferlist>& attrs, + const std::optional<rgw::cksum::Cksum>& cksum, ceph::real_time delete_at, const char *if_match, const char 
*if_nomatch, const std::string *user_data, @@ -279,7 +281,9 @@ class MultipartObjectProcessor : public ManifestObjectProcessor { int prepare(optional_yield y) override; int complete(size_t accounted_size, const std::string& etag, ceph::real_time *mtime, ceph::real_time set_mtime, - std::map<std::string, bufferlist>& attrs, ceph::real_time delete_at, + std::map<std::string, bufferlist>& attrs, + const std::optional<rgw::cksum::Cksum>& cksum, + ceph::real_time delete_at, const char *if_match, const char *if_nomatch, const std::string *user_data, rgw_zone_set *zones_trace, bool *canceled, const req_context& rctx, diff --git a/src/rgw/driver/rados/rgw_rados.cc b/src/rgw/driver/rados/rgw_rados.cc index 5e0f2c30ca9..088435d8573 100644 --- a/src/rgw/driver/rados/rgw_rados.cc +++ b/src/rgw/driver/rados/rgw_rados.cc @@ -23,6 +23,7 @@ #include "common/BackTrace.h" #include "common/ceph_time.h" +#include "rgw_cksum.h" #include "rgw_sal.h" #include "rgw_zone.h" #include "rgw_cache.h" @@ -4489,8 +4490,9 @@ int RGWRados::fetch_remote_obj(RGWObjectCtx& obj_ctx, for (i = 0; i < MAX_COMPLETE_RETRY; i++) { bool canceled = false; ret = processor.complete(accounted_size, etag, mtime, set_mtime, - attrs, delete_at, nullptr, nullptr, nullptr, - zones_trace, &canceled, rctx, rgw::sal::FLAG_LOG_OP); + attrs, rgw::cksum::no_cksum, delete_at, nullptr, nullptr, + nullptr, zones_trace, &canceled, rctx, + rgw::sal::FLAG_LOG_OP); if (ret < 0) { goto set_err_state; } @@ -5047,7 +5049,8 @@ int RGWRados::copy_obj_data(RGWObjectCtx& obj_ctx, } const req_context rctx{dpp, y, nullptr}; - return processor.complete(accounted_size, etag, mtime, set_mtime, attrs, delete_at, + return processor.complete(accounted_size, etag, mtime, set_mtime, attrs, + rgw::cksum::no_cksum, delete_at, nullptr, nullptr, nullptr, nullptr, nullptr, rctx, log_op ? 
rgw::sal::FLAG_LOG_OP : 0); } diff --git a/src/rgw/driver/rados/rgw_sal_rados.cc b/src/rgw/driver/rados/rgw_sal_rados.cc index 0addbe01923..b6eab4357d3 100644 --- a/src/rgw/driver/rados/rgw_sal_rados.cc +++ b/src/rgw/driver/rados/rgw_sal_rados.cc @@ -3135,6 +3135,7 @@ int RadosMultipartUpload::init(const DoutPrefixProvider *dpp, optional_yield y, multipart_upload_info upload_info; upload_info.dest_placement = dest_placement; + upload_info.cksum_type = cksum_type; if (obj_legal_hold) { upload_info.obj_legal_hold_exist = true; @@ -3527,6 +3528,7 @@ int RadosMultipartUpload::get_info(const DoutPrefixProvider *dpp, optional_yield ldpp_dout(dpp, 0) << "ERROR: failed to decode multipart upload info" << dendl; return -EIO; } + cksum_type = upload_info.cksum_type; placement = upload_info.dest_placement; upload_information = upload_info; *rule = &placement; @@ -3726,6 +3728,7 @@ int RadosAtomicWriter::process(bufferlist&& data, uint64_t offset) int RadosAtomicWriter::complete(size_t accounted_size, const std::string& etag, ceph::real_time *mtime, ceph::real_time set_mtime, std::map<std::string, bufferlist>& attrs, + const std::optional<rgw::cksum::Cksum>& cksum, ceph::real_time delete_at, const char *if_match, const char *if_nomatch, const std::string *user_data, @@ -3733,8 +3736,9 @@ int RadosAtomicWriter::complete(size_t accounted_size, const std::string& etag, const req_context& rctx, uint32_t flags) { - return processor.complete(accounted_size, etag, mtime, set_mtime, attrs, delete_at, - if_match, if_nomatch, user_data, zones_trace, canceled, rctx, flags); + return processor.complete(accounted_size, etag, mtime, set_mtime, attrs, + cksum, delete_at, if_match, if_nomatch, + user_data, zones_trace, canceled, rctx, flags); } int RadosAppendWriter::prepare(optional_yield y) @@ -3750,6 +3754,7 @@ int RadosAppendWriter::process(bufferlist&& data, uint64_t offset) int RadosAppendWriter::complete(size_t accounted_size, const std::string& etag, ceph::real_time *mtime, 
ceph::real_time set_mtime, std::map<std::string, bufferlist>& attrs, + const std::optional<rgw::cksum::Cksum>& cksum, ceph::real_time delete_at, const char *if_match, const char *if_nomatch, const std::string *user_data, @@ -3757,8 +3762,9 @@ int RadosAppendWriter::complete(size_t accounted_size, const std::string& etag, const req_context& rctx, uint32_t flags) { - return processor.complete(accounted_size, etag, mtime, set_mtime, attrs, delete_at, - if_match, if_nomatch, user_data, zones_trace, canceled, rctx, flags); + return processor.complete(accounted_size, etag, mtime, set_mtime, attrs, + cksum, delete_at, if_match, if_nomatch, + user_data, zones_trace, canceled, rctx, flags); } int RadosMultipartWriter::prepare(optional_yield y) @@ -3771,9 +3777,12 @@ int RadosMultipartWriter::process(bufferlist&& data, uint64_t offset) return processor.process(std::move(data), offset); } -int RadosMultipartWriter::complete(size_t accounted_size, const std::string& etag, +int RadosMultipartWriter::complete( + size_t accounted_size, + const std::string& etag, ceph::real_time *mtime, ceph::real_time set_mtime, std::map<std::string, bufferlist>& attrs, + const std::optional<rgw::cksum::Cksum>& cksum, ceph::real_time delete_at, const char *if_match, const char *if_nomatch, const std::string *user_data, @@ -3781,8 +3790,9 @@ int RadosMultipartWriter::complete(size_t accounted_size, const std::string& eta const req_context& rctx, uint32_t flags) { - return processor.complete(accounted_size, etag, mtime, set_mtime, attrs, delete_at, - if_match, if_nomatch, user_data, zones_trace, canceled, rctx, flags); + return processor.complete(accounted_size, etag, mtime, set_mtime, attrs, + cksum, delete_at, if_match, if_nomatch, + user_data, zones_trace, canceled, rctx, flags); } bool RadosZoneGroup::placement_target_exists(std::string& target) const diff --git a/src/rgw/driver/rados/rgw_sal_rados.h b/src/rgw/driver/rados/rgw_sal_rados.h index ff36c2edfd8..4abc7112c9c 100644 --- 
a/src/rgw/driver/rados/rgw_sal_rados.h +++ b/src/rgw/driver/rados/rgw_sal_rados.h @@ -776,6 +776,9 @@ public: virtual uint64_t get_size() { return info.accounted_size; } virtual const std::string& get_etag() { return info.etag; } virtual ceph::real_time& get_mtime() { return info.modified; } + virtual const std::optional<rgw::cksum::Cksum>& get_cksum() { + return info.cksum; + } /* For RadosStore code */ RGWObjManifest& get_manifest() { return info.manifest; } @@ -980,6 +983,7 @@ public: virtual int complete(size_t accounted_size, const std::string& etag, ceph::real_time *mtime, ceph::real_time set_mtime, std::map<std::string, bufferlist>& attrs, + const std::optional<rgw::cksum::Cksum>& cksum, ceph::real_time delete_at, const char *if_match, const char *if_nomatch, const std::string *user_data, @@ -1029,6 +1033,7 @@ public: virtual int complete(size_t accounted_size, const std::string& etag, ceph::real_time *mtime, ceph::real_time set_mtime, std::map<std::string, bufferlist>& attrs, + const std::optional<rgw::cksum::Cksum>& cksum, ceph::real_time delete_at, const char *if_match, const char *if_nomatch, const std::string *user_data, @@ -1075,6 +1080,7 @@ public: virtual int complete(size_t accounted_size, const std::string& etag, ceph::real_time *mtime, ceph::real_time set_mtime, std::map<std::string, bufferlist>& attrs, + const std::optional<rgw::cksum::Cksum>& cksum, ceph::real_time delete_at, const char *if_match, const char *if_nomatch, const std::string *user_data, diff --git a/src/rgw/rgw_basic_types.h b/src/rgw/rgw_basic_types.h index 771466ffae5..d09f06a4a98 100644 --- a/src/rgw/rgw_basic_types.h +++ b/src/rgw/rgw_basic_types.h @@ -21,6 +21,7 @@ #pragma once #include <string> +#include <optional> #include <fmt/format.h> #include "include/types.h" @@ -256,10 +257,10 @@ struct RGWUploadPartInfo { uint64_t size; uint64_t accounted_size{0}; std::string etag; - rgw::cksum::Cksum cksum; ceph::real_time modified; RGWObjManifest manifest; RGWCompressionInfo 
cs_info; + std::optional<rgw::cksum::Cksum> cksum; // Previous part obj prefixes. Recorded here for later cleanup. std::set<std::string> past_prefixes; diff --git a/src/rgw/rgw_cksum.h b/src/rgw/rgw_cksum.h index 06ccacf05e0..c0a314d467f 100644 --- a/src/rgw/rgw_cksum.h +++ b/src/rgw/rgw_cksum.h @@ -15,6 +15,8 @@ #include <boost/algorithm/string/predicate.hpp> #include <cstdint> +#include <cstring> +#include <optional> #include <stdint.h> #include <string> #include <string_view> @@ -81,6 +83,11 @@ namespace rgw { namespace cksum { namespace ba = boost::algorithm; + static inline std::string safe_upcase_str(std::string s) { + std::transform(s.begin(), s.end(), s.begin(), ::toupper); + return s; + } + class Cksum { public: static constexpr std::array<Desc, 8> checksums = @@ -102,23 +109,41 @@ namespace rgw { namespace cksum { value_type digest; Cksum(Type _type = Type::none) : type(_type) {} + Cksum(Type _type, const char* _armored_text) + : type(_type) { + const auto& ckd = checksums[uint16_t(type)]; + (void) ceph_unarmor((char*) digest.begin(), (char*) digest.begin() + ckd.digest_size, + _armored_text, _armored_text + std::strlen(_armored_text)); + } - static const char* type_string(const Type type) { + const char* type_string() const { return (Cksum::checksums[uint16_t(type)]).name; } - std::string aws_name() { - return fmt::format("x-amz-checksum-{}", type_string(type)); + const bool aws() const { + return (Cksum::checksums[uint16_t(type)]).aws(); + } + + std::string aws_name() const { + return fmt::format("x-amz-checksum-{}", type_string()); } - std::string rgw_name() { - return fmt::format("x-rgw-checksum-{}", type_string(type)); + std::string rgw_name() const { + return fmt::format("x-rgw-checksum-{}", type_string()); } - std::string header_name() { - return ((Cksum::checksums[uint16_t(type)]).aws()) ? - aws_name() : - rgw_name(); + std::string header_name() const { + return (aws()) ? 
aws_name() : rgw_name(); + } + + std::string element_name() const { + std::string ts{type_string()}; + return fmt::format("Checksum{}", safe_upcase_str(ts)); + } + + std::string_view raw() const { + const auto& ckd = checksums[uint16_t(type)]; + return std::string_view((char*) digest.begin(), ckd.digest_size); } std::string to_armor() const { @@ -172,6 +197,8 @@ namespace rgw { namespace cksum { }; /* Cksum */ WRITE_CLASS_ENCODER(Cksum); + static inline const std::optional<rgw::cksum::Cksum> no_cksum{std::nullopt}; + static inline Type parse_cksum_type(const char* name) { for (const auto& ck : Cksum::checksums) { diff --git a/src/rgw/rgw_cksum_pipe.cc b/src/rgw/rgw_cksum_pipe.cc index c11d821d7ec..f9250303bce 100644 --- a/src/rgw/rgw_cksum_pipe.cc +++ b/src/rgw/rgw_cksum_pipe.cc @@ -32,45 +32,19 @@ namespace rgw::putobj { dv(rgw::cksum::digest_factory(_type)), _digest(cksum::get_digest(dv)), cksum_hdr(_hdr), _state(State::DIGEST) - { - cksum::Digest* digest = cksum::get_digest(dv); - /* XXXX remove this */ - std::cout << "ctor had digest " << _digest - << " and got digest: " << digest - << std::endl; - } + {} std::unique_ptr<RGWPutObj_Cksum> RGWPutObj_Cksum::Factory( rgw::sal::DataProcessor* next, const RGWEnv& env) { /* look for matching headers */ - auto match = [&env] () -> const cksum_hdr_t { - /* If the individual checksum value you provide through - x-amz-checksum-algorithm doesn't match the checksum algorithm - you set through x-amz-sdk-checksum-algorithm, Amazon S3 ignores - any provided ChecksumAlgorithm parameter and uses the checksum - algorithm that matches the provided value in - x-amz-checksum-algorithm. 
- https://docs.aws.amazon.com/AmazonS3/latest/API/API_PutObject.html - */ - for (const auto hk : {"HTTP_X_AMZ_CHECKSUM_ALGORITHM", - "HTTP_X_AMZ_SDK_CHECKSUM_ALGORITHM"}) { - auto hv = env.get(hk); - if (hv) { - return cksum_hdr_t(hk, hv); - } - } - return cksum_hdr_t(nullptr, nullptr); - }; - - auto algo_header = match(); + auto algo_header = cksum_algorithm_hdr(env); if (algo_header.first) { if (algo_header.second) { auto cksum_type = cksum::parse_cksum_type(algo_header.second); - return std::make_unique<RGWPutObj_Cksum>( - next, - cksum_type, - std::move(algo_header)); + return + std::make_unique<RGWPutObj_Cksum>( + next, cksum_type, std::move(algo_header)); } /* malformed checksum algorithm header(s) */ throw rgw::io::Exception(EINVAL, std::system_category()); diff --git a/src/rgw/rgw_cksum_pipe.h b/src/rgw/rgw_cksum_pipe.h index 047fbcb4ddd..dce389ec8ff 100644 --- a/src/rgw/rgw_cksum_pipe.h +++ b/src/rgw/rgw_cksum_pipe.h @@ -15,17 +15,78 @@ #pragma once +#include <cstdint> #include <utility> #include <tuple> #include <cstring> #include "rgw_cksum.h" +#include "rgw_common.h" #include "rgw_putobj.h" namespace rgw::putobj { namespace cksum = rgw::cksum; using cksum_hdr_t = std::pair<const char*, const char*>; - + + static inline const cksum_hdr_t cksum_algorithm_hdr(const RGWEnv& env) { + /* If the individual checksum value you provide through + x-amz-checksum-algorithm doesn't match the checksum algorithm + you set through x-amz-sdk-checksum-algorithm, Amazon S3 ignores + any provided ChecksumAlgorithm parameter and uses the checksum + algorithm that matches the provided value in + x-amz-checksum-algorithm. 
+ https://docs.aws.amazon.com/AmazonS3/latest/API/API_PutObject.html + */ + for (const auto hk : {"HTTP_X_AMZ_CHECKSUM_ALGORITHM", + "HTTP_X_AMZ_SDK_CHECKSUM_ALGORITHM"}) { + auto hv = env.get(hk); + if (hv) { + return cksum_hdr_t(hk, hv); + } + } + return cksum_hdr_t(nullptr, nullptr); + } /* cksum_algorithm_hdr */ + + using GetHeaderCksumResult = std::pair<cksum::Cksum, std::string_view>; + + static inline GetHeaderCksumResult get_hdr_cksum(const RGWEnv& env) { + cksum::Type cksum_type; + auto algo_hdr = cksum_algorithm_hdr(env); + if (algo_hdr.first) { + if (algo_hdr.second) { + cksum_type = cksum::parse_cksum_type(algo_hdr.second); + auto hk = fmt::format("HTTP_X_AMZ_CHECKSUM_{}", algo_hdr.second); + auto hv = env.get(hk.c_str()); + if (hv) { + return + GetHeaderCksumResult(cksum::Cksum(cksum_type, hv), + std::string_view(hv, std::strlen(hv))); + } + } + } + return GetHeaderCksumResult(cksum::Cksum(cksum_type), ""); + } /* get_hdr_cksum */ + + /* CompleteMultipartUpload can have a checksum value but unlike + * PutObject, it won't have a checksum algorithm header, so we + * need to search for one */ + static inline GetHeaderCksumResult find_hdr_cksum(const RGWEnv& env) { + cksum::Type cksum_type; + for (int16_t ix = int16_t(cksum::Type::crc32); + ix <= uint16_t(cksum::Type::blake3); ++ix) { + cksum_type = cksum::Type(ix); + auto hk = fmt::format("HTTP_X_AMZ_CHECKSUM_{}", + safe_upcase_str(to_string(cksum_type))); + auto hv = env.get(hk.c_str()); + if (hv) { + return + GetHeaderCksumResult(cksum::Cksum(cksum_type, hv), + std::string_view(hv, std::strlen(hv))); + } + } + return GetHeaderCksumResult(cksum::Cksum(cksum_type), ""); + } /* find_hdr_cksum */ + // PutObj filter for streaming checksums class RGWPutObj_Cksum : public rgw::putobj::Pipe { diff --git a/src/rgw/rgw_common.h b/src/rgw/rgw_common.h index 929ebc60b92..da2617f3835 100644 --- a/src/rgw/rgw_common.h +++ b/src/rgw/rgw_common.h @@ -17,6 +17,7 @@ #pragma once #include <array> +#include <cstdint> 
#include <string_view> #include <atomic> #include <unordered_map> @@ -82,8 +83,9 @@ using ceph::crypto::MD5; #define RGW_ATTR_RATELIMIT RGW_ATTR_PREFIX "ratelimit" #define RGW_ATTR_LC RGW_ATTR_PREFIX "lc" #define RGW_ATTR_CORS RGW_ATTR_PREFIX "cors" -#define RGW_ATTR_ETAG RGW_ATTR_PREFIX "etag" +#define RGW_ATTR_ETAG RGW_ATTR_PREFIX "etag" #define RGW_ATTR_CKSUM RGW_ATTR_PREFIX "cksum" +#define RGW_ATTR_CKSUM_ALGORITHM RGW_ATTR_PREFIX "x-amz-checksum-algorithm" #define RGW_ATTR_BUCKETS RGW_ATTR_PREFIX "buckets" #define RGW_ATTR_META_PREFIX RGW_ATTR_PREFIX RGW_AMZ_META_PREFIX #define RGW_ATTR_CONTENT_TYPE RGW_ATTR_PREFIX "content_type" @@ -162,9 +164,6 @@ using ceph::crypto::MD5; #define RGW_ATTR_UNIX_KEY1 RGW_ATTR_PREFIX "unix-key1" #define RGW_ATTR_UNIX1 RGW_ATTR_PREFIX "unix1" -/* Content Checksums */ -#define RGW_ATTR_AMZ_CKSUM RGW_ATTR_PREFIX "x-amz-content-checksum" - #define RGW_ATTR_CRYPT_PREFIX RGW_ATTR_PREFIX "crypt." #define RGW_ATTR_CRYPT_MODE RGW_ATTR_CRYPT_PREFIX "mode" #define RGW_ATTR_CRYPT_KEYMD5 RGW_ATTR_CRYPT_PREFIX "keymd5" @@ -1519,25 +1518,33 @@ struct multipart_upload_info bool obj_legal_hold_exist{false}; RGWObjectRetention obj_retention; RGWObjectLegalHold obj_legal_hold; + rgw::cksum::Type cksum_type {rgw::cksum::Type::none}; void encode(bufferlist& bl) const { - ENCODE_START(2, 1, bl); + ENCODE_START(3, 1, bl); encode(dest_placement, bl); encode(obj_retention_exist, bl); encode(obj_legal_hold_exist, bl); encode(obj_retention, bl); encode(obj_legal_hold, bl); + uint16_t ct{uint16_t(cksum_type)}; + encode(ct, bl); ENCODE_FINISH(bl); } void decode(bufferlist::const_iterator& bl) { - DECODE_START(2, bl); + DECODE_START_LEGACY_COMPAT_LEN(3, 1, 1, bl); decode(dest_placement, bl); if (struct_v >= 2) { decode(obj_retention_exist, bl); decode(obj_legal_hold_exist, bl); decode(obj_retention, bl); decode(obj_legal_hold, bl); + if (struct_v >= 3) { + uint16_t ct; + decode(ct, bl); + cksum_type = rgw::cksum::Type(ct); + } } else { obj_retention_exist = 
false; obj_legal_hold_exist = false; @@ -1985,6 +1992,16 @@ static inline std::string ys_header_mangle(std::string_view name) return out; } /* ys_header_mangle */ +static inline std::string& upcase_str(std::string& s) { + std::transform(s.begin(), s.end(), s.begin(), ::toupper); + return s; +} + +static inline std::string safe_upcase_str(std::string s) { + std::transform(s.begin(), s.end(), s.begin(), ::toupper); + return s; +} + extern int rgw_bucket_parse_bucket_instance(const std::string& bucket_instance, std::string *bucket_name, std::string *bucket_id, int *shard_id); boost::intrusive_ptr<CephContext> diff --git a/src/rgw/rgw_data_access.cc b/src/rgw/rgw_data_access.cc index 06e13c3890c..74461a69399 100644 --- a/src/rgw/rgw_data_access.cc +++ b/src/rgw/rgw_data_access.cc @@ -1,10 +1,12 @@ // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- // vim: ts=8 sw=2 smarttab ft=cpp +#include <optional> #include "rgw_data_access.h" #include "rgw_acl_s3.h" #include "rgw_aio_throttle.h" #include "rgw_compression.h" +#include "rgw_cksum.h" #include "common/BackTrace.h" #define dout_subsys ceph_subsys_rgw @@ -208,12 +210,13 @@ int RGWDataAccess::Object::put(bufferlist& data, const req_context rctx{dpp, y, nullptr}; return processor->complete(obj_size, etag, - &mtime, mtime, - attrs, delete_at, - nullptr, nullptr, - puser_data, - nullptr, nullptr, - rctx, rgw::sal::FLAG_LOG_OP); + &mtime, mtime, attrs, + rgw::cksum::no_cksum, + delete_at, + nullptr, nullptr, + puser_data, + nullptr, nullptr, + rctx, rgw::sal::FLAG_LOG_OP); } void RGWDataAccess::Object::set_policy(const RGWAccessControlPolicy& policy) diff --git a/src/rgw/rgw_file.cc b/src/rgw/rgw_file.cc index 66e883e7257..16d0047ff53 100644 --- a/src/rgw/rgw_file.cc +++ b/src/rgw/rgw_file.cc @@ -2018,9 +2018,9 @@ namespace rgw { } op_ret = processor->complete(state->obj_size, etag, &mtime, real_time(), attrs, - (delete_at ? 
*delete_at : real_time()), - if_match, if_nomatch, nullptr, nullptr, nullptr, - rctx, rgw::sal::FLAG_LOG_OP); + rgw::cksum::no_cksum, (delete_at ? *delete_at : real_time()), + if_match, if_nomatch, nullptr, nullptr, nullptr, + rctx, rgw::sal::FLAG_LOG_OP); if (op_ret != 0) { /* revert attr updates */ rgw_fh->set_mtime(omtime); diff --git a/src/rgw/rgw_op.cc b/src/rgw/rgw_op.cc index 2f8e2abf916..94a780008fa 100644 --- a/src/rgw/rgw_op.cc +++ b/src/rgw/rgw_op.cc @@ -2,6 +2,7 @@ // vim: ts=8 sw=2 smarttab ft=cpp #include <errno.h> +#include <optional> #include <stdlib.h> #include <system_error> #include <unistd.h> @@ -24,6 +25,7 @@ #include "common/ceph_json.h" #include "common/static_ptr.h" #include "common/perf_counters_key.h" +#include "rgw_cksum.h" #include "rgw_common.h" #include "rgw_tracer.h" @@ -4583,10 +4585,13 @@ void RGWPutObj::execute(optional_yield y) tracepoint(rgw_op, processor_complete_enter, s->req_id.c_str()); const req_context rctx{this, s->yield, s->trace.get()}; - op_ret = processor->complete(s->obj_size, etag, &mtime, real_time(), attrs, - (delete_at ? *delete_at : real_time()), if_match, if_nomatch, - (user_data.empty() ? nullptr : &user_data), nullptr, nullptr, - rctx, complete_flags); + + op_ret = + processor->complete(s->obj_size, etag, &mtime, real_time(), attrs, + cksum, (delete_at ? *delete_at : real_time()), + if_match, if_nomatch, + (user_data.empty() ? 
nullptr : &user_data), + nullptr, nullptr, rctx, complete_flags); tracepoint(rgw_op, processor_complete_exit, s->req_id.c_str()); if (op_ret < 0) { return; @@ -4598,7 +4603,7 @@ void RGWPutObj::execute(optional_yield y) ldpp_dout(this, 1) << "ERROR: publishing notification failed, with error: " << ret << dendl; // too late to rollback operation, hence op_ret is not set here } -} +} /* RGWPutObj::execute() */ int RGWPostObj::init_processing(optional_yield y) { @@ -4804,11 +4809,13 @@ void RGWPostObj::execute(optional_yield y) emplace_attr(RGW_ATTR_COMPRESSION, std::move(tmp)); } + /* TODO: implement POST checksums */ const req_context rctx{this, s->yield, s->trace.get()}; - op_ret = processor->complete(s->obj_size, etag, nullptr, real_time(), attrs, - (delete_at ? *delete_at : real_time()), - nullptr, nullptr, nullptr, nullptr, nullptr, - rctx, rgw::sal::FLAG_LOG_OP); + op_ret = processor->complete(s->obj_size, etag, nullptr, real_time(), + attrs, rgw::cksum::no_cksum, + (delete_at ? *delete_at : real_time()), + nullptr, nullptr, nullptr, nullptr, nullptr, + rctx, rgw::sal::FLAG_LOG_OP); if (op_ret < 0) { return; } @@ -6309,7 +6316,6 @@ void RGWInitMultipart::execute(optional_yield y) { multipart_trace = tracing::rgw::tracer.start_trace(tracing::rgw::MULTIPART, s->trace_enabled); bufferlist aclbl, tracebl; - rgw::sal::Attrs attrs; op_ret = get_params(y); if (op_ret < 0) { @@ -6344,6 +6350,7 @@ void RGWInitMultipart::execute(optional_yield y) upload_id); upload->obj_legal_hold = obj_legal_hold; upload->obj_retention = obj_retention; + upload->cksum_type = cksum_algo; op_ret = upload->init(this, s->yield, s->owner, s->dest_placement, attrs); if (op_ret == 0) { @@ -6376,9 +6383,110 @@ void RGWCompleteMultipart::pre_exec() rgw_bucket_object_pre_exec(s); } +static inline int +try_sum_part_cksums(const DoutPrefixProvider *dpp, + CephContext *cct, + rgw::sal::MultipartUpload* upload, + RGWMultiCompleteUpload* parts, + std::optional<rgw::cksum::Cksum>& out_cksum, + 
optional_yield y) +{ + /* 1. need checksum-algorithm header (if invalid, fail) + 2. conditional on have-checksum, + 3. need digest for supplied algo + 4. iterate over parts, confirm each has same algo, if not, fail + 5. for each part-checksum, accumulate bytes into new checksum + 6. return armored and append "-<nparts>" + 7. verify -- if invalid, fail */ + + /* rgw_sal.h says that list_parts is called for the side effect of loading + * the parts of an upload into "cache"--the api is strange and truncated + * flag suggests that it needs to be called multiple times to handle large + * uploads--but does not explain how that affects the hidden cache; I'm + * assuming it turns over? */ + + int op_ret = 0; + bool truncated = false; + int marker = 0; + auto num_parts = int(parts->parts.size()); + + rgw::cksum::Type& cksum_type = upload->cksum_type; + + int again_count{0}; + again: + op_ret = upload->list_parts(dpp, cct, num_parts, marker, + &marker, &truncated, y); + if (op_ret < 0) { + return op_ret; + } + + if (truncated) { + ldpp_dout(dpp, 20) + << fmt::format( + "WARNING: {} upload->list_parts {} {} truncated, again_count={}!", + __func__, num_parts, marker, again_count) + << dendl; + truncated = false; + ++again_count; + goto again; + } + + if (cksum_type == rgw::cksum::Type::none) [[unlikely]] { + /* ordinary, no-checksum case */ + return 0; + } + + rgw::cksum::DigestVariant dv = rgw::cksum::digest_factory(cksum_type); + rgw::cksum::Digest* digest = rgw::cksum::get_digest(dv); + + /* returns the parts (currently?) 
in cache */ + auto parts_ix{0}; + auto& parts_map = upload->get_parts(); + for (auto& part : parts_map) { + ++parts_ix; + auto& part_cksum = part.second->get_cksum(); + ldpp_dout(dpp, 16) + << fmt::format("INFO: {} iterate part: {} {} {}", + __func__, parts_ix, part_cksum->type_string(), + part_cksum->to_armor()) + << dendl; + if ((part_cksum->type != cksum_type)) { + /* if parts have inconsistent checksum, fail now */ + ldpp_dout(dpp, 4) + << fmt::format( + "ERROR: multipart part checksum type mismatch\n\tcomplete " + "multipart header={} part={}", + to_string(part_cksum->type), to_string(cksum_type)) + << dendl; + op_ret = -ERR_INVALID_REQUEST; + return op_ret; + } + + /* the checksum of the final object is a checksum (of the same type, + * presumably) of the concatenated checksum bytes of the parts, plus + * "-<num-parts>". See + * https://docs.aws.amazon.com/AmazonS3/latest/userguide/checking-object-integrity.html#large-object-checksums + */ + auto ckr = part_cksum->raw(); + digest->Update((unsigned char *)ckr.data(), ckr.length()); + } /* all-parts */ + + /* we cannot verify this checksum, only compute it */ + out_cksum = rgw::cksum::finalize_digest(digest, cksum_type); + + ldpp_dout(dpp, 16) + << fmt::format("INFO: {} combined checksum {} {}-{}", + __func__, + out_cksum->type_string(), + out_cksum->to_armor(), num_parts) + << dendl; + + return op_ret; +} /* try_sum_part_cksums */ + void RGWCompleteMultipart::execute(optional_yield y) { - RGWMultiCompleteUpload *parts; + RGWMultiCompleteUpload* parts; RGWMultiXMLParser parser; std::unique_ptr<rgw::sal::MultipartUpload> upload; off_t ofs = 0; @@ -6418,7 +6526,6 @@ void RGWCompleteMultipart::execute(optional_yield y) return; } - if ((int)parts->parts.size() > s->cct->_conf->rgw_multipart_part_upload_limit) { op_ret = -ERANGE; @@ -6426,6 +6533,23 @@ void RGWCompleteMultipart::execute(optional_yield y) } upload = s->bucket->get_multipart_upload(s->object->get_name(), upload_id); + ldpp_dout(this, 16) << + 
fmt::format("INFO: {}->get_multipart_upload for obj {}, {} cksum_type {}", + s->bucket->get_name(), + s->object->get_name(), upload_id, + (!!upload) ? to_string(upload->cksum_type) : 0) + << dendl; + + rgw_placement_rule* dest_placement; + op_ret = upload->get_info(this, s->yield, &dest_placement); + if (op_ret < 0) { + /* XXX this fails inconsistently when !checksum */ + ldpp_dout(this, 0) << + "ERROR: MultipartUpload::get_info() for placement failed " + << "ret=" << op_ret << dendl; + op_ret = -ERR_INTERNAL_ERROR; + return; + } RGWCompressionInfo cs_info; bool compressed = false; @@ -6437,8 +6561,8 @@ void RGWCompleteMultipart::execute(optional_yield y) meta_obj->set_in_extra_data(true); meta_obj->set_hash_source(s->object->get_name()); - /*take a cls lock on meta_obj to prevent racing completions (or retries) - from deleting the parts*/ + /* take a cls lock on meta_obj to prevent racing completions (or retries) + from deleting the parts*/ int max_lock_secs_mp = s->cct->_conf.get_val<int64_t>("rgw_mp_lock_max_time"); utime_t dur(max_lock_secs_mp, 0); @@ -6468,6 +6592,14 @@ void RGWCompleteMultipart::execute(optional_yield y) extract_span_context(meta_obj->get_attrs(), trace_ctx); multipart_trace = tracing::rgw::tracer.add_span(name(), trace_ctx); + /* checksum computation */ + if (upload->cksum_type != rgw::cksum::Type::none) { + op_ret = try_sum_part_cksums(this, s->cct, upload.get(), parts, cksum, y); + if (op_ret < 0) { + return; + } + } + if (s->bucket->versioning_enabled()) { if (!version_id.empty()) { s->object->set_instance(version_id); @@ -6476,17 +6608,47 @@ void RGWCompleteMultipart::execute(optional_yield y) version_id = s->object->get_instance(); } } - s->object->set_attrs(meta_obj->get_attrs()); + + auto target_attrs = meta_obj->get_attrs(); + + if (cksum) { + armored_cksum = + fmt::format("{}-{}", cksum->to_armor(), parts->parts.size()); + + /* validate computed checksum against supplied checksum, if present */ + auto [hdr_cksum, supplied_cksum] = + 
rgw::putobj::find_hdr_cksum(*(s->info.env)); + ldpp_dout(this, 10) << fmt::format("INFO: client supplied checksum {}: {}", + hdr_cksum.header_name(), supplied_cksum) + << dendl; + + if (! (supplied_cksum.empty()) && + (supplied_cksum != armored_cksum)) { + op_ret = -ERR_INVALID_REQUEST; + return; + } + + buffer::list cksum_bl; + cksum->encode(cksum_bl); + target_attrs.emplace(RGW_ATTR_CKSUM, std::move(cksum_bl)); + } /* cksum */ + + s->object->set_attrs(target_attrs); // make reservation for notification if needed std::unique_ptr<rgw::sal::Notification> res; - res = driver->get_notification(s->object.get(), nullptr, s, rgw::notify::ObjectCreatedCompleteMultipartUpload, y); + res = driver->get_notification( + s->object.get(), nullptr, s, + rgw::notify::ObjectCreatedCompleteMultipartUpload, y); op_ret = res->publish_reserve(this); if (op_ret < 0) { return; } - op_ret = upload->complete(this, y, s->cct, parts->parts, remove_objs, accounted_size, compressed, cs_info, ofs, s->req_id, s->owner, olh_epoch, s->object.get()); + op_ret = + upload->complete(this, y, s->cct, parts->parts, remove_objs, accounted_size, + compressed, cs_info, ofs, s->req_id, s->owner, olh_epoch, + s->object.get()); if (op_ret < 0) { ldpp_dout(this, 0) << "ERROR: upload complete failed ret=" << op_ret << dendl; return; @@ -6533,8 +6695,9 @@ bool RGWCompleteMultipart::check_previously_completed(const RGWMultiCompleteUplo char petag[CEPH_CRYPTO_MD5_DIGESTSIZE]; hex_to_buf(partetag.c_str(), petag, CEPH_CRYPTO_MD5_DIGESTSIZE); hash.Update((const unsigned char *)petag, sizeof(petag)); - ldpp_dout(this, 20) << __func__ << "() re-calculating multipart etag: part: " - << index << ", etag: " << partetag << dendl; + ldpp_dout(this, 20) + << __func__ << "() re-calculating multipart etag: part: " + << index << ", etag: " << partetag << dendl; } unsigned char final_etag[CEPH_CRYPTO_MD5_DIGESTSIZE]; @@ -6651,6 +6814,21 @@ void RGWListMultipart::execute(optional_yield y) if (op_ret < 0) return; + iter = 
attrs.find(RGW_ATTR_CKSUM); + if (iter != attrs.end()) { + auto bliter = iter->second.cbegin(); + try { + rgw::cksum::Cksum tcksum; + tcksum.decode(bliter); + cksum = std::move(tcksum); + } catch (buffer::error& err) { + ldpp_dout(this, 0) << "ERROR: could not decode stored cksum, caught buffer::error" << dendl; + op_ret = -EIO; + } + } + if (op_ret < 0) + return; + op_ret = upload->list_parts(this, s->cct, max_parts, marker, NULL, &truncated, y); } @@ -7464,12 +7642,15 @@ int RGWBulkUploadOp::handle_file(const std::string_view path, attrs.emplace(RGW_ATTR_COMPRESSION, std::move(tmp)); } + /* XXX I don't think bulk upload can support checksums */ + /* Complete the transaction. */ const req_context rctx{this, s->yield, s->trace.get()}; op_ret = processor->complete(size, etag, nullptr, ceph::real_time(), - attrs, ceph::real_time() /* delete_at */, - nullptr, nullptr, nullptr, nullptr, nullptr, - rctx, rgw::sal::FLAG_LOG_OP); + attrs, rgw::cksum::no_cksum, + ceph::real_time() /* delete_at */, + nullptr, nullptr, nullptr, nullptr, nullptr, + rctx, rgw::sal::FLAG_LOG_OP); if (op_ret < 0) { ldpp_dout(this, 20) << "processor::complete returned op_ret=" << op_ret << dendl; } diff --git a/src/rgw/rgw_op.h b/src/rgw/rgw_op.h index 111fef519ac..8f476fb8ff2 100644 --- a/src/rgw/rgw_op.h +++ b/src/rgw/rgw_op.h @@ -1254,8 +1254,7 @@ protected: RGWObjectRetention *obj_retention; RGWObjectLegalHold *obj_legal_hold; - // optional cksum - boost::optional<rgw::cksum::Cksum> cksum; + std::optional<rgw::cksum::Cksum> cksum; public: RGWPutObj() : ofs(0), @@ -1850,6 +1849,8 @@ protected: //object lock std::optional<RGWObjectRetention> obj_retention = std::nullopt; std::optional<RGWObjectLegalHold> obj_legal_hold = std::nullopt; + rgw::sal::Attrs attrs; + rgw::cksum::Type cksum_algo{rgw::cksum::Type::none}; public: RGWInitMultipart() {} @@ -1876,6 +1877,10 @@ protected: jspan_ptr multipart_trace; ceph::real_time upload_time; std::unique_ptr<rgw::sal::Notification> res; + 
std::unique_ptr<rgw::sal::Object> meta_obj; + std::optional<rgw::cksum::Cksum> cksum; + std::optional<std::string> armored_cksum; + off_t ofs = 0; public: RGWCompleteMultipart() {} @@ -1919,6 +1924,7 @@ protected: RGWAccessControlPolicy policy; bool truncated; rgw_placement_rule* placement; + std::optional<rgw::cksum::Cksum> cksum; public: RGWListMultipart() { @@ -2156,7 +2162,12 @@ inline int rgw_get_request_metadata(const DoutPrefixProvider *dpp, "x-amz-server-side-encryption-customer-algorithm", "x-amz-server-side-encryption-customer-key", "x-amz-server-side-encryption-customer-key-md5", + /* XXX agreed w/cbodley that probably a cleanup is needed here--we probably + * don't want to store these, esp. under user.rgw */ "x-amz-storage-class", + "x-amz-content-sha256", + "x-amz-checksum-algorithm", + "x-amz-date" }; size_t valid_meta_count = 0; diff --git a/src/rgw/rgw_process.cc b/src/rgw/rgw_process.cc index 10e544b577d..8c93f77278b 100644 --- a/src/rgw/rgw_process.cc +++ b/src/rgw/rgw_process.cc @@ -351,7 +351,7 @@ int process_request(const RGWProcessEnv& penv, goto done; } req->op = op; - ldpp_dout(op, 10) << "op=" << typeid(*op).name() << dendl; + ldpp_dout(op, 10) << "op=" << typeid(*op).name() << " " << dendl; s->op_type = op->get_type(); try { diff --git a/src/rgw/rgw_rest_s3.cc b/src/rgw/rgw_rest_s3.cc index 3dcedb1a28b..8a4218855a4 100644 --- a/src/rgw/rgw_rest_s3.cc +++ b/src/rgw/rgw_rest_s3.cc @@ -15,6 +15,8 @@ #include "common/safe_io.h" #include "common/errno.h" #include "auth/Crypto.h" +#include "rgw_cksum.h" +#include "rgw_common.h" #include <boost/algorithm/string.hpp> #include <boost/algorithm/string/replace.hpp> #include <boost/algorithm/string/predicate.hpp> @@ -68,7 +70,7 @@ #include "rgw_rest_iam.h" #include "rgw_sts.h" #include "rgw_sal_rados.h" - +#include "rgw_cksum_pipe.h" #include "rgw_s3select.h" #define dout_context g_ceph_context @@ -479,7 +481,7 @@ int RGWGetObj_ObjStore_S3::send_response_data(bufferlist& bl, off_t bl_ofs, } catch 
(const buffer::error&) {} } - if (multipart_parts_count) { + if (multipart_parts_count && multipart_parts_count > 0) { dump_header(s, "x-amz-mp-parts-count", *multipart_parts_count); } @@ -502,7 +504,12 @@ int RGWGetObj_ObjStore_S3::send_response_data(bufferlist& bl, off_t bl_ofs, try { rgw::cksum::Cksum cksum; decode(cksum, i->second); - dump_header(s, cksum.header_name(), cksum.to_armor()); + if (multipart_parts_count && multipart_parts_count > 0) { + dump_header(s, cksum.header_name(), + fmt::format("{}-{}", cksum.to_armor(), *multipart_parts_count)); + } else { + dump_header(s, cksum.header_name(), cksum.to_armor()); + } } catch (buffer::error& err) { ldpp_dout(this, 0) << "ERROR: failed to decode rgw::cksum::Cksum" << dendl; @@ -2733,7 +2740,7 @@ void RGWPutObj_ObjStore_S3::send_response() dump_content_length(s, 0); dump_header_if_nonempty(s, "x-amz-version-id", version_id); dump_header_if_nonempty(s, "x-amz-expiration", expires); - if (cksum) { + if (cksum && cksum->aws()) { dump_header(s, cksum->header_name(), cksum->to_armor()); } for (auto &it : crypt_http_responses) @@ -2962,6 +2969,7 @@ int RGWPostObj_ObjStore_S3::get_params(optional_yield y) std::string v { rgw_trim_whitespace(std::string_view(d.c_str(), d.length())) }; rgw_set_amz_meta_header(s->info.crypt_attribute_map, p.first, v, OVERWRITE); } + int r = get_encryption_defaults(s); if (r < 0) { ldpp_dout(this, 5) << __func__ << "(): get_encryption_defaults() returned ret=" << r << dendl; @@ -4003,6 +4011,11 @@ int RGWInitMultipart_ObjStore_S3::get_params(optional_yield y) return -ERR_INVALID_REQUEST; } + auto algo_hdr = rgw::putobj::cksum_algorithm_hdr(*(s->info.env)); + if (algo_hdr.second) { + cksum_algo = rgw::cksum::parse_cksum_type(algo_hdr.second); + } + return 0; } @@ -4020,6 +4033,9 @@ void RGWInitMultipart_ObjStore_S3::send_response() dump_time_header(s, "x-amz-abort-date", abort_date); dump_header_if_nonempty(s, "x-amz-abort-rule-id", rule_id); } + if (cksum_algo != rgw::cksum::Type::none) 
{ + dump_header(s, "x-amz-checksum-algorithm", safe_upcase_str(to_string(cksum_algo))); + } end_header(s, this, to_mime_type(s->format)); if (op_ret == 0) { dump_start(s); @@ -4082,6 +4098,9 @@ void RGWCompleteMultipart_ObjStore_S3::send_response() s->formatter->dump_string("Bucket", s->bucket_name); s->formatter->dump_string("Key", s->object->get_name()); s->formatter->dump_string("ETag", etag); + if (armored_cksum) { + s->formatter->dump_string(cksum->element_name(), *armored_cksum); + } s->formatter->close_section(); rgw_flush_formatter_and_reset(s, s->formatter); } @@ -4133,9 +4152,15 @@ void RGWListMultipart_ObjStore_S3::send_response() ACLOwner& owner = policy.get_owner(); dump_owner(s, owner.id, owner.display_name); + /* TODO: missing initiator: + Container element that identifies who initiated the multipart upload. If the initiator is an AWS account, this element provides the same information as the Owner element. If the initiator is an IAM User, this element provides the user ARN and display name, see https://docs.aws.amazon.com/AmazonS3/latest/API/API_ListParts.html */ + + if (cksum && cksum->aws()) { + s->formatter->dump_string("ChecksumAlgorithm", safe_upcase_str(cksum->type_string())); + } + for (; iter != upload->get_parts().end(); ++iter) { rgw::sal::MultipartPart* part = iter->second.get(); - s->formatter->open_object_section("Part"); dump_time(s, "LastModified", part->get_mtime()); @@ -4143,6 +4168,11 @@ void RGWListMultipart_ObjStore_S3::send_response() s->formatter->dump_unsigned("PartNumber", part->get_num()); s->formatter->dump_format("ETag", "\"%s\"", part->get_etag().c_str()); s->formatter->dump_unsigned("Size", part->get_size()); + auto& part_cksum = part->get_cksum(); + if (part_cksum && part_cksum->aws()) { + s->formatter->dump_string(part_cksum->element_name(), + fmt::format("{}", part_cksum->to_armor())); + } s->formatter->close_section(); } s->formatter->close_section(); diff --git a/src/rgw/rgw_sal.h b/src/rgw/rgw_sal.h index 
85c57ff7002..1701f1b3dc9 100644 --- a/src/rgw/rgw_sal.h +++ b/src/rgw/rgw_sal.h @@ -15,10 +15,12 @@ #pragma once +#include <optional> #include <boost/intrusive_ptr.hpp> #include <boost/smart_ptr/intrusive_ref_counter.hpp> #include "common/tracer.h" +#include "rgw_cksum.h" #include "rgw_sal_fwd.h" #include "rgw_lua.h" #include "rgw_notify_event_type.h" @@ -179,6 +181,7 @@ class ObjectProcessor : public DataProcessor { virtual int complete(size_t accounted_size, const std::string& etag, ceph::real_time *mtime, ceph::real_time set_mtime, std::map<std::string, bufferlist>& attrs, + const std::optional<rgw::cksum::Cksum>& cksum, ceph::real_time delete_at, const char *if_match, const char *if_nomatch, const std::string *user_data, @@ -1343,6 +1346,8 @@ public: virtual const std::string& get_etag() = 0; /** Get the modification time of this part */ virtual ceph::real_time& get_mtime() = 0; + /** Get computed (or default/empty) checksum */ + virtual const std::optional<rgw::cksum::Cksum>& get_cksum() = 0; }; /** @@ -1359,6 +1364,7 @@ public: //object lock std::optional<RGWObjectRetention> obj_retention = std::nullopt; std::optional<RGWObjectLegalHold> obj_legal_hold = std::nullopt; + rgw::cksum::Type cksum_type = rgw::cksum::Type::none; MultipartUpload() = default; virtual ~MultipartUpload() = default; @@ -1616,6 +1622,7 @@ public: virtual int complete(size_t accounted_size, const std::string& etag, ceph::real_time *mtime, ceph::real_time set_mtime, std::map<std::string, bufferlist>& attrs, + const std::optional<rgw::cksum::Cksum>& cksum, ceph::real_time delete_at, const char *if_match, const char *if_nomatch, const std::string *user_data, diff --git a/src/rgw/rgw_sal_dbstore.cc b/src/rgw/rgw_sal_dbstore.cc index 4b27bd35f99..cb62ebeccee 100644 --- a/src/rgw/rgw_sal_dbstore.cc +++ b/src/rgw/rgw_sal_dbstore.cc @@ -1210,9 +1210,11 @@ namespace rgw::sal { return 0; } - int DBMultipartWriter::complete(size_t accounted_size, const std::string& etag, + int 
DBMultipartWriter::complete( + size_t accounted_size, const std::string& etag, ceph::real_time *mtime, ceph::real_time set_mtime, std::map<std::string, bufferlist>& attrs, + const std::optional<rgw::cksum::Cksum>& cksum, ceph::real_time delete_at, const char *if_match, const char *if_nomatch, const std::string *user_data, @@ -1234,6 +1236,7 @@ namespace rgw::sal { RGWUploadPartInfo info; info.num = part_num; info.etag = etag; + info.cksum = cksum; info.size = total_data_size; info.accounted_size = accounted_size; info.modified = real_clock::now(); @@ -1368,6 +1371,7 @@ namespace rgw::sal { int DBAtomicWriter::complete(size_t accounted_size, const std::string& etag, ceph::real_time *mtime, ceph::real_time set_mtime, std::map<std::string, bufferlist>& attrs, + const std::optional<rgw::cksum::Cksum>& cksum, ceph::real_time delete_at, const char *if_match, const char *if_nomatch, const std::string *user_data, diff --git a/src/rgw/rgw_sal_dbstore.h b/src/rgw/rgw_sal_dbstore.h index defa5c2c50d..2dfc340315d 100644 --- a/src/rgw/rgw_sal_dbstore.h +++ b/src/rgw/rgw_sal_dbstore.h @@ -363,7 +363,9 @@ protected: virtual uint64_t get_size() { return info.accounted_size; } virtual const std::string& get_etag() { return info.etag; } virtual ceph::real_time& get_mtime() { return info.modified; } - + virtual const std::optional<rgw::cksum::Cksum>& get_cksum() { + return info.cksum; + } }; class DBMPObj { @@ -633,6 +635,7 @@ protected: virtual int complete(size_t accounted_size, const std::string& etag, ceph::real_time *mtime, ceph::real_time set_mtime, std::map<std::string, bufferlist>& attrs, + const std::optional<rgw::cksum::Cksum>& cksum, ceph::real_time delete_at, const char *if_match, const char *if_nomatch, const std::string *user_data, @@ -682,6 +685,7 @@ public: virtual int complete(size_t accounted_size, const std::string& etag, ceph::real_time *mtime, ceph::real_time set_mtime, std::map<std::string, bufferlist>& attrs, + const std::optional<rgw::cksum::Cksum>& cksum, 
ceph::real_time delete_at, const char *if_match, const char *if_nomatch, const std::string *user_data, diff --git a/src/rgw/rgw_sal_filter.cc b/src/rgw/rgw_sal_filter.cc index 391d7566249..8408620ba5c 100644 --- a/src/rgw/rgw_sal_filter.cc +++ b/src/rgw/rgw_sal_filter.cc @@ -1442,6 +1442,7 @@ int FilterWriter::process(bufferlist&& data, uint64_t offset) int FilterWriter::complete(size_t accounted_size, const std::string& etag, ceph::real_time *mtime, ceph::real_time set_mtime, std::map<std::string, bufferlist>& attrs, + const std::optional<rgw::cksum::Cksum>& cksum, ceph::real_time delete_at, const char *if_match, const char *if_nomatch, const std::string *user_data, @@ -1449,7 +1450,7 @@ int FilterWriter::complete(size_t accounted_size, const std::string& etag, const req_context& rctx, uint32_t flags) { - return next->complete(accounted_size, etag, mtime, set_mtime, attrs, + return next->complete(accounted_size, etag, mtime, set_mtime, attrs, cksum, delete_at, if_match, if_nomatch, user_data, zones_trace, canceled, rctx, flags); } diff --git a/src/rgw/rgw_sal_filter.h b/src/rgw/rgw_sal_filter.h index 6ff30041895..664b37fa6a9 100644 --- a/src/rgw/rgw_sal_filter.h +++ b/src/rgw/rgw_sal_filter.h @@ -878,6 +878,9 @@ public: virtual uint64_t get_size() override { return next->get_size(); } virtual const std::string& get_etag() override { return next->get_etag(); } virtual ceph::real_time& get_mtime() override { return next->get_mtime(); } + virtual const std::optional<rgw::cksum::Cksum>& get_cksum() { + return next->get_cksum(); + } }; class FilterMultipartUpload : public MultipartUpload { @@ -1048,6 +1051,7 @@ public: virtual int complete(size_t accounted_size, const std::string& etag, ceph::real_time *mtime, ceph::real_time set_mtime, std::map<std::string, bufferlist>& attrs, + const std::optional<rgw::cksum::Cksum>& cksum, ceph::real_time delete_at, const char *if_match, const char *if_nomatch, const std::string *user_data, diff --git 
a/src/test/rgw/test_rgw_cksum.cc b/src/test/rgw/test_rgw_cksum.cc index 603197d41f9..d410ea27506 100644 --- a/src/test/rgw/test_rgw_cksum.cc +++ b/src/test/rgw/test_rgw_cksum.cc @@ -24,6 +24,7 @@ #include "common/ceph_argparse.h" #include "common/debug.h" #include "rgw/rgw_cksum.h" +#include "rgw/rgw_cksum_pipe.h" #include <openssl/sha.h> #include "rgw/rgw_hex.h" @@ -50,6 +51,17 @@ namespace { std::string dolor = R"(Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.)"; +TEST(RGWCksum, Ctor) +{ + cksum::Cksum ck1; + cksum::Cksum ck2(cksum::Type::none); + + auto ck3 = rgw::putobj::GetHeaderCksumResult(ck1, ""); + + ASSERT_EQ(ck1.to_armor(), ck2.to_armor()); + ASSERT_EQ(ck2.to_armor(), ck3.first.to_armor()); +} + TEST(RGWCksum, Output) { auto o_mode = std::ios::out|std::ios::trunc; |