summaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authorMatan Breizman <mbreizma@redhat.com>2023-01-09 15:07:20 +0100
committerMatan Breizman <mbreizma@redhat.com>2024-01-04 16:18:11 +0100
commit883f99dd3e17f88fd90cdb6b7c221d3a63bce6b9 (patch)
tree782a4dec9216ed859611e33a9b18791d2442a1fa /src
parentMerge pull request #55037 from ronen-fr/wip-rf-crush-dummy (diff)
downloadceph-883f99dd3e17f88fd90cdb6b7c221d3a63bce6b9.tar.xz
ceph-883f99dd3e17f88fd90cdb6b7c221d3a63bce6b9.zip
crimson/osd/object_metadata_helper: Introduce calc_*_subsets
Signed-off-by: Matan Breizman <mbreizma@redhat.com>
Diffstat (limited to '')
-rw-r--r--src/crimson/osd/CMakeLists.txt1
-rw-r--r--src/crimson/osd/object_metadata_helper.cc222
-rw-r--r--src/crimson/osd/object_metadata_helper.h24
-rw-r--r--src/crimson/osd/replicated_recovery_backend.h1
-rw-r--r--src/test/crimson/CMakeLists.txt7
-rw-r--r--src/test/crimson/test_calc_subsets.cc255
6 files changed, 510 insertions, 0 deletions
diff --git a/src/crimson/osd/CMakeLists.txt b/src/crimson/osd/CMakeLists.txt
index 65fb7201f76..c77da7575a5 100644
--- a/src/crimson/osd/CMakeLists.txt
+++ b/src/crimson/osd/CMakeLists.txt
@@ -15,6 +15,7 @@ add_executable(crimson-osd
pg_shard_manager.cc
object_context.cc
object_context_loader.cc
+ object_metadata_helper.cc
ops_executer.cc
osd_operation.cc
osd_operations/client_request.cc
diff --git a/src/crimson/osd/object_metadata_helper.cc b/src/crimson/osd/object_metadata_helper.cc
new file mode 100644
index 00000000000..3016b0ae406
--- /dev/null
+++ b/src/crimson/osd/object_metadata_helper.cc
@@ -0,0 +1,222 @@
+#include "crimson/osd/object_metadata_helper.h"
+
+namespace {
+// File-local accessor for the logger of the OSD subsystem.
+seastar::logger& logger()
+{
+  auto& osd_logger = crimson::get_logger(ceph_subsys_osd);
+  return osd_logger;
+}
+} // anonymous namespace
+
+namespace crimson::osd {
+
+/*
+ * Compute what has to be pushed in order to recover the clone `soid`.
+ *
+ * The clone object content may already overlap with the
+ * next older and the next newest clone object.
+ * Use the existing (next) clones object overlaps instead
+ * of pushing the whole clone object to the replica.
+ *
+ * A neighbouring clone is only used when it is not listed in `missing`
+ * and it sorts before `last_backfill`.
+ *
+ * Returns:
+ *  - data_subset:   ranges of `soid` that still have to be pushed.
+ *  - clone_subsets: per usable neighbouring clone, the ranges that
+ *                   overlap with `soid`.
+ *
+ * Note: `snapset` is taken by non-const reference because its maps are
+ * accessed with operator[].
+ */
+
+subsets_t calc_clone_subsets(
+  SnapSet& snapset, const hobject_t& soid,
+  const pg_missing_t& missing,
+  const hobject_t &last_backfill)
+{
+  subsets_t subsets;
+  logger().debug("{}: {} clone_overlap {} ",
+                 __func__, soid, snapset.clone_overlap);
+
+  // Start from "push the whole clone" and carve out the overlaps below.
+  uint64_t size = snapset.clone_size[soid.snap];
+  if (size) {
+    subsets.data_subset.insert(0, size);
+  }
+
+  // TODO: make sure CEPH_FEATURE_OSD_CACHEPOOL is not supported in Crimson
+  // Skips clone subsets if caching was enabled (allow_incomplete_clones).
+
+#ifndef UNIT_TESTS_BUILT
+  if (!crimson::common::local_conf()->osd_recover_clone_overlap) {
+    logger().debug("{} {} -- osd_recover_clone_overlap is disabled",
+                   __func__, soid);
+    return subsets;
+  }
+#endif
+
+  if (snapset.clones.empty()) {
+    logger().debug("{} {} -- no clones", __func__, soid);
+    return subsets;
+  }
+
+  auto soid_snap_iter = std::find(snapset.clones.begin(),
+                                  snapset.clones.end(),
+                                  soid.snap);
+  assert(soid_snap_iter != snapset.clones.end());
+  if (soid_snap_iter == snapset.clones.end()) {
+    // assert() is compiled out under NDEBUG. Without this guard the loops
+    // below would intersect against clones unrelated to soid; fall back
+    // to pushing the whole object instead.
+    logger().error("{} {} -- clone is not part of the snapset",
+                   __func__, soid);
+    return subsets;
+  }
+  auto soid_snap_index = soid_snap_iter - snapset.clones.begin();
+
+  // any overlap with next older clone?
+  interval_set<uint64_t> cloning;
+  interval_set<uint64_t> prev;
+  if (size) {
+    prev.insert(0, size);
+  }
+  for (int i = soid_snap_index - 1; i >= 0; i--) {
+    hobject_t clone = soid;
+    clone.snap = snapset.clones[i];
+    // clone_overlap of i holds the overlap between i to i+1
+    prev.intersection_of(snapset.clone_overlap[snapset.clones[i]]);
+    if (!missing.is_missing(clone) && clone < last_backfill) {
+      logger().debug("{} {} has prev {} overlap {}",
+                     __func__, soid, clone, prev);
+      subsets.clone_subsets[clone] = prev;
+      cloning.union_of(prev);
+      break;
+    }
+    logger().debug("{} {} does not have prev {} overlap {}",
+                   __func__, soid, clone, prev);
+  }
+
+  // overlap with next newest?
+  interval_set<uint64_t> next;
+  if (size) {
+    next.insert(0, size);
+  }
+  for (unsigned i = soid_snap_index + 1;
+       i < snapset.clones.size(); i++) {
+    hobject_t clone = soid;
+    clone.snap = snapset.clones[i];
+    // clone_overlap of i-1 holds the overlap between i-1 to i
+    next.intersection_of(snapset.clone_overlap[snapset.clones[i - 1]]);
+    if (!missing.is_missing(clone) && clone < last_backfill) {
+      logger().debug("{} {} has next {} overlap {}",
+                     __func__, soid, clone, next);
+      subsets.clone_subsets[clone] = next;
+      cloning.union_of(next);
+      break;
+    }
+    logger().debug("{} {} does not have next {} overlap {}",
+                   __func__, soid, clone, next);
+  }
+
+#ifndef UNIT_TESTS_BUILT
+  // Too fragmented an overlap is not worth cloning: push everything.
+  if (cloning.num_intervals() >
+      crimson::common::local_conf().get_val<uint64_t>
+      ("osd_recover_clone_overlap_limit")) {
+    logger().debug("skipping clone, too many holes");
+    subsets.clone_subsets.clear();
+    cloning.clear();
+  }
+#endif
+
+  // what's left for us to push?
+  subsets.data_subset.subtract(cloning);
+  logger().debug("{} {} data_subsets {} clone_subsets {}",
+                 __func__, soid, subsets.data_subset, subsets.clone_subsets);
+  return subsets;
+}
+
+/*
+ * Compute what has to be pushed in order to recover the head object.
+ *
+ * Instead of pushing the whole object to the replica,
+ * make use of:
+ * 1) ObjectCleanRegion - push modified content only.
+ *    - See: dev/osd_internals/partial_object_recovery
+ * 2) The modified content may already overlap with the
+ *    next older clone object. Use the existing clone
+ *    object overlap as well.
+ *
+ * `head` must have an entry in `missing`; its clean_regions provide the
+ * dirty ranges. A clone is only used when it is not listed in `missing`
+ * and it sorts before `last_backfill`.
+ *
+ * Returns:
+ *  - data_subset:   dirty ranges of `head` that still have to be pushed.
+ *  - clone_subsets: at most one entry, holding the dirty ranges that
+ *                   overlap with the chosen clone.
+ *
+ * Note: `snapset` is taken by non-const reference because its
+ * clone_overlap map is accessed with operator[].
+ */
+
+subsets_t calc_head_subsets(
+  uint64_t obj_size,
+  SnapSet& snapset,
+  const hobject_t& head,
+  const pg_missing_t& missing,
+  const hobject_t &last_backfill)
+{
+  logger().debug("{}: {} clone_overlap {} ",
+                 __func__, head, snapset.clone_overlap);
+
+  subsets_t subsets;
+
+  // 1) Calculate modified content only
+  if (obj_size) {
+    subsets.data_subset.insert(0, obj_size);
+  }
+  assert(missing.get_items().contains(head));
+  // Bind by reference: the item is only read, no need to copy it.
+  const pg_missing_item& missing_item = missing.get_items().at(head);
+  // let data_subset store only the modified content of the object.
+  subsets.data_subset.intersection_of(missing_item.clean_regions.get_dirty_regions());
+  logger().debug("{} {} data_subset {}",
+                 __func__, head, subsets.data_subset);
+
+  // TODO: make sure CEPH_FEATURE_OSD_CACHEPOOL is not supported in Crimson
+  // Skips clone subsets if caching was enabled (allow_incomplete_clones).
+
+#ifndef UNIT_TESTS_BUILT
+  if (!crimson::common::local_conf()->osd_recover_clone_overlap) {
+    logger().debug("{} {} -- osd_recover_clone_overlap is disabled",
+                   __func__, head);
+    return subsets;
+  }
+#endif
+
+  if (snapset.clones.empty()) {
+    logger().debug("{} {} -- no clones", __func__, head);
+    return subsets;
+  }
+
+  // 2) Find any overlap with next older clone
+  interval_set<uint64_t> cloning;
+  interval_set<uint64_t> prev;
+  hobject_t clone = head;
+  if (obj_size) {
+    prev.insert(0, obj_size);
+  }
+  for (int i = snapset.clones.size() - 1; i >= 0; i--) {
+    clone.snap = snapset.clones[i];
+    // let prev store only the overlap with clone i
+    prev.intersection_of(snapset.clone_overlap[snapset.clones[i]]);
+    if (!missing.is_missing(clone) && clone < last_backfill) {
+      logger().debug("{} {} has prev {} overlap {}",
+                     __func__, head, clone, prev);
+      cloning = prev;
+      break;
+    }
+    logger().debug("{} {} does not have prev {} overlap {}",
+                   __func__, head, clone, prev);
+  }
+
+  // let cloning store only the overlap with data_subset
+  cloning.intersection_of(subsets.data_subset);
+  if (cloning.empty()) {
+    logger().debug("skipping clone, nothing needs to clone");
+    return subsets;
+  }
+
+#ifndef UNIT_TESTS_BUILT
+  // Too fragmented an overlap is not worth cloning: push everything.
+  if (cloning.num_intervals() >
+      crimson::common::local_conf().get_val<uint64_t>
+      ("osd_recover_clone_overlap_limit")) {
+    logger().debug("skipping clone, too many holes");
+    cloning.clear();
+  }
+#endif
+
+  // what's left for us to push?
+  // Only record the clone when something is really cloned from it, so
+  // that a skipped clone does not leave an empty entry behind.
+  if (!cloning.empty()) {
+    subsets.clone_subsets[clone] = cloning;
+    subsets.data_subset.subtract(cloning);
+  }
+  logger().debug("{} {} data_subsets {} clone_subsets {}",
+                 __func__, head, subsets.data_subset, subsets.clone_subsets);
+
+  return subsets;
+}
+
+// Store computed subsets into recovery_info:
+// data_subset -> copy_subset, clone_subsets -> clone_subset.
+void set_subsets(
+  const subsets_t& subsets,
+  ObjectRecoveryInfo& recovery_info)
+{
+  const auto& [to_copy, to_clone] = subsets;
+  recovery_info.clone_subset = to_clone;
+  recovery_info.copy_subset = to_copy;
+}
+
+
+}
diff --git a/src/crimson/osd/object_metadata_helper.h b/src/crimson/osd/object_metadata_helper.h
new file mode 100644
index 00000000000..927fc488811
--- /dev/null
+++ b/src/crimson/osd/object_metadata_helper.h
@@ -0,0 +1,24 @@
+#pragma once
+
+#include "osd/osd_types_fmt.h"
+
+namespace crimson::osd {
+
+/// Result of calc_clone_subsets() / calc_head_subsets().
+struct subsets_t {
+  /// Ranges of the object that are left to be pushed.
+  interval_set<uint64_t> data_subset;
+  /// Ranges that overlap with other clones, keyed by that clone.
+  std::map<hobject_t, interval_set<uint64_t>> clone_subsets;
+};
+
+/// Compute the subsets needed to recover the clone object `soid`,
+/// using the overlaps with its next older and next newest clones.
+subsets_t calc_clone_subsets(SnapSet& snapset,
+                             const hobject_t& soid,
+                             const pg_missing_t& missing,
+                             const hobject_t &last_backfill);
+
+/// Compute the subsets needed to recover the `head` object of size
+/// `obj_size`, using its dirty regions and the overlap with a clone.
+subsets_t calc_head_subsets(uint64_t obj_size,
+                            SnapSet& snapset,
+                            const hobject_t& head,
+                            const pg_missing_t& missing,
+                            const hobject_t &last_backfill);
+
+/// Copy `subsets` into the copy_subset / clone_subset fields of
+/// `recovery_info`.
+void set_subsets(const subsets_t& subsets,
+                 ObjectRecoveryInfo& recovery_info);
+
+} // namespace crimson::osd
diff --git a/src/crimson/osd/replicated_recovery_backend.h b/src/crimson/osd/replicated_recovery_backend.h
index 16d6369a91f..8fdaf9a10f8 100644
--- a/src/crimson/osd/replicated_recovery_backend.h
+++ b/src/crimson/osd/replicated_recovery_backend.h
@@ -6,6 +6,7 @@
#include "crimson/common/interruptible_future.h"
#include "crimson/osd/pg_interval_interrupt_condition.h"
#include "crimson/osd/recovery_backend.h"
+#include "crimson/osd/object_metadata_helper.h"
#include "messages/MOSDPGPull.h"
#include "messages/MOSDPGPush.h"
diff --git a/src/test/crimson/CMakeLists.txt b/src/test/crimson/CMakeLists.txt
index c943ff88546..e1a5dfe73df 100644
--- a/src/test/crimson/CMakeLists.txt
+++ b/src/test/crimson/CMakeLists.txt
@@ -71,6 +71,13 @@ add_ceph_unittest(unittest-seastar-lru
--memory 256M --smp 1)
target_link_libraries(unittest-seastar-lru crimson GTest::Main)
+# Unit test for calc_*_subsets: object_metadata_helper.cc is compiled
+# directly into the test binary together with test_calc_subsets.cc.
+add_executable(unittest-seastar-calc-subsets
+ ${PROJECT_SOURCE_DIR}/src/crimson/osd/object_metadata_helper.cc
+ test_calc_subsets.cc)
+add_ceph_unittest(unittest-seastar-calc-subsets
+ --memory 256M --smp 1)
+target_link_libraries(unittest-seastar-calc-subsets crimson GTest::Main)
+
add_executable(unittest-fixed-kv-node-layout
test_fixed_kv_node_layout.cc)
add_ceph_unittest(unittest-fixed-kv-node-layout)
diff --git a/src/test/crimson/test_calc_subsets.cc b/src/test/crimson/test_calc_subsets.cc
new file mode 100644
index 00000000000..7d23810c2e6
--- /dev/null
+++ b/src/test/crimson/test_calc_subsets.cc
@@ -0,0 +1,255 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "gtest/gtest.h"
+#include "crimson/osd/object_metadata_helper.h"
+
+
+// Head with a single dirty range and no clones: only the dirty range
+// has to be pushed and nothing is cloned.
+TEST(head_subsets, dirty_region)
+{
+  const uint64_t obj_size = 10;
+  const uint64_t dirty_off = 3;
+  const uint64_t dirty_len = 2;
+
+  hobject_t head{object_t{"foo"}, "foo", CEPH_NOSNAP, 42, 0, "nspace"};
+  hobject_t last_backfill{object_t{"foo1"}, "foo1", CEPH_NOSNAP, 42, 0, "nspace"};
+
+  pg_missing_item missing_item;
+  missing_item.clean_regions.mark_data_region_dirty(dirty_off, dirty_len);
+  pg_missing_t missing;
+  missing.add(head, std::move(missing_item));
+
+  interval_set<uint64_t> expected_data;
+  expected_data.insert(dirty_off, dirty_len);
+
+  SnapSet no_clones;
+  const auto result = crimson::osd::calc_head_subsets(
+    obj_size, no_clones, head, missing, last_backfill);
+
+  EXPECT_TRUE(result.clone_subsets.empty());
+  EXPECT_TRUE(result.data_subset == expected_data);
+}
+
+// Head whose missing item has no dirty marks and no clones:
+// nothing is pushed and nothing is cloned.
+TEST(head_subsets, head_all_clean)
+{
+  const uint64_t obj_size = 10;
+
+  hobject_t head{object_t{"foo"}, "foo", CEPH_NOSNAP, 42, 0, "nspace"};
+  hobject_t last_backfill{object_t{"foo1"}, "foo1", CEPH_NOSNAP, 42, 0, "nspace"};
+
+  pg_missing_item untouched_item;
+  pg_missing_t missing;
+  missing.add(head, std::move(untouched_item));
+
+  SnapSet no_clones;
+  const auto result = crimson::osd::calc_head_subsets(
+    obj_size, no_clones, head, missing, last_backfill);
+
+  EXPECT_TRUE(result.clone_subsets.empty());
+  EXPECT_TRUE(result.data_subset.empty());
+}
+
+// Fully dirty head and no clones: the whole object is pushed.
+TEST(head_subsets, all_dirty)
+{
+  const uint64_t obj_size = 10;
+
+  hobject_t head{object_t{"foo"}, "foo", CEPH_NOSNAP, 42, 0, "nspace"};
+  hobject_t last_backfill{object_t{"foo1"}, "foo1", CEPH_NOSNAP, 42, 0, "nspace"};
+
+  pg_missing_item dirty_item;
+  dirty_item.clean_regions.mark_fully_dirty();
+  pg_missing_t missing;
+  missing.add(head, std::move(dirty_item));
+
+  SnapSet no_clones;
+  const auto result = crimson::osd::calc_head_subsets(
+    obj_size, no_clones, head, missing, last_backfill);
+
+  EXPECT_TRUE(result.clone_subsets.empty());
+  EXPECT_TRUE(result.data_subset.size() == obj_size);
+}
+
+// Fully dirty head with one clone overlapping it: the overlapping range
+// is taken from the clone and only the rest of the head is pushed.
+TEST(head_subsets, clone_overlap)
+{
+  uint64_t obj_size = 10;
+  SnapSet ss;
+  hobject_t head{object_t{"foo"}, "foo", CEPH_NOSNAP, 42, 0, "nspace"};
+  pg_missing_t missing;
+  pg_missing_item item;
+  item.clean_regions.mark_fully_dirty();
+  missing.add(head, std::move(item));
+  hobject_t last_backfill{object_t{"foo1"}, "foo1", CEPH_NOSNAP, 42, 0, "nspace"};
+
+  // Clone object:
+  hobject_t clone = head;
+  clone.snap = 0;
+  const uint64_t overlap_off = 2;
+  const uint64_t overlap_len = 2;
+  interval_set<uint64_t> overlap;
+  overlap.insert(overlap_off, overlap_len);
+
+  // Snapset: a single clone that overlaps with the head
+  ss.clones.push_back(clone.snap);
+  ss.clone_overlap[clone.snap] = overlap;
+
+  // Expected intervals:
+  interval_set<uint64_t> expect_clone_subset;
+  expect_clone_subset.insert(overlap_off, overlap_len);
+  // Everything of the (fully dirty) head except the cloned range.
+  interval_set<uint64_t> expect_data_region;
+  expect_data_region.insert(0, obj_size);
+  expect_data_region.subtract(expect_clone_subset);
+
+// ****
+
+  crimson::osd::subsets_t result =
+    crimson::osd::calc_head_subsets(obj_size,
+                                    ss,
+                                    head,
+                                    missing,
+                                    last_backfill);
+  EXPECT_TRUE(result.clone_subsets[clone] == expect_clone_subset);
+  EXPECT_TRUE(result.data_subset == expect_data_region);
+}
+
+// Partially dirty head with one overlapping clone: only the part of the
+// dirty range shared with the clone is cloned, the rest of the dirty
+// range is pushed.
+TEST(head_subsets, dirty_region_and_clone_overlap)
+{
+  const uint64_t obj_size = 100;
+  const uint64_t dirty_off = 3;
+  const uint64_t dirty_len = 2;
+  const uint64_t overlap_off = 2;
+  const uint64_t overlap_len = 2;
+
+  hobject_t head{object_t{"foo"}, "foo", CEPH_NOSNAP, 42, 0, "nspace"};
+  hobject_t last_backfill{object_t{"foo1"}, "foo1", CEPH_NOSNAP, 42, 0, "nspace"};
+
+  // Missing set: head with a single dirty range
+  pg_missing_item missing_item;
+  missing_item.clean_regions.mark_data_region_dirty(dirty_off, dirty_len);
+  pg_missing_t missing;
+  missing.add(head, std::move(missing_item));
+
+  // Snapset: a single clone that overlaps with the head
+  hobject_t clone = head;
+  clone.snap = 0;
+  interval_set<uint64_t> head_overlap;
+  head_overlap.insert(overlap_off, overlap_len);
+  std::map<snapid_t, interval_set<uint64_t>> overlaps; // overlap w/ next
+  overlaps[clone.snap] = head_overlap;
+  SnapSet ss;
+  ss.clones.push_back(clone.snap);
+  ss.clone_overlap = overlaps;
+
+  // Expected intervals: cloned = overlap within the dirty range,
+  // pushed = dirty range without the cloned part
+  interval_set<uint64_t> expected_data;
+  expected_data.insert(dirty_off, dirty_len);
+  interval_set<uint64_t> expected_clone;
+  expected_clone.insert(overlap_off, overlap_len);
+  expected_clone.intersection_of(expected_data);
+  expected_data.subtract(expected_clone);
+
+  auto result = crimson::osd::calc_head_subsets(
+    obj_size, ss, head, missing, last_backfill);
+
+  EXPECT_TRUE(result.clone_subsets[clone] == expected_clone);
+  EXPECT_TRUE(result.data_subset == expected_data);
+}
+
+// Missing clone (snap 1) between an older (snap 0) and a newer (snap 2)
+// clone, both available: each overlap is taken from the matching
+// neighbour and only the remaining ranges are pushed.
+TEST(clone_subsets, overlap)
+{
+  uint64_t clone_size = 10;
+  SnapSet ss;
+  hobject_t clone{object_t{"foo"}, "foo", 1, 42, 0, "nspace"};
+  ss.clone_size[1] = clone_size;
+  ss.clones.push_back(snapid_t(0));
+  ss.clones.push_back(snapid_t(1));
+  ss.clones.push_back(snapid_t(2));
+  pg_missing_t missing;
+  pg_missing_item item;
+  missing.add(clone, std::move(item));
+  hobject_t last_backfill{object_t{"foo1"}, "foo1", CEPH_NOSNAP, 42, 0, "nspace"};
+
+  interval_set<uint64_t> expect_clone_subset1, expect_clone_subset2;
+
+  // Next older clone:
+  hobject_t older_clone = clone;
+  older_clone.snap = 0;
+  {
+    const uint64_t offset = 4;
+    const uint64_t len = 2;
+    interval_set<uint64_t> overlap;
+    overlap.insert(offset, len);
+    // clone_overlap of a clone holds its overlap with the next clone
+    ss.clone_overlap[older_clone.snap] = overlap;
+
+    // Expected intervals:
+    expect_clone_subset1.insert(offset, len);
+  }
+
+  // Next newest clone:
+  hobject_t newest_clone = clone;
+  newest_clone.snap = 2;
+  {
+    const uint64_t offset = 2;
+    const uint64_t len = 2;
+    interval_set<uint64_t> overlap;
+    overlap.insert(offset, len);
+    // the overlap with the newest clone is stored under the clone itself
+    ss.clone_overlap[clone.snap] = overlap;
+
+    // Expected intervals:
+    expect_clone_subset2.insert(offset, len);
+  }
+
+  // Whatever is not covered by one of the neighbours has to be pushed.
+  interval_set<uint64_t> expect_data_region;
+  expect_data_region.insert(0, clone_size);
+  expect_data_region.subtract(expect_clone_subset1);
+  expect_data_region.subtract(expect_clone_subset2);
+
+// ****
+
+  crimson::osd::subsets_t result =
+    crimson::osd::calc_clone_subsets(ss,
+                                     clone,
+                                     missing,
+                                     last_backfill);
+  EXPECT_TRUE(result.clone_subsets[older_clone] == expect_clone_subset1);
+  EXPECT_TRUE(result.clone_subsets[newest_clone] == expect_clone_subset2);
+  EXPECT_TRUE(result.data_subset == expect_data_region);
+}