diff options
author | Matan Breizman <mbreizma@redhat.com> | 2023-01-09 15:07:20 +0100 |
---|---|---|
committer | Matan Breizman <mbreizma@redhat.com> | 2024-01-04 16:18:11 +0100 |
commit | 883f99dd3e17f88fd90cdb6b7c221d3a63bce6b9 (patch) | |
tree | 782a4dec9216ed859611e33a9b18791d2442a1fa /src | |
parent | Merge pull request #55037 from ronen-fr/wip-rf-crush-dummy (diff) | |
download | ceph-883f99dd3e17f88fd90cdb6b7c221d3a63bce6b9.tar.xz ceph-883f99dd3e17f88fd90cdb6b7c221d3a63bce6b9.zip |
crimson/osd/object_metadata_helper: Introduce calc_*_subsets
Signed-off-by: Matan Breizman <mbreizma@redhat.com>
Diffstat (limited to '')
-rw-r--r-- | src/crimson/osd/CMakeLists.txt | 1 | ||||
-rw-r--r-- | src/crimson/osd/object_metadata_helper.cc | 222 | ||||
-rw-r--r-- | src/crimson/osd/object_metadata_helper.h | 24 | ||||
-rw-r--r-- | src/crimson/osd/replicated_recovery_backend.h | 1 | ||||
-rw-r--r-- | src/test/crimson/CMakeLists.txt | 7 | ||||
-rw-r--r-- | src/test/crimson/test_calc_subsets.cc | 255 |
6 files changed, 510 insertions, 0 deletions
diff --git a/src/crimson/osd/CMakeLists.txt b/src/crimson/osd/CMakeLists.txt index 65fb7201f76..c77da7575a5 100644 --- a/src/crimson/osd/CMakeLists.txt +++ b/src/crimson/osd/CMakeLists.txt @@ -15,6 +15,7 @@ add_executable(crimson-osd pg_shard_manager.cc object_context.cc object_context_loader.cc + object_metadata_helper.cc ops_executer.cc osd_operation.cc osd_operations/client_request.cc diff --git a/src/crimson/osd/object_metadata_helper.cc b/src/crimson/osd/object_metadata_helper.cc new file mode 100644 index 00000000000..3016b0ae406 --- /dev/null +++ b/src/crimson/osd/object_metadata_helper.cc @@ -0,0 +1,222 @@ +#include "crimson/osd/object_metadata_helper.h" + +namespace { + seastar::logger& logger() { + return crimson::get_logger(ceph_subsys_osd); + } +} + +namespace crimson::osd { + +/* + * The clone object content may already overlap with the + * next older and the next newest clone object. + * Use the existing (next) clones object overlaps instead + * of pushing the whole clone object to the replica. + */ + +subsets_t calc_clone_subsets( + SnapSet& snapset, const hobject_t& soid, + const pg_missing_t& missing, + const hobject_t &last_backfill) +{ + subsets_t subsets; + logger().debug("{}: {} clone_overlap {} ", + __func__, soid, snapset.clone_overlap); + + uint64_t size = snapset.clone_size[soid.snap]; + if (size) { + subsets.data_subset.insert(0, size); + } + + // TODO: make sure CEPH_FEATURE_OSD_CACHEPOOL is not supported in Crimson + // Skips clone subsets if caching was enabled (allow_incomplete_clones). 
+ +#ifndef UNIT_TESTS_BUILT + if (!crimson::common::local_conf()->osd_recover_clone_overlap) { + logger().debug("{} {} -- osd_recover_clone_overlap is disabled", + __func__, soid); ; + return subsets; + } +#endif + + if (snapset.clones.empty()) { + logger().debug("{} {} -- no clones", __func__, soid); + return subsets; + } + + auto soid_snap_iter = find(snapset.clones.begin(), + snapset.clones.end(), + soid.snap); + assert(soid_snap_iter != snapset.clones.end()); + auto soid_snap_index = soid_snap_iter - snapset.clones.begin(); + + // any overlap with next older clone? + interval_set<uint64_t> cloning; + interval_set<uint64_t> prev; + if (size) { + prev.insert(0, size); + } + for (int i = soid_snap_index - 1; i >= 0; i--) { + hobject_t clone = soid; + clone.snap = snapset.clones[i]; + // clone_overlap of i holds the overlap between i to i+1 + prev.intersection_of(snapset.clone_overlap[snapset.clones[i]]); + if (!missing.is_missing(clone) && clone < last_backfill) { + logger().debug("{} {} has prev {} overlap {}", + __func__, soid, clone, prev); + subsets.clone_subsets[clone] = prev; + cloning.union_of(prev); + break; + } + logger().debug("{} {} does not have prev {} overlap {}", + __func__, soid, clone, prev); + } + + // overlap with next newest? 
+ interval_set<uint64_t> next; + if (size) { + next.insert(0, size); + } + for (unsigned i = soid_snap_index+1; + i < snapset.clones.size(); i++) { + hobject_t clone = soid; + clone.snap = snapset.clones[i]; + // clone_overlap of i-1 holds the overlap between i-1 to i + next.intersection_of(snapset.clone_overlap[snapset.clones[i - 1]]); + if (!missing.is_missing(clone) && clone < last_backfill) { + logger().debug("{} {} has next {} overlap {}", + __func__, soid, clone, next); + subsets.clone_subsets[clone] = next; + cloning.union_of(next); + break; + } + logger().debug("{} {} does not have next {} overlap {}", + __func__, soid, clone, next); + } + +#ifndef UNIT_TESTS_BUILT + if (cloning.num_intervals() > + crimson::common::local_conf().get_val<uint64_t> + ("osd_recover_clone_overlap_limit")) { + logger().debug("skipping clone, too many holes"); + subsets.clone_subsets.clear(); + cloning.clear(); + } +#endif + + // what's left for us to push? + subsets.data_subset.subtract(cloning); + logger().debug("{} {} data_subsets {}" + "clone_subsets {}", + __func__, soid, subsets.data_subset, subsets.clone_subsets); + return subsets; +} + +/* + * Instead of pushing the whole object to the replica, + * make use of: + * 1) ObjectCleanRegion - push modified content only. + * - See: dev/osd_internals/partial_object_recovery + * 2) The modified content may already overlap with the + * next older clone object. Use the existing clone + * object overlap as well. 
+ */ + +subsets_t calc_head_subsets( + uint64_t obj_size, + SnapSet& snapset, + const hobject_t& head, + const pg_missing_t& missing, + const hobject_t &last_backfill) +{ + logger().debug("{}: {} clone_overlap {} ", + __func__, head, snapset.clone_overlap); + + subsets_t subsets; + +// 1) Calculate modified content only + if (obj_size) { + subsets.data_subset.insert(0, obj_size); + } + assert(missing.get_items().contains(head)); + const pg_missing_item missing_item = missing.get_items().at(head); + // let data_subset store only the modified content of the object. + subsets.data_subset.intersection_of(missing_item.clean_regions.get_dirty_regions()); + logger().debug("{} {} data_subset {}", + __func__, head, subsets.data_subset); + + // TODO: make sure CEPH_FEATURE_OSD_CACHEPOOL is not supported in Crimson + // Skips clone subsets if caching was enabled (allow_incomplete_clones). + +#ifndef UNIT_TESTS_BUILT + if (!crimson::common::local_conf()->osd_recover_clone_overlap) { + logger().debug("{} {} -- osd_recover_clone_overlap is disabled", + __func__, head); + return subsets; + } +#endif + + if (snapset.clones.empty()) { + logger().debug("{} {} -- no clones", __func__, head); + return subsets; + } + + // 2) Find any overlap with next older clone + interval_set<uint64_t> cloning; + interval_set<uint64_t> prev; + hobject_t clone = head; + if (obj_size) { + prev.insert(0, obj_size); + } + for (int i = snapset.clones.size()-1; i >= 0; i--) { + clone.snap = snapset.clones[i]; + // let prev store only the overlap with clone i + prev.intersection_of(snapset.clone_overlap[snapset.clones[i]]); + if (!missing.is_missing(clone) && clone < last_backfill) { + logger().debug("{} {} has prev {} overlap {}", + __func__, head, clone, prev); + cloning = prev; + break; + } + logger().debug("{} {} does not have prev {} overlap {}", + __func__, head, clone, prev); + } + + // let cloning store only the overlap with data_subset + cloning.intersection_of(subsets.data_subset); + if 
(cloning.empty()) { + logger().debug("skipping clone, nothing needs to clone"); + return subsets; + } + +#ifndef UNIT_TESTS_BUILT + if (cloning.num_intervals() > + crimson::common::local_conf().get_val<uint64_t> + ("osd_recover_clone_overlap_limit")) { + logger().debug("skipping clone, too many holes"); + subsets.clone_subsets.clear(); + cloning.clear(); + } +#endif + + // what's left for us to push? + subsets.clone_subsets[clone] = cloning; + subsets.data_subset.subtract(cloning); + logger().debug("{} {} data_subsets {}" + "clone_subsets {}", + __func__, head, subsets.data_subset, subsets.clone_subsets); + + return subsets; +} + +void set_subsets( + const subsets_t& subsets, + ObjectRecoveryInfo& recovery_info) +{ + recovery_info.copy_subset = subsets.data_subset; + recovery_info.clone_subset = subsets.clone_subsets; +} + + +} diff --git a/src/crimson/osd/object_metadata_helper.h b/src/crimson/osd/object_metadata_helper.h new file mode 100644 index 00000000000..927fc488811 --- /dev/null +++ b/src/crimson/osd/object_metadata_helper.h @@ -0,0 +1,24 @@ +#pragma once + +#include "osd/osd_types_fmt.h" + +namespace crimson::osd { + struct subsets_t { + interval_set<uint64_t> data_subset; + std::map<hobject_t, interval_set<uint64_t>> clone_subsets; + }; + + subsets_t calc_clone_subsets( + SnapSet& snapset, const hobject_t& soid, + const pg_missing_t& missing, + const hobject_t &last_backfill); + subsets_t calc_head_subsets( + uint64_t obj_size, + SnapSet& snapset, + const hobject_t& head, + const pg_missing_t& missing, + const hobject_t &last_backfill); + void set_subsets( + const subsets_t& subsets, + ObjectRecoveryInfo& recovery_info); +} diff --git a/src/crimson/osd/replicated_recovery_backend.h b/src/crimson/osd/replicated_recovery_backend.h index 16d6369a91f..8fdaf9a10f8 100644 --- a/src/crimson/osd/replicated_recovery_backend.h +++ b/src/crimson/osd/replicated_recovery_backend.h @@ -6,6 +6,7 @@ #include "crimson/common/interruptible_future.h" #include 
"crimson/osd/pg_interval_interrupt_condition.h" #include "crimson/osd/recovery_backend.h" +#include "crimson/osd/object_metadata_helper.h" #include "messages/MOSDPGPull.h" #include "messages/MOSDPGPush.h" diff --git a/src/test/crimson/CMakeLists.txt b/src/test/crimson/CMakeLists.txt index c943ff88546..e1a5dfe73df 100644 --- a/src/test/crimson/CMakeLists.txt +++ b/src/test/crimson/CMakeLists.txt @@ -71,6 +71,13 @@ add_ceph_unittest(unittest-seastar-lru --memory 256M --smp 1) target_link_libraries(unittest-seastar-lru crimson GTest::Main) +add_executable(unittest-seastar-calc-subsets + ${PROJECT_SOURCE_DIR}/src/crimson/osd/object_metadata_helper.cc + test_calc_subsets.cc) +add_ceph_unittest(unittest-seastar-calc-subsets + --memory 256M --smp 1) +target_link_libraries(unittest-seastar-calc-subsets crimson GTest::Main) + add_executable(unittest-fixed-kv-node-layout test_fixed_kv_node_layout.cc) add_ceph_unittest(unittest-fixed-kv-node-layout) diff --git a/src/test/crimson/test_calc_subsets.cc b/src/test/crimson/test_calc_subsets.cc new file mode 100644 index 00000000000..7d23810c2e6 --- /dev/null +++ b/src/test/crimson/test_calc_subsets.cc @@ -0,0 +1,255 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "gtest/gtest.h" +#include "crimson/osd/object_metadata_helper.h" + + +TEST(head_subsets, dirty_region) +{ + uint64_t obj_size = 10; + SnapSet empty_ss; + hobject_t head{object_t{"foo"}, "foo", CEPH_NOSNAP, 42, 0, "nspace"}; + pg_missing_t missing; + pg_missing_item item; + uint64_t offset_1, len_1; + offset_1 = 3; + len_1 = 2; + item.clean_regions.mark_data_region_dirty(offset_1, len_1); + missing.add(head, std::move(item)); + hobject_t last_backfill{object_t{"foo1"}, "foo1", CEPH_NOSNAP, 42, 0, "nspace"}; + interval_set<uint64_t> expect_data_region; + expect_data_region.insert(offset_1, len_1); + +// **** + + crimson::osd::subsets_t result = + crimson::osd::calc_head_subsets(obj_size, + empty_ss, + head, 
+ missing, + last_backfill); + + EXPECT_TRUE(result.clone_subsets.empty()); + EXPECT_TRUE(result.data_subset == expect_data_region); +} + +TEST(head_subsets, head_all_clean) +{ + uint64_t obj_size = 10; + SnapSet empty_ss; + hobject_t head{object_t{"foo"}, "foo", CEPH_NOSNAP, 42, 0, "nspace"}; + pg_missing_t missing; + pg_missing_item item; + missing.add(head, std::move(item)); + hobject_t last_backfill{object_t{"foo1"}, "foo1", CEPH_NOSNAP, 42, 0, "nspace"}; + +// **** + + crimson::osd::subsets_t result = + crimson::osd::calc_head_subsets(obj_size, + empty_ss, + head, + missing, + last_backfill); + + EXPECT_TRUE(result.clone_subsets.empty()); + EXPECT_TRUE(result.data_subset.empty()); +} + +TEST(head_subsets, all_dirty) +{ + uint64_t obj_size = 10; + SnapSet empty_ss; + hobject_t head{object_t{"foo"}, "foo", CEPH_NOSNAP, 42, 0, "nspace"}; + pg_missing_t missing; + pg_missing_item item; + item.clean_regions.mark_fully_dirty(); + missing.add(head, std::move(item)); + hobject_t last_backfill{object_t{"foo1"}, "foo1", CEPH_NOSNAP, 42, 0, "nspace"}; + +// **** + + crimson::osd::subsets_t result = + crimson::osd::calc_head_subsets(obj_size, + empty_ss, + head, + missing, + last_backfill); + + EXPECT_TRUE(result.clone_subsets.empty()); + EXPECT_TRUE(result.data_subset.size() == obj_size); +} + +TEST(head_subsets, clone_overlap) +{ + uint64_t obj_size = 10; + SnapSet ss; + hobject_t head{object_t{"foo"}, "foo", CEPH_NOSNAP, 42, 0, "nspace"}; + pg_missing_t missing; + pg_missing_item item; + item.clean_regions.mark_fully_dirty(); + missing.add(head, std::move(item)); + hobject_t last_backfill{object_t{"foo1"}, "foo1", CEPH_NOSNAP, 42, 0, "nspace"}; + + // Clone object: + hobject_t clone = head; + clone.snap = 0; + std::map<snapid_t, interval_set<uint64_t>> clone_overlap; // overlap w/ next + interval_set<uint64_t> overlap; + uint64_t offset_2, len_2; + offset_2 = 2; + len_2 = 2; + overlap.insert(offset_2, len_2); + clone_overlap[clone.snap] = overlap; + + // Snapset: + // 
ss.seq = 0; + // ss.snaps = snaps; (legacy) + ss.clones.push_back(clone.snap); + ss.clone_overlap = clone_overlap; + // ss.clone_size = clone_size; + // ss.clone_snaps = clone_snaps; + + // Expected intervals: + interval_set<uint64_t> expect_clone_subset; + expect_clone_subset.insert(offset_2, len_2); + +// **** + + crimson::osd::subsets_t result = + crimson::osd::calc_head_subsets(obj_size, + ss, + head, + missing, + last_backfill); + EXPECT_TRUE(result.clone_subsets[clone] == expect_clone_subset); +} + +TEST(head_subsets, dirty_region_and_clone_overlap) +{ + uint64_t obj_size = 100; + SnapSet ss; + hobject_t head{object_t{"foo"}, "foo", CEPH_NOSNAP, 42, 0, "nspace"}; + pg_missing_t missing; + pg_missing_item item; + uint64_t offset_1, len_1; + offset_1 = 3; + len_1 = 2; + item.clean_regions.mark_data_region_dirty(offset_1, len_1); + missing.add(head, std::move(item)); + hobject_t last_backfill{object_t{"foo1"}, "foo1", CEPH_NOSNAP, 42, 0, "nspace"}; + interval_set<uint64_t> expect_data_region; + expect_data_region.insert(offset_1, len_1); + + // Clone object: + hobject_t clone = head; + clone.snap = 0; + std::map<snapid_t, interval_set<uint64_t>> clone_overlap; // overlap w/ next + interval_set<uint64_t> overlap; + uint64_t offset_2, len_2; + offset_2 = 2; + len_2 = 2; + overlap.insert(offset_2, len_2); + clone_overlap[clone.snap] = overlap; + + // Snapset: + // ss.seq = 0; + // ss.snaps = snaps; (legacy) + ss.clones.push_back(clone.snap); + ss.clone_overlap = clone_overlap; + // ss.clone_size = clone_size; + // ss.clone_snaps = clone_snaps; + + // Expected intervals: + interval_set<uint64_t> expect_clone_subset; + expect_clone_subset.insert(offset_2, len_2); + expect_clone_subset.intersection_of(expect_data_region); + expect_data_region.subtract(expect_clone_subset); + +// **** + + crimson::osd::subsets_t result = + crimson::osd::calc_head_subsets(obj_size, + ss, + head, + missing, + last_backfill); + EXPECT_TRUE(result.clone_subsets[clone] == 
expect_clone_subset); + EXPECT_TRUE(result.data_subset == expect_data_region); +} + +TEST(clone_subsets, overlap) +{ + uint64_t clone_size = 10; + SnapSet ss; + hobject_t clone{object_t{"foo"}, "foo", 1, 42, 0, "nspace"}; + ss.clone_size[1] = clone_size; + ss.clones.push_back(snapid_t(0)); + ss.clones.push_back(snapid_t(1)); + ss.clones.push_back(snapid_t(2)); + pg_missing_t missing; + pg_missing_item item; + missing.add(clone, std::move(item)); + hobject_t last_backfill{object_t{"foo1"}, "foo1", CEPH_NOSNAP, 42, 0, "nspace"}; + + interval_set<uint64_t> expect_clone_subset1, expect_clone_subset2; + + // Next older clone: + hobject_t older_clone = clone; + older_clone.snap = 0; + { + std::map<snapid_t, interval_set<uint64_t>> clone_overlap; // overlap w/ next + interval_set<uint64_t> overlap; + uint64_t offset_2, len_2; + offset_2 = 4; + len_2 = 2; + overlap.insert(offset_2, len_2); + ss.clone_overlap[older_clone.snap] = overlap; + + // Snapset: + // ss.seq = 0; + // ss.snaps = snaps; (legacy) + // ss.clones.push_back(snapid_t()); + // ss.clone_overlap = clone_overlap; + // ss.clone_size = clone_size; + // ss.clone_snaps = clone_snaps; + + // Expected intervals: + expect_clone_subset1.insert(offset_2, len_2); + } + + // Next newest clone: + hobject_t newest_clone = clone; + newest_clone.snap = 2; + { + std::map<snapid_t, interval_set<uint64_t>> clone_overlap; // overlap w/ next + interval_set<uint64_t> overlap; + uint64_t offset_2, len_2; + offset_2 = 2; + len_2 = 2; + overlap.insert(offset_2, len_2); + ss.clone_overlap[newest_clone.snap - 1] = overlap; + + // Snapset: + // ss.seq = 0; + // ss.snaps = snaps; (legacy) + // ss.clones.push_back(snapid_t()); + // ss.clone_overlap = clone_overlap; + // ss.clone_size = clone_size; + // ss.clone_snaps = clone_snaps; + + // Expected intervals: + expect_clone_subset2.insert(offset_2, len_2); + } + +// **** + + crimson::osd::subsets_t result = + crimson::osd::calc_clone_subsets(ss, + clone, + missing, + last_backfill); + 
EXPECT_TRUE(result.clone_subsets[older_clone] == expect_clone_subset1); + EXPECT_TRUE(result.clone_subsets[newest_clone] == expect_clone_subset2); +} |