18 files changed, 490 insertions, 325 deletions
diff --git a/src/crimson/os/seastore/async_cleaner.cc b/src/crimson/os/seastore/async_cleaner.cc
index cecdb985532..341c5c5524a 100644
--- a/src/crimson/os/seastore/async_cleaner.cc
+++ b/src/crimson/os/seastore/async_cleaner.cc
@@ -1142,8 +1142,7 @@ SegmentCleaner::do_reclaim_space(
             pin->get_key(),
             pin->get_val(),
             pin->get_length(),
-            pin->get_type(),
-            JOURNAL_SEQ_NULL);
+            pin->get_type());
         }
         for (auto &cached_backref : cached_backref_entries) {
           if (cached_backref.laddr == L_ADDR_NULL) {
diff --git a/src/crimson/os/seastore/backref_entry.h b/src/crimson/os/seastore/backref_entry.h
new file mode 100644
index 00000000000..5f9becc9565
--- /dev/null
+++ b/src/crimson/os/seastore/backref_entry.h
@@ -0,0 +1,127 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#pragma once
+
+#include <memory>
+#include <iostream>
+
+#if FMT_VERSION >= 90000
+#include <fmt/ostream.h>
+#endif
+
+#include <boost/intrusive/set.hpp>
+
+#include "crimson/os/seastore/seastore_types.h"
+
+namespace crimson::os::seastore {
+
+struct backref_entry_t {
+  using ref_t = std::unique_ptr<backref_entry_t>;
+
+  backref_entry_t(
+    const paddr_t& paddr,
+    const laddr_t& laddr,
+    extent_len_t len,
+    extent_types_t type)
+    : paddr(paddr),
+      laddr(laddr),
+      len(len),
+      type(type) {
+    assert(len > 0);
+  }
+  paddr_t paddr = P_ADDR_NULL;
+  laddr_t laddr = L_ADDR_NULL;
+  extent_len_t len = 0;
+  extent_types_t type = extent_types_t::NONE;
+  friend bool operator< (
+    const backref_entry_t &l,
+    const backref_entry_t &r) {
+    return l.paddr < r.paddr;
+  }
+  friend bool operator> (
+    const backref_entry_t &l,
+    const backref_entry_t &r) {
+    return l.paddr > r.paddr;
+  }
+  friend bool operator== (
+    const backref_entry_t &l,
+    const backref_entry_t &r) {
+    return l.paddr == r.paddr;
+  }
+
+  using set_hook_t =
+    boost::intrusive::set_member_hook<
+      boost::intrusive::link_mode<
+        boost::intrusive::auto_unlink>>;
+  set_hook_t backref_set_hook;
+  using backref_set_member_options = boost::intrusive::member_hook<
+    backref_entry_t,
+    set_hook_t,
+    &backref_entry_t::backref_set_hook>;
+  using multiset_t = boost::intrusive::multiset<
+    backref_entry_t,
+    backref_set_member_options,
+    boost::intrusive::constant_time_size<false>>;
+
+  struct cmp_t {
+    using is_transparent = paddr_t;
+    bool operator()(
+      const backref_entry_t &l,
+      const backref_entry_t &r) const {
+      return l.paddr < r.paddr;
+    }
+    bool operator()(const paddr_t l, const backref_entry_t &r) const {
+      return l < r.paddr;
+    }
+    bool operator()(const backref_entry_t &l, const paddr_t r) const {
+      return l.paddr < r;
+    }
+  };
+
+  static ref_t create_alloc(
+    const paddr_t& paddr,
+    const laddr_t& laddr,
+    extent_len_t len,
+    extent_types_t type) {
+    assert(is_backref_mapped_type(type));
+    assert(laddr != L_ADDR_NULL);
+    return std::make_unique<backref_entry_t>(
+      paddr, laddr, len, type);
+  }
+
+  static ref_t create_retire(
+    const paddr_t& paddr,
+    extent_len_t len,
+    extent_types_t type) {
+    assert(is_backref_mapped_type(type) ||
+           is_retired_placeholder_type(type));
+    return std::make_unique<backref_entry_t>(
+      paddr, L_ADDR_NULL, len, type);
+  }
+
+  static ref_t create(const alloc_blk_t& delta) {
+    return std::make_unique<backref_entry_t>(
+      delta.paddr, delta.laddr, delta.len, delta.type);
+  }
+};
+
+inline std::ostream &operator<<(std::ostream &out, const backref_entry_t &ent) {
+  return out << "backref_entry_t{"
+             << ent.paddr << "~0x" << std::hex << ent.len << std::dec << ", "
+             << "laddr: " << ent.laddr << ", "
+             << "type: " << ent.type
+             << "}";
+}
+
+using backref_entry_ref = backref_entry_t::ref_t;
+using backref_entry_mset_t = backref_entry_t::multiset_t;
+using backref_entry_refs_t = std::vector<backref_entry_ref>;
+using backref_entryrefs_by_seq_t = std::map<journal_seq_t, backref_entry_refs_t>;
+using backref_entry_query_set_t = std::set<backref_entry_t, backref_entry_t::cmp_t>;
+
+} // namespace crimson::os::seastore
+
+#if FMT_VERSION >= 90000
+template <> struct fmt::formatter<crimson::os::seastore::backref_entry_t> : fmt::ostream_formatter {};
+#endif
diff --git a/src/crimson/os/seastore/cache.cc b/src/crimson/os/seastore/cache.cc
index cdd9c542d95..5898b9bad0a 100644
--- a/src/crimson/os/seastore/cache.cc
+++ b/src/crimson/os/seastore/cache.cc
@@ -28,15 +28,6 @@ SET_SUBSYS(seastore_cache);
 
 namespace crimson::os::seastore {
 
-std::ostream &operator<<(std::ostream &out, const backref_entry_t &ent) {
-  return out << "backref_entry_t{"
-             << ent.paddr << "~0x" << std::hex << ent.len << std::dec << ", "
-             << "laddr: " << ent.laddr << ", "
-             << "type: " << ent.type << ", "
-             << "seq: " << ent.seq << ", "
-             << "}";
-}
-
 Cache::Cache(
   ExtentPlacementManager &epm)
   : epm(epm),
@@ -1348,21 +1339,39 @@ record_t Cache::prepare_record(
   io_stat_t retire_stat;
   std::vector<alloc_delta_t> alloc_deltas;
   alloc_delta_t rel_delta;
+  backref_entry_refs_t backref_entries;
   rel_delta.op = alloc_delta_t::op_types_t::CLEAR;
   for (auto &i: t.retired_set) {
     auto &extent = i.extent;
     get_by_ext(efforts.retire_by_ext,
               extent->get_type()).increment(extent->get_length());
     retire_stat.increment(extent->get_length());
-    DEBUGT("retired and remove extent -- {}", t, *extent);
+    DEBUGT("retired and remove extent {}~0x{:x} -- {}",
+           t, extent->get_paddr(), extent->get_length(), *extent);
     commit_retire_extent(t, extent);
-    if (is_backref_mapped_extent_node(extent) ||
-        is_retired_placeholder_type(extent->get_type())) {
+
+    // Note: commit extents and backref allocations in the same place
+    if (is_backref_mapped_type(extent->get_type()) ||
+        is_retired_placeholder_type(extent->get_type())) {
+      DEBUGT("backref_entry free {}~0x{:x}",
+             t,
+             extent->get_paddr(),
+             extent->get_length());
       rel_delta.alloc_blk_ranges.emplace_back(
-        extent->get_paddr(),
-        L_ADDR_NULL,
-        extent->get_length(),
-        extent->get_type());
+        alloc_blk_t::create_retire(
+          extent->get_paddr(),
+          extent->get_length(),
+          extent->get_type()));
+      backref_entries.emplace_back(
+        backref_entry_t::create_retire(
+          extent->get_paddr(),
+          extent->get_length(),
+          extent->get_type()));
+    } else if (is_backref_node(extent->get_type())) {
+      remove_backref_extent(extent->get_paddr());
+    } else {
+      ERRORT("Got unexpected extent type: {}", t, *extent);
+      ceph_abort("imposible");
     }
   }
   alloc_deltas.emplace_back(std::move(rel_delta));
@@ -1399,27 +1408,40 @@ record_t Cache::prepare_record(
     if (modify_time == NULL_TIME) {
       modify_time = commit_time;
     }
+    laddr_t fresh_laddr;
+    if (i->is_logical()) {
+      fresh_laddr = i->cast<LogicalCachedExtent>()->get_laddr();
+    } else if (is_lba_node(i->get_type())) {
+      fresh_laddr = i->cast<lba_manager::btree::LBANode>()->get_node_meta().begin;
+    } else {
+      fresh_laddr = L_ADDR_NULL;
+    }
     record.push_back(extent_t{
       i->get_type(),
-      i->is_logical()
-      ? i->cast<LogicalCachedExtent>()->get_laddr()
-      : (is_lba_node(i->get_type())
-        ? i->cast<lba_manager::btree::LBANode>()->get_node_meta().begin
-        : L_ADDR_NULL),
+      fresh_laddr,
       std::move(bl)
     },
     modify_time);
-    if (i->is_valid()
-        && is_backref_mapped_extent_node(i)) {
+
+    if (!i->is_valid()) {
+      continue;
+    }
+    if (is_backref_mapped_type(i->get_type())) {
+      laddr_t alloc_laddr;
+      if (i->is_logical()) {
+        alloc_laddr = i->cast<LogicalCachedExtent>()->get_laddr();
+      } else if (is_lba_node(i->get_type())) {
+        alloc_laddr = i->cast<lba_manager::btree::LBANode>()->get_node_meta().begin;
+      } else {
+        assert(i->get_type() == extent_types_t::TEST_BLOCK_PHYSICAL);
+        alloc_laddr = L_ADDR_MIN;
+      }
      alloc_delta.alloc_blk_ranges.emplace_back(
-        i->get_paddr(),
-        i->is_logical()
-        ? i->cast<LogicalCachedExtent>()->get_laddr()
-        : (is_lba_node(i->get_type())
-          ? i->cast<lba_manager::btree::LBANode>()->get_node_meta().begin
-          : L_ADDR_NULL),
-        i->get_length(),
-        i->get_type());
+        alloc_blk_t::create_alloc(
+          i->get_paddr(),
+          alloc_laddr,
+          i->get_length(),
+          i->get_type()));
     }
   }
@@ -1430,14 +1452,20 @@ record_t Cache::prepare_record(
     get_by_ext(efforts.fresh_ool_by_ext,
               i->get_type()).increment(i->get_length());
     i->prepare_commit();
-    if (is_backref_mapped_extent_node(i)) {
+    if (is_backref_mapped_type(i->get_type())) {
+      laddr_t alloc_laddr;
+      if (i->is_logical()) {
+        alloc_laddr = i->cast<LogicalCachedExtent>()->get_laddr();
+      } else {
+        assert(is_lba_node(i->get_type()));
+        alloc_laddr = i->cast<lba_manager::btree::LBANode>()->get_node_meta().begin;
+      }
      alloc_delta.alloc_blk_ranges.emplace_back(
-        i->get_paddr(),
-        i->is_logical()
-        ? i->cast<LogicalCachedExtent>()->get_laddr()
-        : i->cast<lba_manager::btree::LBANode>()->get_node_meta().begin,
-        i->get_length(),
-        i->get_type());
+        alloc_blk_t::create_alloc(
+          i->get_paddr(),
+          alloc_laddr,
+          i->get_length(),
+          i->get_type()));
     }
   }
@@ -1459,15 +1487,53 @@ record_t Cache::prepare_record(
     DEBUGT("inplace rewrite ool block is commmitted -- {}", t, *i);
   }
 
+  auto existing_stats = t.get_existing_block_stats();
+  DEBUGT("total existing blocks num: {}, exist clean num: {}, "
+         "exist mutation pending num: {}",
+         t,
+         existing_stats.valid_num,
+         existing_stats.clean_num,
+         existing_stats.mutated_num);
   for (auto &i: t.existing_block_list) {
-    if (i->is_valid()) {
-      alloc_delta.alloc_blk_ranges.emplace_back(
-        i->get_paddr(),
+    assert(is_logical_type(i->get_type()));
+    if (!i->is_valid()) {
+      continue;
+    }
+
+    if (i->is_exist_clean()) {
+      i->state = CachedExtent::extent_state_t::CLEAN;
+    } else {
+      assert(i->is_exist_mutation_pending());
+      // i->state must become DIRTY in complete_commit()
+    }
+
+    // exist mutation pending extents must be in t.mutated_block_list
+    add_extent(i);
+    const auto t_src = t.get_src();
+    if (i->is_dirty()) {
+      add_to_dirty(i, &t_src);
+    } else {
+      touch_extent(*i, &t_src);
+    }
+
+    alloc_delta.alloc_blk_ranges.emplace_back(
+      alloc_blk_t::create_alloc(
+        i->get_paddr(),
        i->cast<LogicalCachedExtent>()->get_laddr(),
        i->get_length(),
-        i->get_type());
-    }
+        i->get_type()));
+
+    // Note: commit extents and backref allocations in the same place
+    // Note: remapping is split into 2 steps, retire and alloc, they must be
+    // committed atomically together
+    backref_entries.emplace_back(
+      backref_entry_t::create_alloc(
+        i->get_paddr(),
+        i->cast<LogicalCachedExtent>()->get_laddr(),
+        i->get_length(),
+        i->get_type()));
   }
+
   alloc_deltas.emplace_back(std::move(alloc_delta));
 
   for (auto b : alloc_deltas) {
@@ -1521,6 +1587,9 @@ record_t Cache::prepare_record(
     record.push_back(std::move(delta));
   }
 
+  apply_backref_mset(backref_entries);
+  t.set_backref_entries(std::move(backref_entries));
+
   ceph_assert(t.get_fresh_block_stats().num ==
               t.inline_block_list.size() +
              t.ool_block_list.size() +
@@ -1620,26 +1689,35 @@ record_t Cache::prepare_record(
   return record;
 }
 
-void Cache::backref_batch_update(
-  std::vector<backref_entry_ref> &&list,
-  const journal_seq_t &seq)
+void Cache::apply_backref_byseq(
+  backref_entry_refs_t&& backref_entries,
+  const journal_seq_t& seq)
 {
-  LOG_PREFIX(Cache::backref_batch_update);
-  DEBUG("inserting {} entries at {}", list.size(), seq);
-  ceph_assert(seq != JOURNAL_SEQ_NULL);
-
-  for (auto &ent : list) {
-    backref_entry_mset.insert(*ent);
+  LOG_PREFIX(Cache::apply_backref_byseq);
+  DEBUG("backref_entry apply {} entries at {}",
+        backref_entries.size(), seq);
+  assert(seq != JOURNAL_SEQ_NULL);
+  if (backref_entries.empty()) {
+    return;
   }
-
-  auto iter = backref_entryrefs_by_seq.find(seq);
-  if (iter == backref_entryrefs_by_seq.end()) {
-    backref_entryrefs_by_seq.emplace(seq, std::move(list));
+  if (backref_entryrefs_by_seq.empty()) {
+    backref_entryrefs_by_seq.insert(
+      backref_entryrefs_by_seq.end(),
+      {seq, std::move(backref_entries)});
+    return;
+  }
+  auto last = backref_entryrefs_by_seq.rbegin();
+  assert(last->first <= seq);
+  if (last->first == seq) {
+    last->second.insert(
+      last->second.end(),
+      std::make_move_iterator(backref_entries.begin()),
+      std::make_move_iterator(backref_entries.end()));
   } else {
-    iter->second.insert(
-      iter->second.end(),
-      std::make_move_iterator(list.begin()),
-      std::make_move_iterator(list.end()));
+    assert(last->first < seq);
+    backref_entryrefs_by_seq.insert(
+      backref_entryrefs_by_seq.end(),
+      {seq, std::move(backref_entries)});
   }
 }
@@ -1652,7 +1730,7 @@ void Cache::complete_commit(
   SUBTRACET(seastore_t, "final_block_start={}, start_seq={}",
            t, final_block_start, start_seq);
 
-  std::vector<backref_entry_ref> backref_list;
+  backref_entry_refs_t backref_entries;
   t.for_each_finalized_fresh_block([&](const CachedExtentRef &i) {
     if (!i->is_valid()) {
       return;
     }
@@ -1683,22 +1761,28 @@ void Cache::complete_commit(
     const auto t_src = t.get_src();
     touch_extent(*i, &t_src);
     epm.commit_space_used(i->get_paddr(), i->get_length());
-    if (is_backref_mapped_extent_node(i)) {
-      DEBUGT("backref_list new {} len 0x{:x}",
+
+    // Note: commit extents and backref allocations in the same place
+    if (is_backref_mapped_type(i->get_type())) {
+      DEBUGT("backref_entry alloc {}~0x{:x}",
            t,
            i->get_paddr(),
            i->get_length());
-      backref_list.emplace_back(
-        std::make_unique<backref_entry_t>(
+      laddr_t alloc_laddr;
+      if (i->is_logical()) {
+        alloc_laddr = i->cast<LogicalCachedExtent>()->get_laddr();
+      } else if (is_lba_node(i->get_type())) {
+        alloc_laddr = i->cast<lba_manager::btree::LBANode>()->get_node_meta().begin;
+      } else {
+        assert(i->get_type() == extent_types_t::TEST_BLOCK_PHYSICAL);
+        alloc_laddr = L_ADDR_MIN;
+      }
+      backref_entries.emplace_back(
+        backref_entry_t::create_alloc(
          i->get_paddr(),
-          i->is_logical()
-          ? i->cast<LogicalCachedExtent>()->get_laddr()
-          : (is_lba_node(i->get_type())
-            ? i->cast<lba_manager::btree::LBANode>()->get_node_meta().begin
-            : L_ADDR_NULL),
+          alloc_laddr,
          i->get_length(),
-          i->get_type(),
-          start_seq));
+          i->get_type()));
     } else if (is_backref_node(i->get_type())) {
       add_backref_extent(
        i->get_paddr(),
@@ -1735,9 +1819,10 @@ void Cache::complete_commit(
     epm.mark_space_free(extent->get_paddr(), extent->get_length());
   }
   for (auto &i: t.existing_block_list) {
-    if (i->is_valid()) {
-      epm.mark_space_used(i->get_paddr(), i->get_length());
+    if (!i->is_valid()) {
+      continue;
     }
+    epm.mark_space_used(i->get_paddr(), i->get_length());
   }
 
   for (auto &i: t.mutated_block_list) {
@@ -1751,64 +1836,10 @@
   for (auto &i: t.retired_set) {
     auto &extent = i.extent;
     extent->dirty_from_or_retired_at = start_seq;
-    if (is_backref_mapped_extent_node(extent) ||
-        is_retired_placeholder_type(extent->get_type())) {
-      DEBUGT("backref_list free {} len 0x{:x}",
-             t,
-             extent->get_paddr(),
-             extent->get_length());
-      backref_list.emplace_back(
-        std::make_unique<backref_entry_t>(
-          extent->get_paddr(),
-          L_ADDR_NULL,
-          extent->get_length(),
-          extent->get_type(),
-          start_seq));
-    } else if (is_backref_node(extent->get_type())) {
-      remove_backref_extent(extent->get_paddr());
-    } else {
-      ERRORT("{}", t, *extent);
-      ceph_abort("not possible");
-    }
   }
 
-  auto existing_stats = t.get_existing_block_stats();
-  DEBUGT("total existing blocks num: {}, exist clean num: {}, "
-         "exist mutation pending num: {}",
-         t,
-         existing_stats.valid_num,
-         existing_stats.clean_num,
-         existing_stats.mutated_num);
-  for (auto &i: t.existing_block_list) {
-    if (i->is_valid()) {
-      if (i->is_exist_clean()) {
-        i->state = CachedExtent::extent_state_t::CLEAN;
-      } else {
-        assert(i->state == CachedExtent::extent_state_t::DIRTY);
-      }
-      DEBUGT("backref_list new existing {} len 0x{:x}",
-             t,
-             i->get_paddr(),
-             i->get_length());
-      backref_list.emplace_back(
-        std::make_unique<backref_entry_t>(
-          i->get_paddr(),
-          i->cast<LogicalCachedExtent>()->get_laddr(),
-          i->get_length(),
-          i->get_type(),
-          start_seq));
-      add_extent(i);
-      const auto t_src = t.get_src();
-      if (i->is_dirty()) {
-        add_to_dirty(i, &t_src);
-      } else {
-        touch_extent(*i, &t_src);
-      }
-    }
-  }
-  if (!backref_list.empty()) {
-    backref_batch_update(std::move(backref_list), start_seq);
-  }
+  apply_backref_byseq(t.move_backref_entries(), start_seq);
+  commit_backref_entries(std::move(backref_entries), start_seq);
 
   for (auto &i: t.pre_alloc_list) {
     if (!i->is_valid()) {
@@ -1931,7 +1962,7 @@ Cache::replay_delta(
       alloc_delta_t alloc_delta;
       decode(alloc_delta, delta.bl);
 
-      std::vector<backref_entry_ref> backref_list;
+      backref_entry_refs_t backref_entries;
       for (auto &alloc_blk : alloc_delta.alloc_blk_ranges) {
        if (alloc_blk.paddr.is_relative()) {
          assert(alloc_blk.paddr.is_record_relative());
        }
        DEBUG("replay alloc_blk {}~0x{:x} {}, journal_seq: {}",
          alloc_blk.paddr, alloc_blk.len, alloc_blk.laddr, journal_seq);
-        backref_list.emplace_back(
-          std::make_unique<backref_entry_t>(
-            alloc_blk.paddr,
-            alloc_blk.laddr,
-            alloc_blk.len,
-            alloc_blk.type,
-            journal_seq));
-      }
-      if (!backref_list.empty()) {
-        backref_batch_update(std::move(backref_list), journal_seq);
+        backref_entries.emplace_back(
+          backref_entry_t::create(alloc_blk));
       }
+      commit_backref_entries(std::move(backref_entries), journal_seq);
      return replay_delta_ertr::make_ready_future<std::pair<bool, CachedExtentRef>>(
        std::make_pair(true, nullptr));
     }
diff --git a/src/crimson/os/seastore/cache.h b/src/crimson/os/seastore/cache.h
index 07647f6c7cf..b2248ff37dd 100644
--- a/src/crimson/os/seastore/cache.h
+++ b/src/crimson/os/seastore/cache.h
@@ -3,14 +3,13 @@
 
 #pragma once
 
-#include <iostream>
-
 #include "seastar/core/shared_future.hh"
 
 #include "include/buffer.h"
 
 #include "crimson/common/errorator.h"
 #include "crimson/common/errorator-loop.h"
+#include "crimson/os/seastore/backref_entry.h"
 #include "crimson/os/seastore/cached_extent.h"
 #include "crimson/os/seastore/extent_placement_manager.h"
 #include "crimson/os/seastore/logging.h"
@@ -38,86 +37,6 @@ class FixedKVBtree;
 class BackrefManager;
 class SegmentProvider;
 
-struct backref_entry_t {
-  backref_entry_t(
-    const paddr_t paddr,
-    const laddr_t laddr,
-    const extent_len_t len,
-    const extent_types_t type,
-    const journal_seq_t seq)
-    : paddr(paddr),
-      laddr(laddr),
-      len(len),
-      type(type),
-      seq(seq)
-  {}
-  backref_entry_t(alloc_blk_t alloc_blk)
-    : paddr(alloc_blk.paddr),
-      laddr(alloc_blk.laddr),
-      len(alloc_blk.len),
-      type(alloc_blk.type)
-  {}
-  paddr_t paddr = P_ADDR_NULL;
-  laddr_t laddr = L_ADDR_NULL;
-  extent_len_t len = 0;
-  extent_types_t type =
-    extent_types_t::ROOT;
-  journal_seq_t seq;
-  friend bool operator< (
-    const backref_entry_t &l,
-    const backref_entry_t &r) {
-    return l.paddr < r.paddr;
-  }
-  friend bool operator> (
-    const backref_entry_t &l,
-    const backref_entry_t &r) {
-    return l.paddr > r.paddr;
-  }
-  friend bool operator== (
-    const backref_entry_t &l,
-    const backref_entry_t &r) {
-    return l.paddr == r.paddr;
-  }
-
-  using set_hook_t =
-    boost::intrusive::set_member_hook<
-      boost::intrusive::link_mode<
-        boost::intrusive::auto_unlink>>;
-  set_hook_t backref_set_hook;
-  using backref_set_member_options = boost::intrusive::member_hook<
-    backref_entry_t,
-    set_hook_t,
-    &backref_entry_t::backref_set_hook>;
-  using multiset_t = boost::intrusive::multiset<
-    backref_entry_t,
-    backref_set_member_options,
-    boost::intrusive::constant_time_size<false>>;
-
-  struct cmp_t {
-    using is_transparent = paddr_t;
-    bool operator()(
-      const backref_entry_t &l,
-      const backref_entry_t &r) const {
-      return l.paddr < r.paddr;
-    }
-    bool operator()(const paddr_t l, const backref_entry_t &r) const {
-      return l < r.paddr;
-    }
-    bool operator()(const backref_entry_t &l, const paddr_t r) const {
-      return l.paddr < r;
-    }
-  };
-};
-
-std::ostream &operator<<(std::ostream &out, const backref_entry_t &ent);
-
-using backref_entry_ref = std::unique_ptr<backref_entry_t>;
-using backref_entry_mset_t = backref_entry_t::multiset_t;
-using backref_entry_refs_t = std::vector<backref_entry_ref>;
-using backref_entryrefs_by_seq_t = std::map<journal_seq_t, backref_entry_refs_t>;
-using backref_entry_query_set_t = std::set<
-  backref_entry_t, backref_entry_t::cmp_t>;
-
 /**
  * Cache
  *
@@ -984,7 +903,7 @@ private:
     for (auto it = start_iter;
         it != end_iter;
         it++) {
-      res.emplace(it->paddr, it->laddr, it->len, it->type, it->seq);
+      res.emplace(it->paddr, it->laddr, it->len, it->type);
     }
     return res;
   }
@@ -1907,9 +1826,23 @@ private:
   seastar::metrics::metric_group metrics;
   void register_metrics();
 
-  void backref_batch_update(
-    std::vector<backref_entry_ref> &&,
-    const journal_seq_t &);
+  void apply_backref_mset(
+    backref_entry_refs_t& backref_entries) {
+    for (auto& entry : backref_entries) {
+      backref_entry_mset.insert(*entry);
+    }
+  }
+
+  void apply_backref_byseq(
+    backref_entry_refs_t&& backref_entries,
+    const journal_seq_t& seq);
+
+  void commit_backref_entries(
+    backref_entry_refs_t&& backref_entries,
+    const journal_seq_t& seq) {
+    apply_backref_mset(backref_entries);
+    apply_backref_byseq(std::move(backref_entries), seq);
+  }
 
   /// Add extent to extents handling dirty and refcounting
   ///
diff --git a/src/crimson/os/seastore/cached_extent.cc b/src/crimson/os/seastore/cached_extent.cc
index 085a519cb68..ab2492f5bb6 100644
--- a/src/crimson/os/seastore/cached_extent.cc
+++ b/src/crimson/os/seastore/cached_extent.cc
@@ -38,12 +38,6 @@ void intrusive_ptr_release(CachedExtent *ptr)
 
 #endif
 
-bool is_backref_mapped_extent_node(const CachedExtentRef &extent) {
-  return extent->is_logical()
-    || is_lba_node(extent->get_type())
-    || extent->get_type() == extent_types_t::TEST_BLOCK_PHYSICAL;
-}
-
 std::ostream &operator<<(std::ostream &out, CachedExtent::extent_state_t state)
 {
   switch (state) {
diff --git a/src/crimson/os/seastore/cached_extent.h b/src/crimson/os/seastore/cached_extent.h
index 782afa19d33..f9356f40b83 100644
--- a/src/crimson/os/seastore/cached_extent.h
+++ b/src/crimson/os/seastore/cached_extent.h
@@ -1097,8 +1097,6 @@ protected:
 std::ostream &operator<<(std::ostream &, CachedExtent::extent_state_t);
 std::ostream &operator<<(std::ostream &, const CachedExtent&);
 
-bool is_backref_mapped_extent_node(const CachedExtentRef &extent);
-
 /// Compare extents by paddr
 struct paddr_cmp {
   bool operator()(paddr_t lhs, const CachedExtent &rhs) const {
diff --git a/src/crimson/os/seastore/journal.h b/src/crimson/os/seastore/journal.h
index a5c9029c43c..298935bd22e 100644
--- a/src/crimson/os/seastore/journal.h
+++ b/src/crimson/os/seastore/journal.h
@@ -59,13 +59,13 @@ public:
     crimson::ct_error::erange,
     crimson::ct_error::input_output_error
     >;
-  using submit_record_ret = submit_record_ertr::future<
-    record_locator_t
-    >;
-  virtual submit_record_ret submit_record(
+  using on_submission_func_t = std::function<
+    void(record_locator_t)>;
+  virtual submit_record_ertr::future<> submit_record(
     record_t &&record,
-    OrderingHandle &handle
-  ) = 0;
+    OrderingHandle &handle,
+    transaction_type_t t_src,
+    on_submission_func_t &&on_submission) = 0;
 
   /**
    * flush
@@ -101,9 +101,6 @@ public:
   virtual replay_ret replay(
     delta_handler_t &&delta_handler) = 0;
 
-  virtual seastar::future<> finish_commit(
-    transaction_type_t type) = 0;
-
   virtual ~Journal() {}
 
   virtual backend_type_t get_type() = 0;
diff --git a/src/crimson/os/seastore/journal/circular_bounded_journal.cc b/src/crimson/os/seastore/journal/circular_bounded_journal.cc
index 9ee8b1b997f..41ff8318aba 100644
--- a/src/crimson/os/seastore/journal/circular_bounded_journal.cc
+++ b/src/crimson/os/seastore/journal/circular_bounded_journal.cc
@@ -58,35 +58,52 @@ CircularBoundedJournal::close_ertr::future<> CircularBoundedJournal::close()
   return record_submitter.close();
 }
 
-CircularBoundedJournal::submit_record_ret
+CircularBoundedJournal::submit_record_ertr::future<>
 CircularBoundedJournal::submit_record(
   record_t &&record,
-  OrderingHandle &handle)
+  OrderingHandle &handle,
+  transaction_type_t t_src,
+  on_submission_func_t &&on_submission)
 {
   LOG_PREFIX(CircularBoundedJournal::submit_record);
   DEBUG("H{} {} start ...", (void*)&handle, record);
   assert(write_pipeline);
-  return do_submit_record(std::move(record), handle);
+  return do_submit_record(
+    std::move(record), handle, std::move(on_submission)
+  ).safe_then([this, t_src] {
+    if (is_trim_transaction(t_src)) {
+      return update_journal_tail(
+        trimmer.get_dirty_tail(),
+        trimmer.get_alloc_tail());
+    } else {
+      return seastar::now();
+    }
+  });
 }
 
-CircularBoundedJournal::submit_record_ret
+CircularBoundedJournal::submit_record_ertr::future<>
 CircularBoundedJournal::do_submit_record(
   record_t &&record,
-  OrderingHandle &handle)
+  OrderingHandle &handle,
+  on_submission_func_t &&on_submission)
 {
   LOG_PREFIX(CircularBoundedJournal::do_submit_record);
   if (!record_submitter.is_available()) {
     DEBUG("H{} wait ...", (void*)&handle);
     return record_submitter.wait_available(
-    ).safe_then([this, record=std::move(record), &handle]() mutable {
-      return do_submit_record(std::move(record), handle);
+    ).safe_then([this, record=std::move(record), &handle,
+                 on_submission=std::move(on_submission)]() mutable {
+      return do_submit_record(
+        std::move(record), handle, std::move(on_submission));
     });
   }
   auto action = record_submitter.check_action(record.size);
   if (action == RecordSubmitter::action_t::ROLL) {
     return record_submitter.roll_segment(
-    ).safe_then([this, record=std::move(record), &handle]() mutable {
-      return do_submit_record(std::move(record), handle);
+    ).safe_then([this, record=std::move(record), &handle,
+                 on_submission=std::move(on_submission)]() mutable {
+      return do_submit_record(
+        std::move(record), handle, std::move(on_submission));
     });
   }
@@ -99,13 +116,16 @@ CircularBoundedJournal::do_submit_record(
   return handle.enter(write_pipeline->device_submission
   ).then([submit_fut=std::move(submit_ret.future)]() mutable {
     return std::move(submit_fut);
-  }).safe_then([FNAME, this, &handle](record_locator_t result) {
+  }).safe_then([FNAME, this, &handle, on_submission=std::move(on_submission)
+               ](record_locator_t result) mutable {
     return handle.enter(write_pipeline->finalize
-    ).then([FNAME, this, result, &handle] {
+    ).then([FNAME, this, result, &handle,
+            on_submission=std::move(on_submission)] {
       DEBUG("H{} finish with {}", (void*)&handle, result);
       auto new_committed_to = result.write_result.get_end_seq();
       record_submitter.update_committed_to(new_committed_to);
-      return result;
+      std::invoke(on_submission, result);
+      return seastar::now();
     });
   });
 }
@@ -392,13 +412,4 @@ Journal::replay_ret CircularBoundedJournal::replay(
   });
 }
 
-seastar::future<> CircularBoundedJournal::finish_commit(transaction_type_t type) {
-  if (is_trim_transaction(type)) {
-    return update_journal_tail(
-      trimmer.get_dirty_tail(),
-      trimmer.get_alloc_tail());
-  }
-  return seastar::now();
-}
-
 }
diff --git a/src/crimson/os/seastore/journal/circular_bounded_journal.h b/src/crimson/os/seastore/journal/circular_bounded_journal.h
index 874bd8dc086..16278df6cfe 100644
--- a/src/crimson/os/seastore/journal/circular_bounded_journal.h
+++ b/src/crimson/os/seastore/journal/circular_bounded_journal.h
@@ -80,9 +80,11 @@ public:
     return backend_type_t::RANDOM_BLOCK;
   }
 
-  submit_record_ret submit_record(
+  submit_record_ertr::future<> submit_record(
     record_t &&record,
-    OrderingHandle &handle
+    OrderingHandle &handle,
+    transaction_type_t t_src,
+    on_submission_func_t &&on_submission
   ) final;
 
   seastar::future<> flush(
@@ -148,8 +150,6 @@ public:
     return cjs.get_records_start();
   }
 
-  seastar::future<> finish_commit(transaction_type_t type) final;
-
   using cbj_delta_handler_t = std::function<
     replay_ertr::future<bool>(
       const record_locator_t&,
@@ -160,7 +160,10 @@ public:
     cbj_delta_handler_t &&delta_handler,
     journal_seq_t tail);
 
-  submit_record_ret do_submit_record(record_t &&record, OrderingHandle &handle);
+  submit_record_ertr::future<> do_submit_record(
+    record_t &&record,
+    OrderingHandle &handle,
+    on_submission_func_t &&on_submission);
 
   void try_read_rolled_header(scan_valid_records_cursor &cursor) {
     paddr_t addr = convert_abs_addr_to_paddr(
diff --git a/src/crimson/os/seastore/journal/segmented_journal.cc b/src/crimson/os/seastore/journal/segmented_journal.cc
index 6be2ad4936a..67c0b3fb8ac 100644
--- a/src/crimson/os/seastore/journal/segmented_journal.cc
+++ b/src/crimson/os/seastore/journal/segmented_journal.cc
@@ -368,25 +368,30 @@ seastar::future<> SegmentedJournal::flush(OrderingHandle &handle)
   });
 }
 
-SegmentedJournal::submit_record_ret
+SegmentedJournal::submit_record_ertr::future<>
 SegmentedJournal::do_submit_record(
   record_t &&record,
-  OrderingHandle &handle)
+  OrderingHandle &handle,
+  on_submission_func_t &&on_submission)
 {
   LOG_PREFIX(SegmentedJournal::do_submit_record);
   if (!record_submitter.is_available()) {
     DEBUG("H{} wait ...", (void*)&handle);
     return record_submitter.wait_available(
-    ).safe_then([this, record=std::move(record), &handle]() mutable {
-      return do_submit_record(std::move(record), handle);
+    ).safe_then([this, record=std::move(record), &handle,
+                 on_submission=std::move(on_submission)]() mutable {
+      return do_submit_record(
+        std::move(record), handle, std::move(on_submission));
     });
   }
   auto action = record_submitter.check_action(record.size);
   if (action == RecordSubmitter::action_t::ROLL) {
     DEBUG("H{} roll, unavailable ...", (void*)&handle);
     return record_submitter.roll_segment(
-    ).safe_then([this, record=std::move(record), &handle]() mutable {
-      return do_submit_record(std::move(record), handle);
+    ).safe_then([this, record=std::move(record), &handle,
+                 on_submission=std::move(on_submission)]() mutable {
+      return do_submit_record(
+        std::move(record), handle, std::move(on_submission));
     });
   } else {
     // SUBMIT_FULL/NOT_FULL
     DEBUG("H{} submit {} ...",
@@ -398,22 +403,27 @@ SegmentedJournal::do_submit_record(
     return handle.enter(write_pipeline->device_submission
     ).then([submit_fut=std::move(submit_ret.future)]() mutable {
       return std::move(submit_fut);
-    }).safe_then([FNAME, this, &handle](record_locator_t result) {
+    }).safe_then([FNAME, this, &handle, on_submission=std::move(on_submission)
+                 ](record_locator_t result) mutable {
       return handle.enter(write_pipeline->finalize
-      ).then([FNAME, this, result, &handle] {
+      ).then([FNAME, this, result, &handle,
+              on_submission=std::move(on_submission)] {
        DEBUG("H{} finish with {}", (void*)&handle, result);
        auto new_committed_to = result.write_result.get_end_seq();
        record_submitter.update_committed_to(new_committed_to);
-        return result;
+        std::invoke(on_submission, result);
+        return seastar::now();
       });
     });
   }
 }
 
-SegmentedJournal::submit_record_ret
+SegmentedJournal::submit_record_ertr::future<>
 SegmentedJournal::submit_record(
   record_t &&record,
-  OrderingHandle &handle)
+  OrderingHandle &handle,
+  transaction_type_t t_src,
+  on_submission_func_t &&on_submission)
 {
   LOG_PREFIX(SegmentedJournal::submit_record);
   DEBUG("H{} {} start ...", (void*)&handle, record);
@@ -429,7 +439,8 @@ SegmentedJournal::submit_record(
     return crimson::ct_error::erange::make();
   }
 
-  return do_submit_record(std::move(record), handle);
+  return do_submit_record(
+    std::move(record), handle, std::move(on_submission));
 }
 
 }
diff --git a/src/crimson/os/seastore/journal/segmented_journal.h b/src/crimson/os/seastore/journal/segmented_journal.h
index 891de7ec306..3f51de70fb3 100644
--- a/src/crimson/os/seastore/journal/segmented_journal.h
+++ b/src/crimson/os/seastore/journal/segmented_journal.h
@@ -44,9 +44,11 @@ public:
 
   close_ertr::future<> close() final;
 
-  submit_record_ret submit_record(
+  submit_record_ertr::future<> submit_record(
     record_t &&record,
-    OrderingHandle &handle) final;
+    OrderingHandle &handle,
+    transaction_type_t t_src,
+    on_submission_func_t &&on_submission) final;
 
   seastar::future<> flush(OrderingHandle &handle) final;
 
@@ -59,9 +61,6 @@ public:
   backend_type_t get_type() final {
     return backend_type_t::SEGMENTED;
   }
-  seastar::future<> finish_commit(transaction_type_t type) {
-    return seastar::now();
-  }
 
   bool is_checksum_needed() final {
     // segmented journal always requires checksum
@@ -69,10 +68,10 @@ public:
   }
 
 private:
-  submit_record_ret do_submit_record(
+  submit_record_ertr::future<> do_submit_record(
     record_t &&record,
-    OrderingHandle &handle
-  );
+    OrderingHandle &handle,
+    on_submission_func_t &&on_submission);
 
   SegmentSeqAllocatorRef segment_seq_allocator;
   SegmentAllocator journal_segment_allocator;
diff --git a/src/crimson/os/seastore/seastore_types.h b/src/crimson/os/seastore/seastore_types.h
index 7c7a6833006..335a439dcb5 100644
--- a/src/crimson/os/seastore/seastore_types.h
+++ b/src/crimson/os/seastore/seastore_types.h
@@ -1228,7 +1228,6 @@ constexpr laddr_t L_ADDR_MAX = laddr_t::from_raw_uint(laddr_t::RAW_VALUE_MAX);
 constexpr laddr_t L_ADDR_MIN = laddr_t::from_raw_uint(0);
 constexpr laddr_t L_ADDR_NULL = L_ADDR_MAX;
 constexpr laddr_t L_ADDR_ROOT = laddr_t::from_raw_uint(laddr_t::RAW_VALUE_MAX - 1);
-constexpr laddr_t L_ADDR_LBAT = laddr_t::from_raw_uint(laddr_t::RAW_VALUE_MAX - 2);
 
 struct __attribute__((packed)) laddr_le_t {
   ceph_le64 laddr;
@@ -1469,6 +1468,23 @@ constexpr bool is_physical_type(extent_types_t type) {
   }
 }
 
+constexpr bool is_backref_mapped_type(extent_types_t type) {
+  if ((type >= extent_types_t::LADDR_INTERNAL &&
+       type <= extent_types_t::OBJECT_DATA_BLOCK) ||
+      type == extent_types_t::TEST_BLOCK ||
+      type == extent_types_t::TEST_BLOCK_PHYSICAL) {
+    assert(is_logical_type(type) ||
+           is_lba_node(type) ||
+           type == extent_types_t::TEST_BLOCK_PHYSICAL);
+    return true;
+  } else {
+    assert(!is_logical_type(type) &&
+           !is_lba_node(type) &&
+           type != extent_types_t::TEST_BLOCK_PHYSICAL);
+    return false;
+  }
+}
+
 constexpr bool is_real_type(extent_types_t type) {
   if (type <= extent_types_t::OBJECT_DATA_BLOCK ||
       (type >= extent_types_t::TEST_BLOCK &&
@@ -1945,12 +1961,13 @@ struct __attribute__((packed)) root_t {
 
 struct alloc_blk_t {
   alloc_blk_t(
-    paddr_t paddr,
-    laddr_t laddr,
+    const paddr_t& paddr,
+    const laddr_t& laddr,
     extent_len_t len,
     extent_types_t type)
-    : paddr(paddr), laddr(laddr), len(len), type(type)
-  {}
+    : paddr(paddr), laddr(laddr), len(len), type(type) {
+    assert(len > 0);
+  }
 
   explicit alloc_blk_t() = default;
 
@@ -1966,6 +1983,25 @@ struct alloc_blk_t {
     denc(v.type, p);
     DENC_FINISH(p);
   }
+
+  static alloc_blk_t create_alloc(
+    const paddr_t& paddr,
+    const laddr_t& laddr,
+    extent_len_t len,
+    extent_types_t type) {
+    assert(is_backref_mapped_type(type));
+    assert(laddr != L_ADDR_NULL);
+    return alloc_blk_t(paddr, laddr, len, type);
+  }
+
+  static alloc_blk_t create_retire(
+    const paddr_t& paddr,
+    extent_len_t len,
+    extent_types_t type) {
+    assert(is_backref_mapped_type(type) ||
+           is_retired_placeholder_type(type));
+    return alloc_blk_t(paddr, L_ADDR_NULL, len, type);
+  }
 };
 
 // use absolute address
diff --git a/src/crimson/os/seastore/transaction.h b/src/crimson/os/seastore/transaction.h
index 9b95161a404..66a9f896520 100644
--- a/src/crimson/os/seastore/transaction.h
+++ b/src/crimson/os/seastore/transaction.h
@@ -8,11 +8,12 @@
 #include <boost/intrusive/list.hpp>
 
 #include "crimson/common/log.h"
+#include "crimson/os/seastore/backref_entry.h"
+#include "crimson/os/seastore/cached_extent.h"
 #include "crimson/os/seastore/logging.h"
 #include "crimson/os/seastore/ordering_handle.h"
-#include "crimson/os/seastore/seastore_types.h"
-#include "crimson/os/seastore/cached_extent.h"
 #include "crimson/os/seastore/root_block.h"
+#include "crimson/os/seastore/seastore_types.h"
 #include "crimson/os/seastore/transaction_interruptor.h"
 
 namespace crimson::os::seastore {
@@ -460,6 +461,7 @@ public:
     ool_write_stats = {};
     rewrite_stats = {};
     conflicted = false;
+    assert(backref_entries.empty());
     if (!has_reset) {
       has_reset = true;
     }
@@ -575,6 +577,15 @@ private:
   friend class Cache;
   friend Ref make_test_transaction();
 
+  void set_backref_entries(backref_entry_refs_t&& entries) {
+    assert(backref_entries.empty());
+    backref_entries = std::move(entries);
+  }
+
+  backref_entry_refs_t move_backref_entries() {
+    return std::move(backref_entries);
+  }
+
   /**
    * If set, *this may not be used to perform writes and will not provide
    * consistentency allowing operations using to avoid maintaining a read_set.
@@ -669,6 +680,8 @@ private:
   transaction_id_t trans_id = TRANS_ID_NULL;
 
   seastar::lw_shared_ptr<rbm_pending_ool_t> pending_ool;
+
+  backref_entry_refs_t backref_entries;
 };
 using TransactionRef = Transaction::Ref;
 
diff --git a/src/crimson/os/seastore/transaction_manager.cc b/src/crimson/os/seastore/transaction_manager.cc
index 94e9b3b9ab1..753bd5d6ff6 100644
--- a/src/crimson/os/seastore/transaction_manager.cc
+++ b/src/crimson/os/seastore/transaction_manager.cc
@@ -461,8 +461,12 @@ TransactionManager::do_submit_transaction(
   }
 
   SUBTRACET(seastore_t, "submitting record", tref);
-  return journal->submit_record(std::move(record), tref.get_handle()
-  ).safe_then([this, FNAME, &tref](auto submit_result) mutable {
+  return journal->submit_record(
+    std::move(record),
+    tref.get_handle(),
+    tref.get_src(),
+    [this, FNAME, &tref](record_locator_t submit_result)
+  {
     SUBDEBUGT(seastore_t, "committed with {}", tref, submit_result);
     auto start_seq = submit_result.write_result.start_seq;
     journal->get_trimmer().set_journal_head(start_seq);
@@ -473,10 +477,8 @@ TransactionManager::do_submit_transaction(
     journal->get_trimmer().update_journal_tails(
       cache->get_oldest_dirty_from().value_or(start_seq),
       cache->get_oldest_backref_dirty_from().value_or(start_seq));
-    return journal->finish_commit(tref.get_src()
-    ).then([&tref] {
-      return tref.get_handle().complete();
-    });
+  }).safe_then([&tref] {
+    return tref.get_handle().complete();
   }).handle_error(
     submit_transaction_iertr::pass_further{},
     crimson::ct_error::assert_all{"Hit error submitting to journal"}
diff --git a/src/test/crimson/seastore/test_block.h b/src/test/crimson/seastore/test_block.h
index e1fe8e06f8a..546f357dea0 100644
--- a/src/test/crimson/seastore/test_block.h
+++ b/src/test/crimson/seastore/test_block.h
@@ -39,8 +39,8 @@ struct test_block_delta_t {
 
 inline std::ostream &operator<<(
   std::ostream &lhs, const test_extent_desc_t &rhs) {
-  return lhs << "test_extent_desc_t(len=" << rhs.len
-             << ", checksum=" << rhs.checksum << ")";
+  return lhs << "test_extent_desc_t(len=0x" << std::hex << rhs.len
+             << ", checksum=0x" << rhs.checksum << std::dec << ")";
 }
 
 struct TestBlock : crimson::os::seastore::LogicalCachedExtent {
diff --git a/src/test/crimson/seastore/test_btree_lba_manager.cc b/src/test/crimson/seastore/test_btree_lba_manager.cc
index 9988df3a124..8b1f7435c87 100644
--- a/src/test/crimson/seastore/test_btree_lba_manager.cc
+++ b/src/test/crimson/seastore/test_btree_lba_manager.cc
@@ -112,14 +112,22 @@ struct btree_test_base :
   seastar::future<> submit_transaction(TransactionRef t)
   {
     auto record = cache->prepare_record(*t, JOURNAL_SEQ_NULL, JOURNAL_SEQ_NULL);
-    return journal->submit_record(std::move(record), t->get_handle()).safe_then(
-      [this, t=std::move(t)](auto submit_result) mutable {
-        cache->complete_commit(
-          *t,
+    return seastar::do_with(
+      std::move(t), [this, record=std::move(record)](auto& _t) mutable {
+      auto& t = *_t;
+      return journal->submit_record(
+        std::move(record),
+        t.get_handle(),
+        t.get_src(),
+        [this, &t](auto submit_result) {
+          cache->complete_commit(
+            t,
            submit_result.record_block_base,
            submit_result.write_result.start_seq);
-        complete_commit(*t);
-      }).handle_error(crimson::ct_error::assert_all{});
+          complete_commit(t);
+        }
+      ).handle_error(crimson::ct_error::assert_all{});
+    });
   }
 
   virtual LBAManager::mkfs_ret test_structure_setup(Transaction &t) = 0;
diff --git a/src/test/crimson/seastore/test_cbjournal.cc b/src/test/crimson/seastore/test_cbjournal.cc
index d00a0f42729..47a08d68cbb 100644
--- a/src/test/crimson/seastore/test_cbjournal.cc
+++ b/src/test/crimson/seastore/test_cbjournal.cc
@@ -181,15 +181,20 @@ struct cbjournal_test_t : public seastar_test_suite_t, JournalTrimmer
   auto submit_record(record_t&& record)
   {
     entries.push_back(record);
+    entry_validator_t& back = entries.back();
     OrderingHandle handle = get_dummy_ordering_handle();
-    auto [addr, w_result] = cbj->submit_record(
-      std::move(record),
-      handle).unsafe_get();
-    entries.back().seq = w_result.start_seq;
-    entries.back().entries = 1;
-    entries.back().magic = cbj->get_cjs().get_cbj_header().magic;
-    logger().debug("submit entry to addr {}", entries.back().seq);
-    return convert_paddr_to_abs_addr(entries.back().seq.offset);
+    cbj->submit_record(
+      std::move(record),
+      handle,
+      transaction_type_t::MUTATE,
+      [this, &back](auto locator) {
+        back.seq = locator.write_result.start_seq;
+        back.entries = 1;
+        back.magic = cbj->get_cjs().get_cbj_header().magic;
+        logger().debug("submit entry to addr {}", back.seq);
+      }
+    ).unsafe_get();
+    return convert_paddr_to_abs_addr(back.seq.offset);
   }
 
   seastar::future<> tear_down_fut() final {
diff --git a/src/test/crimson/seastore/test_seastore_journal.cc b/src/test/crimson/seastore/test_seastore_journal.cc
index 2eb791b1d46..04a99319b11 100644
--- a/src/test/crimson/seastore/test_seastore_journal.cc
+++ b/src/test/crimson/seastore/test_seastore_journal.cc
@@ -233,12 +233,17 @@ struct journal_test_t : seastar_test_suite_t, SegmentProvider, JournalTrimmer {
   auto submit_record(T&&... _record) {
     auto record{std::forward<T>(_record)...};
     records.push_back(record);
+    record_validator_t& back = records.back();
     OrderingHandle handle = get_dummy_ordering_handle();
-    auto [addr, _] = journal->submit_record(
+    journal->submit_record(
       std::move(record),
-      handle).unsafe_get();
-    records.back().record_final_offset = addr;
-    return addr;
+      handle,
+      transaction_type_t::MUTATE,
+      [&back](auto locator) {
+        back.record_final_offset = locator.record_block_base;
+      }
+    ).unsafe_get();
+    return back.record_final_offset;
   }
 
   extent_t generate_extent(size_t blocks) {
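
Usage note (not part of the patch): the journal.h hunk above replaces the returned
submit_record_ertr::future<record_locator_t> and the separate finish_commit() virtual
with a transaction_type_t argument plus an on_submission callback that the journal
invokes from its finalize pipeline stage; trim-driven journal-tail updates now happen
inside CircularBoundedJournal::submit_record() itself. A minimal caller-side sketch,
modelled on the transaction_manager.cc and test hunks above and assuming the seastore
headers from this series; the wrapper name submit_one and the choice of
transaction_type_t::MUTATE are illustrative only, not part of the change:

    // Sketch only: builds against the crimson/os/seastore tree this diff targets.
    #include "crimson/os/seastore/journal.h"
    #include "crimson/os/seastore/ordering_handle.h"

    namespace sea = crimson::os::seastore;

    seastar::future<> submit_one(sea::Journal &journal,
                                 sea::record_t &&record,
                                 sea::OrderingHandle &handle)
    {
      // The on_submission callback replaces the old future<record_locator_t>
      // return value; it runs once the record is committed, in the journal's
      // finalize stage.
      return journal.submit_record(
        std::move(record),
        handle,
        sea::transaction_type_t::MUTATE,
        [](sea::record_locator_t locator) {
          // Consume the locator here (start_seq, record_block_base), as
          // TransactionManager::do_submit_transaction and the journal tests do.
        }
      ).handle_error(
        crimson::ct_error::assert_all{"submit_record failed"}
      );
    }

Because the callback fires before the returned future resolves, any bookkeeping the
old finish_commit() step performed for the caller has to live either in the callback
or in a continuation chained after submit_record(), as the transaction_manager.cc
hunk does with get_handle().complete().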