diff options
Diffstat (limited to 'src/crimson/os/seastore/cache.h')
-rw-r--r-- | src/crimson/os/seastore/cache.h | 905 |
1 files changed, 574 insertions, 331 deletions
diff --git a/src/crimson/os/seastore/cache.h b/src/crimson/os/seastore/cache.h index 4441df86d4e..a239b861726 100644 --- a/src/crimson/os/seastore/cache.h +++ b/src/crimson/os/seastore/cache.h @@ -3,13 +3,13 @@ #pragma once -#include <iostream> - #include "seastar/core/shared_future.hh" #include "include/buffer.h" #include "crimson/common/errorator.h" +#include "crimson/common/errorator-loop.h" +#include "crimson/os/seastore/backref_entry.h" #include "crimson/os/seastore/cached_extent.h" #include "crimson/os/seastore/extent_placement_manager.h" #include "crimson/os/seastore/logging.h" @@ -37,86 +37,6 @@ class FixedKVBtree; class BackrefManager; class SegmentProvider; -struct backref_entry_t { - backref_entry_t( - const paddr_t paddr, - const laddr_t laddr, - const extent_len_t len, - const extent_types_t type, - const journal_seq_t seq) - : paddr(paddr), - laddr(laddr), - len(len), - type(type), - seq(seq) - {} - backref_entry_t(alloc_blk_t alloc_blk) - : paddr(alloc_blk.paddr), - laddr(alloc_blk.laddr), - len(alloc_blk.len), - type(alloc_blk.type) - {} - paddr_t paddr = P_ADDR_NULL; - laddr_t laddr = L_ADDR_NULL; - extent_len_t len = 0; - extent_types_t type = - extent_types_t::ROOT; - journal_seq_t seq; - friend bool operator< ( - const backref_entry_t &l, - const backref_entry_t &r) { - return l.paddr < r.paddr; - } - friend bool operator> ( - const backref_entry_t &l, - const backref_entry_t &r) { - return l.paddr > r.paddr; - } - friend bool operator== ( - const backref_entry_t &l, - const backref_entry_t &r) { - return l.paddr == r.paddr; - } - - using set_hook_t = - boost::intrusive::set_member_hook< - boost::intrusive::link_mode< - boost::intrusive::auto_unlink>>; - set_hook_t backref_set_hook; - using backref_set_member_options = boost::intrusive::member_hook< - backref_entry_t, - set_hook_t, - &backref_entry_t::backref_set_hook>; - using multiset_t = boost::intrusive::multiset< - backref_entry_t, - backref_set_member_options, - 
boost::intrusive::constant_time_size<false>>; - - struct cmp_t { - using is_transparent = paddr_t; - bool operator()( - const backref_entry_t &l, - const backref_entry_t &r) const { - return l.paddr < r.paddr; - } - bool operator()(const paddr_t l, const backref_entry_t &r) const { - return l < r.paddr; - } - bool operator()(const backref_entry_t &l, const paddr_t r) const { - return l.paddr < r; - } - }; -}; - -std::ostream &operator<<(std::ostream &out, const backref_entry_t &ent); - -using backref_entry_ref = std::unique_ptr<backref_entry_t>; -using backref_entry_mset_t = backref_entry_t::multiset_t; -using backref_entry_refs_t = std::vector<backref_entry_ref>; -using backref_entryrefs_by_seq_t = std::map<journal_seq_t, backref_entry_refs_t>; -using backref_entry_query_set_t = std::set< - backref_entry_t, backref_entry_t::cmp_t>; - /** * Cache * @@ -167,7 +87,7 @@ using backref_entry_query_set_t = std::set< * - Remove all extents in the retired_set from Cache::extents * - Mark all extents in the write_set wait_io(), add promises to * transaction - * - Merge Transaction::write_set into Cache::extents + * - Merge Transaction::write_set into Cache::extents_index * * After phase 2, the user will submit the record to the journal. 
* Once complete, we perform phase 3: @@ -198,10 +118,13 @@ public: Cache(ExtentPlacementManager &epm); ~Cache(); + cache_stats_t get_stats(bool report_detail, double seconds) const; + /// Creates empty transaction by source TransactionRef create_transaction( Transaction::src_t src, const char* name, + cache_hint_t cache_hint, bool is_weak) { LOG_PREFIX(Cache::create_transaction); @@ -215,7 +138,8 @@ public: [this](Transaction& t) { return on_transaction_destruct(t); }, - ++next_id + ++next_id, + cache_hint ); SUBDEBUGT(seastore_t, "created name={}, source={}, is_weak={}", *ret, name, src, is_weak); @@ -270,6 +194,11 @@ public: return t.root; } + void account_absent_access(Transaction::src_t src) { + ++(get_by_src(stats.cache_absent_by_src, src)); + ++stats.access.cache_absent; + } + /** * get_extent_if_cached * @@ -285,12 +214,29 @@ public: CachedExtentRef ret; LOG_PREFIX(Cache::get_extent_if_cached); auto result = t.get_extent(offset, &ret); + const auto t_src = t.get_src(); + extent_access_stats_t& access_stats = get_by_ext( + get_by_src(stats.access_by_src_ext, t_src), + type); if (result == Transaction::get_extent_ret::RETIRED) { SUBDEBUGT(seastore_cache, "{} {} is retired on t -- {}", t, type, offset, *ret); return get_extent_if_cached_iertr::make_ready_future< CachedExtentRef>(ret); } else if (result == Transaction::get_extent_ret::PRESENT) { + if (ret->is_stable()) { + if (ret->is_dirty()) { + ++access_stats.trans_dirty; + ++stats.access.s.trans_dirty; + } else { + ++access_stats.trans_lru; + ++stats.access.s.trans_lru; + } + } else { + ++access_stats.trans_pending; + ++stats.access.s.trans_pending; + } + if (ret->is_fully_loaded()) { SUBTRACET(seastore_cache, "{} {} is present on t -- {}", t, type, offset, *ret); @@ -299,27 +245,40 @@ public: CachedExtentRef>(ret); }); } else { - SUBDEBUGT(seastore_cache, "{} {} is present on t -- {}" - " without being fully loaded", t, type, offset, *ret); + SUBDEBUGT(seastore_cache, + "{} {} is present on t -- {} without 
fully loaded", + t, type, offset, *ret); return get_extent_if_cached_iertr::make_ready_future< CachedExtentRef>(); } } // get_extent_ret::ABSENT from transaction - auto metric_key = std::make_pair(t.get_src(), type); - ret = query_cache(offset, &metric_key); + ret = query_cache(offset); if (!ret) { SUBDEBUGT(seastore_cache, "{} {} is absent", t, type, offset); + account_absent_access(t_src); return get_extent_if_cached_iertr::make_ready_future<CachedExtentRef>(); - } else if (ret->get_type() == extent_types_t::RETIRED_PLACEHOLDER) { + } else if (is_retired_placeholder_type(ret->get_type())) { // retired_placeholder is not really cached yet SUBDEBUGT(seastore_cache, "{} {} is absent(placeholder)", t, type, offset); + account_absent_access(t_src); return get_extent_if_cached_iertr::make_ready_future<CachedExtentRef>(); - } else if (!ret->is_fully_loaded()) { - SUBDEBUGT(seastore_cache, "{} {} is present without " - "being fully loaded", t, type, offset); + } + + if (ret->is_dirty()) { + ++access_stats.cache_dirty; + ++stats.access.s.cache_dirty; + } else { + ++access_stats.cache_lru; + ++stats.access.s.cache_lru; + } + + if (!ret->is_fully_loaded()) { + // ignore non-full extent + SUBDEBUGT(seastore_cache, + "{} {} is present without fully loaded", t, type, offset); return get_extent_if_cached_iertr::make_ready_future<CachedExtentRef>(); } @@ -327,7 +286,7 @@ public: SUBDEBUGT(seastore_cache, "{} {} is present in cache -- {}", t, type, offset, *ret); t.add_to_read_set(ret); - touch_extent(*ret); + touch_extent(*ret, &t_src, t.get_cache_hint()); return ret->wait_io().then([ret] { return get_extent_if_cached_iertr::make_ready_future< CachedExtentRef>(ret); @@ -346,6 +305,8 @@ public: * * Note, the current implementation leverages parent-child * pointers in LBA instead, so it should only be called in tests. + * + * This path won't be accounted by the cache_access_stats_t. 
*/ using get_extent_iertr = base_iertr; template <typename T> @@ -356,41 +317,37 @@ public: extent_len_t length) { CachedExtentRef ret; LOG_PREFIX(Cache::get_caching_extent); + const auto t_src = t.get_src(); auto result = t.get_extent(offset, &ret); if (result == Transaction::get_extent_ret::RETIRED) { - SUBERRORT(seastore_cache, "{} {}~{} is retired on t -- {}", + SUBERRORT(seastore_cache, "{} {}~0x{:x} is retired on t -- {}", t, T::TYPE, offset, length, *ret); ceph_abort("impossible"); } else if (result == Transaction::get_extent_ret::PRESENT) { + assert(ret->get_length() == length); if (ret->is_fully_loaded()) { - SUBTRACET(seastore_cache, "{} {}~{} is present on t -- {}", + SUBTRACET(seastore_cache, "{} {}~0x{:x} is present on t -- {}", t, T::TYPE, offset, length, *ret); return ret->wait_io().then([ret] { return seastar::make_ready_future<TCachedExtentRef<T>>( ret->cast<T>()); }); } else { - assert(!ret->is_mutable()); - touch_extent(*ret); - SUBDEBUGT(seastore_cache, "{} {}~{} is present on t without been \ - fully loaded, reading ... {}", t, T::TYPE, offset, length, *ret); - auto bp = alloc_cache_buf(ret->get_length()); - ret->set_bptr(std::move(bp)); - return read_extent<T>( - ret->cast<T>()); + SUBDEBUGT(seastore_cache, + "{} {}~0x{:x} is present on t without fully loaded, reading ... 
-- {}", + t, T::TYPE, offset, length, *ret); + return do_read_extent_maybe_partial<T>(ret->cast<T>(), 0, length, &t_src); } } else { - SUBTRACET(seastore_cache, "{} {}~{} is absent on t, query cache ...", + SUBTRACET(seastore_cache, "{} {}~0x{:x} is absent on t, query cache ...", t, T::TYPE, offset, length); - auto f = [&t, this](CachedExtent &ext) { + auto f = [&t, this, t_src](CachedExtent &ext) { t.add_to_read_set(CachedExtentRef(&ext)); - touch_extent(ext); + touch_extent(ext, &t_src, t.get_cache_hint()); }; - auto metric_key = std::make_pair(t.get_src(), T::TYPE); return trans_intr::make_interruptible( do_get_caching_extent<T>( - offset, length, &metric_key, - [](T &){}, std::move(f)) + offset, length, [](T &){}, std::move(f), &t_src) ); } } @@ -399,12 +356,15 @@ public: * get_absent_extent * * The extent in query is supposed to be absent in Cache. + * partially load buffer from partial_off~partial_len if not present. */ template <typename T, typename Func> get_extent_iertr::future<TCachedExtentRef<T>> get_absent_extent( Transaction &t, paddr_t offset, extent_len_t length, + extent_len_t partial_off, + extent_len_t partial_len, Func &&extent_init_func) { CachedExtentRef ret; LOG_PREFIX(Cache::get_absent_extent); @@ -417,17 +377,26 @@ public: } #endif - SUBTRACET(seastore_cache, "{} {}~{} is absent on t, query cache ...", + SUBTRACET(seastore_cache, "{} {}~0x{:x} is absent on t, query cache ...", t, T::TYPE, offset, length); - auto f = [&t, this](CachedExtent &ext) { + const auto t_src = t.get_src(); + auto f = [&t, this, t_src](CachedExtent &ext) { + // FIXME: assert(ext.is_stable_clean()); + assert(ext.is_stable()); + assert(T::TYPE == ext.get_type()); + extent_access_stats_t& access_stats = get_by_ext( + get_by_src(stats.access_by_src_ext, t_src), + T::TYPE); + ++access_stats.load_absent; + ++stats.access.s.load_absent; + t.add_to_read_set(CachedExtentRef(&ext)); - touch_extent(ext); + touch_extent(ext, &t_src, t.get_cache_hint()); }; - auto metric_key = 
std::make_pair(t.get_src(), T::TYPE); return trans_intr::make_interruptible( do_get_caching_extent<T>( - offset, length, &metric_key, - std::forward<Func>(extent_init_func), std::move(f)) + offset, length, partial_off, partial_len, + std::forward<Func>(extent_init_func), std::move(f), &t_src) ); } @@ -451,6 +420,16 @@ public: return get_absent_extent<T>(t, offset, length, [](T &){}); } + template <typename T, typename Func> + get_extent_iertr::future<TCachedExtentRef<T>> get_absent_extent( + Transaction &t, + paddr_t offset, + extent_len_t length, + Func &&extent_init_func) { + return get_absent_extent<T>(t, offset, length, 0, length, + std::forward<Func>(extent_init_func)); + } + bool is_viewable_extent_stable( Transaction &t, CachedExtentRef extent) @@ -469,13 +448,19 @@ public: return view->is_data_stable(); } - using get_extent_ertr = base_ertr; - get_extent_ertr::future<CachedExtentRef> + get_extent_iertr::future<CachedExtentRef> get_extent_viewable_by_trans( Transaction &t, CachedExtentRef extent) { assert(extent->is_valid()); + + const auto t_src = t.get_src(); + auto ext_type = extent->get_type(); + extent_access_stats_t& access_stats = get_by_ext( + get_by_src(stats.access_by_src_ext, t_src), + ext_type); + CachedExtent* p_extent; if (extent->is_stable()) { p_extent = extent->get_transactional_view(t); @@ -483,10 +468,12 @@ public: assert(!extent->is_stable_writting()); assert(p_extent->is_pending_in_trans(t.get_trans_id())); assert(!p_extent->is_stable_writting()); + ++access_stats.trans_pending; + ++stats.access.s.trans_pending; if (p_extent->is_mutable()) { assert(p_extent->is_fully_loaded()); assert(!p_extent->is_pending_io()); - return get_extent_ertr::make_ready_future<CachedExtentRef>( + return get_extent_iertr::make_ready_future<CachedExtentRef>( CachedExtentRef(p_extent)); } else { assert(p_extent->is_exist_clean()); @@ -495,57 +482,101 @@ public: // stable from trans-view assert(!p_extent->is_pending_in_trans(t.get_trans_id())); if 
(t.maybe_add_to_read_set(p_extent)) { - touch_extent(*p_extent); + if (p_extent->is_dirty()) { + ++access_stats.cache_dirty; + ++stats.access.s.cache_dirty; + } else { + ++access_stats.cache_lru; + ++stats.access.s.cache_lru; + } + touch_extent(*p_extent, &t_src, t.get_cache_hint()); + } else { + if (p_extent->is_dirty()) { + ++access_stats.trans_dirty; + ++stats.access.s.trans_dirty; + } else { + ++access_stats.trans_lru; + ++stats.access.s.trans_lru; + } } } } else { assert(!extent->is_stable_writting()); assert(extent->is_pending_in_trans(t.get_trans_id())); + ++access_stats.trans_pending; + ++stats.access.s.trans_pending; if (extent->is_mutable()) { assert(extent->is_fully_loaded()); assert(!extent->is_pending_io()); - return get_extent_ertr::make_ready_future<CachedExtentRef>(extent); + return get_extent_iertr::make_ready_future<CachedExtentRef>(extent); } else { assert(extent->is_exist_clean()); p_extent = extent.get(); } } - assert(p_extent->is_stable() || p_extent->is_exist_clean()); // user should not see RETIRED_PLACEHOLDER extents - ceph_assert(p_extent->get_type() != extent_types_t::RETIRED_PLACEHOLDER); - if (!p_extent->is_fully_loaded()) { - assert(!p_extent->is_mutable()); - LOG_PREFIX(Cache::get_extent_viewable_by_trans); - SUBDEBUG(seastore_cache, - "{} {}~{} is present without been fully loaded, reading ... 
-- {}", - p_extent->get_type(), p_extent->get_paddr(),p_extent->get_length(), - *p_extent); - auto bp = alloc_cache_buf(p_extent->get_length()); - p_extent->set_bptr(std::move(bp)); - return read_extent<CachedExtent>(CachedExtentRef(p_extent)); - } - return p_extent->wait_io( - ).then([p_extent] { - return get_extent_ertr::make_ready_future<CachedExtentRef>( + ceph_assert(!is_retired_placeholder_type(p_extent->get_type())); + // for logical extents, handle partial load in TM::read_pin(), + // also see read_extent_maybe_partial() and get_absent_extent() + assert(is_logical_type(p_extent->get_type()) || + p_extent->is_fully_loaded()); + + return trans_intr::make_interruptible( + p_extent->wait_io() + ).then_interruptible([p_extent] { + return get_extent_iertr::make_ready_future<CachedExtentRef>( CachedExtentRef(p_extent)); }); } template <typename T> - using read_extent_ret = get_extent_ertr::future<TCachedExtentRef<T>>; - - template <typename T> - read_extent_ret<T> get_extent_viewable_by_trans( + get_extent_iertr::future<TCachedExtentRef<T>> + get_extent_viewable_by_trans( Transaction &t, TCachedExtentRef<T> extent) { return get_extent_viewable_by_trans(t, CachedExtentRef(extent.get()) - ).safe_then([](auto p_extent) { + ).si_then([](auto p_extent) { return p_extent->template cast<T>(); }); } + // wait extent io or do partial reads + template <typename T> + get_extent_iertr::future<TCachedExtentRef<T>> + read_extent_maybe_partial( + Transaction &t, + TCachedExtentRef<T> extent, + extent_len_t partial_off, + extent_len_t partial_len) { + assert(is_logical_type(extent->get_type())); + if (!extent->is_range_loaded(partial_off, partial_len)) { + LOG_PREFIX(Cache::read_extent_maybe_partial); + SUBDEBUGT(seastore_cache, + "{} {}~0x{:x} is present on t without range 0x{:x}~0x{:x}, reading ... 
-- {}", + t, extent->get_type(), extent->get_paddr(), extent->get_length(), + partial_off, partial_len, *extent); + const auto t_src = t.get_src(); + extent_access_stats_t& access_stats = get_by_ext( + get_by_src(stats.access_by_src_ext, t_src), + extent->get_type()); + ++access_stats.load_present; + ++stats.access.s.load_present; + return trans_intr::make_interruptible( + do_read_extent_maybe_partial( + std::move(extent), partial_off, partial_len, &t_src)); + } else { + // TODO(implement fine-grained-wait): + // the range might be already loaded, but we don't know + return trans_intr::make_interruptible( + extent->wait_io() + ).then_interruptible([extent] { + return get_extent_iertr::make_ready_future<TCachedExtentRef<T>>(extent); + }); + } + } + extent_len_t get_block_size() const { return epm.get_block_size(); } @@ -553,60 +584,127 @@ public: // Interfaces only for tests. public: CachedExtentRef test_query_cache(paddr_t offset) { - return query_cache(offset, nullptr); + return query_cache(offset); } private: + using get_extent_ertr = base_ertr; + template <typename T> + using read_extent_ret = get_extent_ertr::future<TCachedExtentRef<T>>; + /// Implements exclusive call to read_extent() for the extent + template <typename T> + read_extent_ret<T> do_read_extent_maybe_partial( + TCachedExtentRef<T>&& extent, + extent_len_t partial_off, + extent_len_t partial_len, + const Transaction::src_t* p_src) + { + LOG_PREFIX(Cache::do_read_extent_maybe_partial); + // They must be atomic: + // 1. checking missing range and wait io + // 2. 
checking missing range and read + // because the extents in Caches can be accessed concurrently + // + // TODO(implement fine-grained-wait) + assert(!extent->is_range_loaded(partial_off, partial_len)); + assert(!extent->is_mutable()); + if (extent->is_pending_io()) { + std::optional<Transaction::src_t> src; + if (p_src) { + src = *p_src; + } + auto* p_extent = extent.get(); + return p_extent->wait_io( + ).then([extent=std::move(extent), partial_off, partial_len, this, FNAME, src]() mutable + -> read_extent_ret<T> { + if (extent->is_range_loaded(partial_off, partial_len)) { + SUBDEBUG(seastore_cache, + "{} {}~0x{:x} got range 0x{:x}~0x{:x} ... -- {}", + extent->get_type(), extent->get_paddr(), extent->get_length(), + partial_off, partial_len, *extent); + // we don't know whether the target range is loading or not + if (extent->is_pending_io()) { + auto* p_extent = extent.get(); + return p_extent->wait_io( + ).then([extent=std::move(extent)]() mutable { + return seastar::make_ready_future<TCachedExtentRef<T>>(std::move(extent)); + }); + } else { + return seastar::make_ready_future<TCachedExtentRef<T>>(std::move(extent)); + } + } else { // range not loaded + SUBDEBUG(seastore_cache, + "{} {}~0x{:x} without range 0x{:x}~0x{:x} ... -- {}", + extent->get_type(), extent->get_paddr(), extent->get_length(), + partial_off, partial_len, *extent); + Transaction::src_t* p_src = (src.has_value() ? &src.value() : nullptr); + return do_read_extent_maybe_partial( + std::move(extent), partial_off, partial_len, p_src); + } + }); + } else { + SUBDEBUG(seastore_cache, + "{} {}~0x{:x} is not pending without range 0x{:x}~0x{:x}, reading ... 
-- {}", + extent->get_type(), extent->get_paddr(), extent->get_length(), + partial_off, partial_len, *extent); + return read_extent<T>( + std::move(extent), partial_off, partial_len, p_src); + } + } + /** * do_get_caching_extent * * returns ref to extent at offset~length of type T either from * - extent_set if already in cache * - disk + * only load partial_off~partial_len */ using src_ext_t = std::pair<Transaction::src_t, extent_types_t>; template <typename T, typename Func, typename OnCache> read_extent_ret<T> do_get_caching_extent( paddr_t offset, ///< [in] starting addr extent_len_t length, ///< [in] length - const src_ext_t* p_src_ext, ///< [in] cache query metric key + extent_len_t partial_off, ///< [in] offset of piece in extent + extent_len_t partial_len, ///< [in] length of piece in extent Func &&extent_init_func, ///< [in] init func for extent - OnCache &&on_cache + OnCache &&on_cache, + const Transaction::src_t* p_src ) { LOG_PREFIX(Cache::do_get_caching_extent); - auto cached = query_cache(offset, p_src_ext); + auto cached = query_cache(offset); if (!cached) { - auto ret = CachedExtent::make_cached_extent_ref<T>( - alloc_cache_buf(length)); + // partial read + TCachedExtentRef<T> ret = CachedExtent::make_cached_extent_ref<T>(length); ret->init(CachedExtent::extent_state_t::CLEAN_PENDING, offset, PLACEMENT_HINT_NULL, NULL_GENERATION, TRANS_ID_NULL); SUBDEBUG(seastore_cache, - "{} {}~{} is absent, add extent and reading ... -- {}", - T::TYPE, offset, length, *ret); - const auto p_src = p_src_ext ? &p_src_ext->first : nullptr; - add_extent(ret, p_src); + "{} {}~0x{:x} is absent, add extent and reading range 0x{:x}~0x{:x} ... 
-- {}", + T::TYPE, offset, length, partial_off, partial_len, *ret); + add_extent(ret); + // touch_extent() should be included in on_cache on_cache(*ret); extent_init_func(*ret); return read_extent<T>( - std::move(ret)); + std::move(ret), partial_off, partial_len, p_src); } // extent PRESENT in cache - if (cached->get_type() == extent_types_t::RETIRED_PLACEHOLDER) { - auto ret = CachedExtent::make_cached_extent_ref<T>( - alloc_cache_buf(length)); + if (is_retired_placeholder_type(cached->get_type())) { + // partial read + TCachedExtentRef<T> ret = CachedExtent::make_cached_extent_ref<T>(length); ret->init(CachedExtent::extent_state_t::CLEAN_PENDING, offset, PLACEMENT_HINT_NULL, NULL_GENERATION, TRANS_ID_NULL); SUBDEBUG(seastore_cache, - "{} {}~{} is absent(placeholder), reading ... -- {}", - T::TYPE, offset, length, *ret); - extents.replace(*ret, *cached); + "{} {}~0x{:x} is absent(placeholder), add extent and reading range 0x{:x}~0x{:x} ... -- {}", + T::TYPE, offset, length, partial_off, partial_len, *ret); + extents_index.replace(*ret, *cached); on_cache(*ret); // replace placeholder in transactions @@ -618,34 +716,41 @@ private: cached->state = CachedExtent::extent_state_t::INVALID; extent_init_func(*ret); return read_extent<T>( - std::move(ret)); - } else if (!cached->is_fully_loaded()) { - auto ret = TCachedExtentRef<T>(static_cast<T*>(cached.get())); - on_cache(*ret); - SUBDEBUG(seastore_cache, - "{} {}~{} is present without been fully loaded, reading ... 
-- {}", - T::TYPE, offset, length, *ret); - auto bp = alloc_cache_buf(length); - ret->set_bptr(std::move(bp)); - return read_extent<T>( - std::move(ret)); - } else { + std::move(ret), partial_off, partial_len, p_src); + } + + auto ret = TCachedExtentRef<T>(static_cast<T*>(cached.get())); + on_cache(*ret); + if (ret->is_range_loaded(partial_off, partial_len)) { SUBTRACE(seastore_cache, - "{} {}~{} is present in cache -- {}", - T::TYPE, offset, length, *cached); - auto ret = TCachedExtentRef<T>(static_cast<T*>(cached.get())); - on_cache(*ret); - return ret->wait_io( - ).then([ret=std::move(ret)]() mutable - -> read_extent_ret<T> { + "{} {}~0x{:x} is present with range 0x{:x}~0x{:x} ... -- {}", + T::TYPE, offset, length, partial_off, partial_len, *ret); + return ret->wait_io().then([ret] { // ret may be invalid, caller must check - return read_extent_ret<T>( - get_extent_ertr::ready_future_marker{}, - std::move(ret)); + return seastar::make_ready_future<TCachedExtentRef<T>>(ret); }); + } else { + SUBDEBUG(seastore_cache, + "{} {}~0x{:x} is present without range 0x{:x}~0x{:x}, reading ... -- {}", + T::TYPE, offset, length, partial_off, partial_len, *ret); + return do_read_extent_maybe_partial( + std::move(ret), partial_off, partial_len, p_src); } } + template <typename T, typename Func, typename OnCache> + read_extent_ret<T> do_get_caching_extent( + paddr_t offset, ///< [in] starting addr + extent_len_t length, ///< [in] length + Func &&extent_init_func, ///< [in] init func for extent + OnCache &&on_cache, + const Transaction::src_t* p_src + ) { + return do_get_caching_extent<T>(offset, length, 0, length, + std::forward<Func>(extent_init_func), + std::forward<OnCache>(on_cache), + p_src); + } // This is a workaround std::move_only_function not being available, // not really worth generalizing at this time. 
@@ -680,11 +785,18 @@ private: paddr_t offset, laddr_t laddr, extent_len_t length, - const Transaction::src_t* p_src, extent_init_func_t &&extent_init_func, - extent_init_func_t &&on_cache - ); + extent_init_func_t &&on_cache, + const Transaction::src_t* p_src); + /** + * get_caching_extent_by_type + * + * Note, the current implementation leverages parent-child + * pointers in LBA instead, so it should only be called in tests. + * + * This path won't be accounted by the cache_access_stats_t. + */ using get_extent_by_type_iertr = get_extent_iertr; using get_extent_by_type_ret = get_extent_by_type_iertr::future< CachedExtentRef>; @@ -697,41 +809,39 @@ private: extent_init_func_t &&extent_init_func ) { LOG_PREFIX(Cache::get_caching_extent_by_type); + const auto t_src = t.get_src(); CachedExtentRef ret; auto status = t.get_extent(offset, &ret); if (status == Transaction::get_extent_ret::RETIRED) { - SUBERRORT(seastore_cache, "{} {}~{} {} is retired on t -- {}", + SUBERRORT(seastore_cache, "{} {}~0x{:x} {} is retired on t -- {}", t, type, offset, length, laddr, *ret); ceph_abort("impossible"); } else if (status == Transaction::get_extent_ret::PRESENT) { + assert(ret->get_length() == length); if (ret->is_fully_loaded()) { - SUBTRACET(seastore_cache, "{} {}~{} {} is present on t -- {}", + SUBTRACET(seastore_cache, "{} {}~0x{:x} {} is present on t -- {}", t, type, offset, length, laddr, *ret); return ret->wait_io().then([ret] { return seastar::make_ready_future<CachedExtentRef>(ret); }); } else { - assert(!ret->is_mutable()); - touch_extent(*ret); - SUBDEBUGT(seastore_cache, "{} {}~{} {} is present on t without been \ - fully loaded, reading ...", t, type, offset, length, laddr); - auto bp = alloc_cache_buf(ret->get_length()); - ret->set_bptr(std::move(bp)); - return read_extent<CachedExtent>( - std::move(ret)); + SUBDEBUGT(seastore_cache, + "{} {}~0x{:x} {} is present on t without fully loaded, reading ... 
-- {}", + t, type, offset, length, laddr, *ret); + return do_read_extent_maybe_partial<CachedExtent>( + std::move(ret), 0, length, &t_src); } } else { - SUBTRACET(seastore_cache, "{} {}~{} {} is absent on t, query cache ...", + SUBTRACET(seastore_cache, "{} {}~0x{:x} {} is absent on t, query cache ...", t, type, offset, length, laddr); - auto f = [&t, this](CachedExtent &ext) { + auto f = [&t, this, t_src](CachedExtent &ext) { t.add_to_read_set(CachedExtentRef(&ext)); - touch_extent(ext); + touch_extent(ext, &t_src, t.get_cache_hint()); }; - auto src = t.get_src(); return trans_intr::make_interruptible( do_get_caching_extent_by_type( - type, offset, laddr, length, &src, - std::move(extent_init_func), std::move(f)) + type, offset, laddr, length, + std::move(extent_init_func), std::move(f), &t_src) ); } } @@ -755,17 +865,25 @@ private: } #endif - SUBTRACET(seastore_cache, "{} {}~{} {} is absent on t, query cache ...", + SUBTRACET(seastore_cache, "{} {}~0x{:x} {} is absent on t, query cache ...", t, type, offset, length, laddr); - auto f = [&t, this](CachedExtent &ext) { + const auto t_src = t.get_src(); + auto f = [&t, this, t_src](CachedExtent &ext) { + // FIXME: assert(ext.is_stable_clean()); + assert(ext.is_stable()); + extent_access_stats_t& access_stats = get_by_ext( + get_by_src(stats.access_by_src_ext, t_src), + ext.get_type()); + ++access_stats.load_absent; + ++stats.access.s.load_absent; + t.add_to_read_set(CachedExtentRef(&ext)); - touch_extent(ext); + touch_extent(ext, &t_src, t.get_cache_hint()); }; - auto src = t.get_src(); return trans_intr::make_interruptible( do_get_caching_extent_by_type( - type, offset, laddr, length, &src, - std::move(extent_init_func), std::move(f)) + type, offset, laddr, length, + std::move(extent_init_func), std::move(f), &t_src) ); } @@ -787,7 +905,7 @@ private: for (auto it = start_iter; it != end_iter; it++) { - res.emplace(it->paddr, it->laddr, it->len, it->type, it->seq); + res.emplace(it->paddr, it->laddr, it->len, 
it->type); } return res; } @@ -886,7 +1004,7 @@ public: #endif ) { LOG_PREFIX(Cache::alloc_new_non_data_extent); - SUBTRACET(seastore_cache, "allocate {} {}B, hint={}, gen={}", + SUBTRACET(seastore_cache, "allocate {} 0x{:x}B, hint={}, gen={}", t, T::TYPE, length, hint, rewrite_gen_printer_t{gen}); #ifdef UNIT_TESTS_BUILT auto result = epm.alloc_new_non_data_extent(t, T::TYPE, length, hint, gen, epaddr); @@ -894,7 +1012,8 @@ public: auto result = epm.alloc_new_non_data_extent(t, T::TYPE, length, hint, gen); #endif if (!result) { - return nullptr; + SUBERRORT(seastore_cache, "insufficient space", t); + std::rethrow_exception(crimson::ct_error::enospc::exception_ptr()); } auto ret = CachedExtent::make_cached_extent_ref<T>(std::move(result->bp)); ret->init(CachedExtent::extent_state_t::INITIAL_WRITE_PENDING, @@ -904,7 +1023,7 @@ public: t.get_trans_id()); t.add_fresh_extent(ret); SUBDEBUGT(seastore_cache, - "allocated {} {}B extent at {}, hint={}, gen={} -- {}", + "allocated {} 0x{:x}B extent at {}, hint={}, gen={} -- {}", t, T::TYPE, length, result->paddr, hint, rewrite_gen_printer_t{result->gen}, *ret); return ret; @@ -928,13 +1047,17 @@ public: #endif ) { LOG_PREFIX(Cache::alloc_new_data_extents); - SUBTRACET(seastore_cache, "allocate {} {}B, hint={}, gen={}", + SUBTRACET(seastore_cache, "allocate {} 0x{:x}B, hint={}, gen={}", t, T::TYPE, length, hint, rewrite_gen_printer_t{gen}); #ifdef UNIT_TESTS_BUILT auto results = epm.alloc_new_data_extents(t, T::TYPE, length, hint, gen, epaddr); #else auto results = epm.alloc_new_data_extents(t, T::TYPE, length, hint, gen); #endif + if (results.empty()) { + SUBERRORT(seastore_cache, "insufficient space", t); + std::rethrow_exception(crimson::ct_error::enospc::exception_ptr()); + } std::vector<TCachedExtentRef<T>> extents; for (auto &result : results) { auto ret = CachedExtent::make_cached_extent_ref<T>(std::move(result.bp)); @@ -945,7 +1068,7 @@ public: t.get_trans_id()); t.add_fresh_extent(ret); SUBDEBUGT(seastore_cache, - 
"allocated {} {}B extent at {}, hint={}, gen={} -- {}", + "allocated {} 0x{:x}B extent at {}, hint={}, gen={} -- {}", t, T::TYPE, length, result.paddr, hint, rewrite_gen_printer_t{result.gen}, *ret); extents.emplace_back(std::move(ret)); @@ -972,15 +1095,14 @@ public: TCachedExtentRef<T> ext; if (original_bptr.has_value()) { // shallow copy the buffer from original extent - auto nbp = ceph::bufferptr( - *original_bptr, - remap_laddr - original_laddr, - remap_length); + auto remap_offset = remap_laddr.get_byte_distance< + extent_len_t>(original_laddr); + auto nbp = ceph::bufferptr(*original_bptr, remap_offset, remap_length); // ExtentPlacementManager::alloc_new_extent will make a new // (relative/temp) paddr, so make extent directly ext = CachedExtent::make_cached_extent_ref<T>(std::move(nbp)); } else { - ext = CachedExtent::make_placeholder_cached_extent_ref<T>(remap_length); + ext = CachedExtent::make_cached_extent_ref<T>(remap_length); } ext->init(CachedExtent::extent_state_t::EXIST_CLEAN, @@ -992,7 +1114,7 @@ public: auto extent = ext->template cast<T>(); extent->set_laddr(remap_laddr); t.add_fresh_extent(ext); - SUBTRACET(seastore_cache, "allocated {} {}B, hint={}, has ptr? {} -- {}", + SUBTRACET(seastore_cache, "allocated {} 0x{:x}B, hint={}, has ptr? 
{} -- {}", t, T::TYPE, remap_length, remap_laddr, original_bptr.has_value(), *extent); return extent; } @@ -1135,10 +1257,10 @@ public: { LOG_PREFIX(Cache::init_cached_extents); SUBINFOT(seastore_cache, - "start with {}({}B) extents, {} dirty, dirty_from={}, alloc_from={}", + "start with {}(0x{:x}B) extents, {} dirty, dirty_from={}, alloc_from={}", t, - extents.size(), - extents.get_bytes(), + extents_index.size(), + extents_index.get_bytes(), dirty.size(), get_oldest_dirty_from().value_or(JOURNAL_SEQ_NULL), get_oldest_backref_dirty_from().value_or(JOURNAL_SEQ_NULL)); @@ -1147,7 +1269,7 @@ public: // Cache::root should have been inserted to the dirty list assert(root->is_dirty()); std::vector<CachedExtentRef> _dirty; - for (auto &e : extents) { + for (auto &e : extents_index) { _dirty.push_back(CachedExtentRef(&e)); } return seastar::do_with( @@ -1164,7 +1286,7 @@ public: ).si_then([this, FNAME, &t, e](bool is_alive) { if (!is_alive) { SUBDEBUGT(seastore_cache, "extent is not alive, remove extent -- {}", t, *e); - remove_extent(e); + remove_extent(e, nullptr); e->set_invalid(t); } else { SUBDEBUGT(seastore_cache, "extent is alive -- {}", t, *e); @@ -1178,10 +1300,10 @@ public: } ).si_then([this, FNAME, &t] { SUBINFOT(seastore_cache, - "finish with {}({}B) extents, {} dirty, dirty_from={}, alloc_from={}", + "finish with {}(0x{:x}B) extents, {} dirty, dirty_from={}, alloc_from={}", t, - extents.size(), - extents.get_bytes(), + extents_index.size(), + extents_index.get_bytes(), dirty.size(), get_oldest_dirty_from().value_or(JOURNAL_SEQ_NULL), get_oldest_backref_dirty_from().value_or(JOURNAL_SEQ_NULL)); @@ -1352,21 +1474,20 @@ private: /// Update lru for access to ref void touch_extent( CachedExtent &ext, - const Transaction::src_t* p_src=nullptr) + const Transaction::src_t* p_src, + cache_hint_t hint) { - if (p_src && - is_background_transaction(*p_src) && - is_logical_type(ext.get_type())) { + if (hint == CACHE_HINT_NOCACHE && is_logical_type(ext.get_type())) { 
return; } if (ext.is_stable_clean() && !ext.is_placeholder()) { - lru.move_to_top(ext); + lru.move_to_top(ext, p_src); } } ExtentPlacementManager& epm; RootBlockRef root; ///< ref to current root - ExtentIndex extents; ///< set of live extents + ExtentIndex extents_index; ///< set of live extents journal_seq_t last_commit = JOURNAL_SEQ_MIN; @@ -1380,7 +1501,7 @@ private: * * holds refs to dirty extents. Ordered by CachedExtent::get_dirty_from(). */ - CachedExtent::list dirty; + CachedExtent::primary_ref_list dirty; using backref_extent_entry_query_set_t = std::set< @@ -1416,6 +1537,7 @@ private: friend class crimson::os::seastore::backref::BtreeBackrefManager; friend class crimson::os::seastore::BackrefManager; + /** * lru * @@ -1426,71 +1548,139 @@ private: const size_t capacity = 0; // current size (bytes) - size_t contents = 0; + size_t current_size = 0; + + counter_by_extent_t<cache_size_stats_t> sizes_by_ext; + cache_io_stats_t overall_io; + counter_by_src_t<counter_by_extent_t<cache_io_stats_t> > + trans_io_by_src_ext; + + mutable cache_io_stats_t last_overall_io; + mutable cache_io_stats_t last_trans_io; + mutable counter_by_src_t<counter_by_extent_t<cache_io_stats_t> > + last_trans_io_by_src_ext; - CachedExtent::list lru; + CachedExtent::primary_ref_list lru; - void trim_to_capacity() { - while (contents > capacity) { - assert(lru.size() > 0); - remove_from_lru(lru.front()); + void do_remove_from_lru( + CachedExtent &extent, + const Transaction::src_t* p_src) { + assert(extent.is_stable_clean() && !extent.is_placeholder()); + assert(extent.primary_ref_list_hook.is_linked()); + assert(lru.size() > 0); + auto extent_loaded_length = extent.get_loaded_length(); + assert(current_size >= extent_loaded_length); + + lru.erase(lru.s_iterator_to(extent)); + current_size -= extent_loaded_length; + get_by_ext(sizes_by_ext, extent.get_type()).account_out(extent_loaded_length); + overall_io.out_sizes.account_in(extent_loaded_length); + if (p_src) { + get_by_ext( + 
get_by_src(trans_io_by_src_ext, *p_src), + extent.get_type() + ).out_sizes.account_in(extent_loaded_length); } + intrusive_ptr_release(&extent); } - void add_to_lru(CachedExtent &extent) { - assert(extent.is_stable_clean() && !extent.is_placeholder()); - - if (!extent.primary_ref_list_hook.is_linked()) { - contents += extent.get_length(); - intrusive_ptr_add_ref(&extent); - lru.push_back(extent); + void trim_to_capacity( + const Transaction::src_t* p_src) { + while (current_size > capacity) { + do_remove_from_lru(lru.front(), p_src); } - trim_to_capacity(); } public: LRU(size_t capacity) : capacity(capacity) {} - size_t get_capacity() const { + size_t get_capacity_bytes() const { return capacity; } - size_t get_current_contents_bytes() const { - return contents; + size_t get_current_size_bytes() const { + return current_size; } - size_t get_current_contents_extents() const { + size_t get_current_num_extents() const { return lru.size(); } + void get_stats( + cache_stats_t &stats, + bool report_detail, + double seconds) const; + void remove_from_lru(CachedExtent &extent) { assert(extent.is_stable_clean() && !extent.is_placeholder()); if (extent.primary_ref_list_hook.is_linked()) { - lru.erase(lru.s_iterator_to(extent)); - assert(contents >= extent.get_length()); - contents -= extent.get_length(); - intrusive_ptr_release(&extent); + do_remove_from_lru(extent, nullptr); } } - void move_to_top(CachedExtent &extent) { + void move_to_top( + CachedExtent &extent, + const Transaction::src_t* p_src) { assert(extent.is_stable_clean() && !extent.is_placeholder()); + auto extent_loaded_length = extent.get_loaded_length(); + if (extent.primary_ref_list_hook.is_linked()) { + // present, move to top (back) + assert(lru.size() > 0); + assert(current_size >= extent_loaded_length); + lru.erase(lru.s_iterator_to(extent)); + lru.push_back(extent); + } else { + // absent, add to top (back) + if (extent_loaded_length > 0) { + current_size += extent_loaded_length; + 
get_by_ext(sizes_by_ext, extent.get_type()).account_in(extent_loaded_length); + overall_io.in_sizes.account_in(extent_loaded_length); + if (p_src) { + get_by_ext( + get_by_src(trans_io_by_src_ext, *p_src), + extent.get_type() + ).in_sizes.account_in(extent_loaded_length); + } + } // else: the extent isn't loaded upon touch_extent()/on_cache(), + // account the io later in increase_cached_size() upon read_extent() + intrusive_ptr_add_ref(&extent); + lru.push_back(extent); + + trim_to_capacity(p_src); + } + } + + void increase_cached_size( + CachedExtent &extent, + extent_len_t increased_length, + const Transaction::src_t* p_src) { + assert(!extent.is_mutable()); + if (extent.primary_ref_list_hook.is_linked()) { - lru.erase(lru.s_iterator_to(extent)); - intrusive_ptr_release(&extent); - assert(contents >= extent.get_length()); - contents -= extent.get_length(); + assert(extent.is_stable_clean() && !extent.is_placeholder()); + // present, increase size + assert(lru.size() > 0); + current_size += increased_length; + get_by_ext(sizes_by_ext, extent.get_type()).account_in(increased_length); + overall_io.in_sizes.account_in(increased_length); + if (p_src) { + get_by_ext( + get_by_src(trans_io_by_src_ext, *p_src), + extent.get_type() + ).in_sizes.account_in(increased_length); + } + + trim_to_capacity(nullptr); } - add_to_lru(extent); } void clear() { LOG_PREFIX(Cache::LRU::clear); for (auto iter = lru.begin(); iter != lru.end();) { SUBDEBUG(seastore_cache, "clearing {}", *iter); - remove_from_lru(*(iter++)); + do_remove_from_lru(*(iter++), nullptr); } } @@ -1504,9 +1694,6 @@ private: uint64_t hit = 0; }; - template <typename CounterT> - using counter_by_extent_t = std::array<CounterT, EXTENT_TYPES_MAX>; - struct invalid_trans_efforts_t { io_stat_t read; io_stat_t mutate; @@ -1559,9 +1746,18 @@ private: counter_by_src_t<uint64_t> trans_created_by_src; counter_by_src_t<commit_trans_efforts_t> committed_efforts_by_src; counter_by_src_t<invalid_trans_efforts_t> 
invalidated_efforts_by_src; - counter_by_src_t<query_counters_t> cache_query_by_src; success_read_trans_efforts_t success_read_efforts; + uint64_t dirty_bytes = 0; + counter_by_extent_t<cache_size_stats_t> dirty_sizes_by_ext; + dirty_io_stats_t dirty_io; + counter_by_src_t<counter_by_extent_t<dirty_io_stats_t> > + dirty_io_by_src_ext; + + cache_access_stats_t access; + counter_by_src_t<uint64_t> cache_absent_by_src; + counter_by_src_t<counter_by_extent_t<extent_access_stats_t> > + access_by_src_ext; uint64_t onode_tree_depth = 0; int64_t onode_tree_extents_num = 0; @@ -1586,18 +1782,19 @@ private: std::array<uint64_t, NUM_SRC_COMB> trans_conflicts_by_srcs; counter_by_src_t<uint64_t> trans_conflicts_by_unknown; - version_stat_t committed_dirty_version; - version_stat_t committed_reclaim_version; + rewrite_stats_t trim_rewrites; + rewrite_stats_t reclaim_rewrites; } stats; - template <typename CounterT> - CounterT& get_by_ext( - counter_by_extent_t<CounterT>& counters_by_ext, - extent_types_t ext) { - auto index = static_cast<uint8_t>(ext); - assert(index < EXTENT_TYPES_MAX); - return counters_by_ext[index]; - } + mutable dirty_io_stats_t last_dirty_io; + mutable counter_by_src_t<counter_by_extent_t<dirty_io_stats_t> > + last_dirty_io_by_src_ext; + mutable rewrite_stats_t last_trim_rewrites; + mutable rewrite_stats_t last_reclaim_rewrites; + mutable cache_access_stats_t last_access; + mutable counter_by_src_t<uint64_t> last_cache_absent_by_src; + mutable counter_by_src_t<counter_by_extent_t<extent_access_stats_t> > + last_access_by_src_ext; void account_conflict(Transaction::src_t src1, Transaction::src_t src2) { assert(src1 < Transaction::src_t::MAX); @@ -1630,33 +1827,55 @@ private: seastar::metrics::metric_group metrics; void register_metrics(); - /// alloc buffer for cached extent - bufferptr alloc_cache_buf(size_t size) { - // TODO: memory pooling etc - auto bp = ceph::bufferptr( - buffer::create_page_aligned(size)); - bp.zero(); - return bp; + void 
apply_backref_mset( + backref_entry_refs_t& backref_entries) { + for (auto& entry : backref_entries) { + backref_entry_mset.insert(*entry); + } } - void backref_batch_update( - std::vector<backref_entry_ref> &&, - const journal_seq_t &); + void apply_backref_byseq( + backref_entry_refs_t&& backref_entries, + const journal_seq_t& seq); + + void commit_backref_entries( + backref_entry_refs_t&& backref_entries, + const journal_seq_t& seq) { + apply_backref_mset(backref_entries); + apply_backref_byseq(std::move(backref_entries), seq); + } /// Add extent to extents handling dirty and refcounting - void add_extent(CachedExtentRef ref, const Transaction::src_t* t_src); + /// + /// Note, it must be followed by add_to_dirty() or touch_extent(). + /// The only exception is RetiredExtentPlaceholder. + void add_extent(CachedExtentRef ref); /// Mark existing extent ref dirty -- mainly for replay void mark_dirty(CachedExtentRef ref); /// Add dirty extent to dirty list - void add_to_dirty(CachedExtentRef ref); + void add_to_dirty( + CachedExtentRef ref, + const Transaction::src_t* p_src); + + /// Replace the prev dirty extent by next + void replace_dirty( + CachedExtentRef next, + CachedExtentRef prev, + const Transaction::src_t& src); /// Remove from dirty list - void remove_from_dirty(CachedExtentRef ref); + void remove_from_dirty( + CachedExtentRef ref, + const Transaction::src_t* p_src); + + void clear_dirty(); /// Remove extent from extents handling dirty and refcounting - void remove_extent(CachedExtentRef ref); + void remove_extent( + CachedExtentRef ref, + const Transaction::src_t* p_src); /// Retire extent void commit_retire_extent(Transaction& t, CachedExtentRef ref); @@ -1674,39 +1893,74 @@ private: /// Introspect transaction when it is being destructed void on_transaction_destruct(Transaction& t); + /// Read the extent in range offset~length, + /// must be called exclusively for an extent, + /// also see do_read_extent_maybe_partial().
+ /// + /// May return an invalid extent due to transaction conflict. template <typename T> read_extent_ret<T> read_extent( - TCachedExtentRef<T>&& extent + TCachedExtentRef<T>&& extent, + extent_len_t offset, + extent_len_t length, + const Transaction::src_t* p_src ) { + LOG_PREFIX(Cache::read_extent); assert(extent->state == CachedExtent::extent_state_t::CLEAN_PENDING || - extent->state == CachedExtent::extent_state_t::EXIST_CLEAN || - extent->state == CachedExtent::extent_state_t::CLEAN); + extent->state == CachedExtent::extent_state_t::EXIST_CLEAN || + extent->state == CachedExtent::extent_state_t::CLEAN); + assert(!extent->is_range_loaded(offset, length)); + assert(is_aligned(offset, get_block_size())); + assert(is_aligned(length, get_block_size())); extent->set_io_wait(); - return epm.read( - extent->get_paddr(), - extent->get_length(), - extent->get_bptr() - ).safe_then( - [extent=std::move(extent), this]() mutable { - LOG_PREFIX(Cache::read_extent); - if (likely(extent->state == CachedExtent::extent_state_t::CLEAN_PENDING)) { - extent->state = CachedExtent::extent_state_t::CLEAN; - } - ceph_assert(extent->state == CachedExtent::extent_state_t::EXIST_CLEAN - || extent->state == CachedExtent::extent_state_t::CLEAN - || !extent->is_valid()); - if (extent->is_valid()) { - // crc will be checked against LBA leaf entry for logical extents, - // or check against in-extent crc for physical extents. 
- if (epm.get_checksum_needed(extent->get_paddr())) { - extent->last_committed_crc = extent->calc_crc32c(); - } else { - extent->last_committed_crc = CRC_NULL; - } - extent->on_clean_read(); - } + auto old_length = extent->get_loaded_length(); + load_ranges_t to_read = extent->load_ranges(offset, length); + auto new_length = extent->get_loaded_length(); + assert(new_length > old_length); + lru.increase_cached_size(*extent, new_length - old_length, p_src); + return seastar::do_with(to_read.ranges, [extent, this, FNAME](auto &read_ranges) { + return ExtentPlacementManager::read_ertr::parallel_for_each( + read_ranges, [extent, this, FNAME](auto &read_range) { + SUBDEBUG(seastore_cache, "reading extent {} 0x{:x}~0x{:x} ...", + extent->get_paddr(), read_range.offset, read_range.get_length()); + assert(is_aligned(read_range.offset, get_block_size())); + assert(is_aligned(read_range.get_length(), get_block_size())); + return epm.read( + extent->get_paddr() + read_range.offset, + read_range.get_length(), + read_range.ptr); + }); + }).safe_then( + [this, FNAME, extent=std::move(extent), offset, length]() mutable { + if (likely(extent->state == CachedExtent::extent_state_t::CLEAN_PENDING)) { + extent->state = CachedExtent::extent_state_t::CLEAN; + } + ceph_assert(extent->state == CachedExtent::extent_state_t::EXIST_CLEAN + || extent->state == CachedExtent::extent_state_t::CLEAN + || !extent->is_valid()); + if (extent->is_valid()) { + if (extent->is_fully_loaded()) { + // crc will be checked against LBA leaf entry for logical extents, + // or check against in-extent crc for physical extents. 
+ if (epm.get_checksum_needed(extent->get_paddr())) { + extent->last_committed_crc = extent->calc_crc32c(); + } else { + extent->last_committed_crc = CRC_NULL; + } + // on_clean_read() may change the content, call after calc_crc32c() + extent->on_clean_read(); + SUBDEBUG(seastore_cache, "read extent 0x{:x}~0x{:x} done -- {}", + offset, length, *extent); + } else { + extent->last_committed_crc = CRC_NULL; + SUBDEBUG(seastore_cache, "read extent 0x{:x}~0x{:x} done (partial) -- {}", + offset, length, *extent); + } + } else { + SUBDEBUG(seastore_cache, "read extent 0x{:x}~0x{:x} done (invalidated) -- {}", + offset, length, *extent); + } extent->complete_io(); - SUBDEBUG(seastore_cache, "read extent done -- {}", *extent); return get_extent_ertr::make_ready_future<TCachedExtentRef<T>>( std::move(extent)); }, @@ -1718,21 +1972,10 @@ private: } // Extents in cache may contain placeholders - CachedExtentRef query_cache( - paddr_t offset, - const src_ext_t* p_metric_key) { - query_counters_t* p_counters = nullptr; - if (p_metric_key) { - p_counters = &get_by_src(stats.cache_query_by_src, p_metric_key->first); - ++p_counters->access; - } - if (auto iter = extents.find_offset(offset); - iter != extents.end()) { - if (p_metric_key && - // retired_placeholder is not really cached yet - iter->get_type() != extent_types_t::RETIRED_PLACEHOLDER) { - ++p_counters->hit; - } + CachedExtentRef query_cache(paddr_t offset) { + if (auto iter = extents_index.find_offset(offset); + iter != extents_index.end()) { + assert(iter->is_stable()); return CachedExtentRef(&*iter); } else { return CachedExtentRef(); |