summary refs log tree commit diff stats
path: root/src/crimson/os/seastore/cache.h
diff options
context:
space:
mode:
Diffstat (limited to 'src/crimson/os/seastore/cache.h')
-rw-r--r-- src/crimson/os/seastore/cache.h 905
1 files changed, 574 insertions, 331 deletions
diff --git a/src/crimson/os/seastore/cache.h b/src/crimson/os/seastore/cache.h
index 4441df86d4e..a239b861726 100644
--- a/src/crimson/os/seastore/cache.h
+++ b/src/crimson/os/seastore/cache.h
@@ -3,13 +3,13 @@
#pragma once
-#include <iostream>
-
#include "seastar/core/shared_future.hh"
#include "include/buffer.h"
#include "crimson/common/errorator.h"
+#include "crimson/common/errorator-loop.h"
+#include "crimson/os/seastore/backref_entry.h"
#include "crimson/os/seastore/cached_extent.h"
#include "crimson/os/seastore/extent_placement_manager.h"
#include "crimson/os/seastore/logging.h"
@@ -37,86 +37,6 @@ class FixedKVBtree;
class BackrefManager;
class SegmentProvider;
-struct backref_entry_t {
- backref_entry_t(
- const paddr_t paddr,
- const laddr_t laddr,
- const extent_len_t len,
- const extent_types_t type,
- const journal_seq_t seq)
- : paddr(paddr),
- laddr(laddr),
- len(len),
- type(type),
- seq(seq)
- {}
- backref_entry_t(alloc_blk_t alloc_blk)
- : paddr(alloc_blk.paddr),
- laddr(alloc_blk.laddr),
- len(alloc_blk.len),
- type(alloc_blk.type)
- {}
- paddr_t paddr = P_ADDR_NULL;
- laddr_t laddr = L_ADDR_NULL;
- extent_len_t len = 0;
- extent_types_t type =
- extent_types_t::ROOT;
- journal_seq_t seq;
- friend bool operator< (
- const backref_entry_t &l,
- const backref_entry_t &r) {
- return l.paddr < r.paddr;
- }
- friend bool operator> (
- const backref_entry_t &l,
- const backref_entry_t &r) {
- return l.paddr > r.paddr;
- }
- friend bool operator== (
- const backref_entry_t &l,
- const backref_entry_t &r) {
- return l.paddr == r.paddr;
- }
-
- using set_hook_t =
- boost::intrusive::set_member_hook<
- boost::intrusive::link_mode<
- boost::intrusive::auto_unlink>>;
- set_hook_t backref_set_hook;
- using backref_set_member_options = boost::intrusive::member_hook<
- backref_entry_t,
- set_hook_t,
- &backref_entry_t::backref_set_hook>;
- using multiset_t = boost::intrusive::multiset<
- backref_entry_t,
- backref_set_member_options,
- boost::intrusive::constant_time_size<false>>;
-
- struct cmp_t {
- using is_transparent = paddr_t;
- bool operator()(
- const backref_entry_t &l,
- const backref_entry_t &r) const {
- return l.paddr < r.paddr;
- }
- bool operator()(const paddr_t l, const backref_entry_t &r) const {
- return l < r.paddr;
- }
- bool operator()(const backref_entry_t &l, const paddr_t r) const {
- return l.paddr < r;
- }
- };
-};
-
-std::ostream &operator<<(std::ostream &out, const backref_entry_t &ent);
-
-using backref_entry_ref = std::unique_ptr<backref_entry_t>;
-using backref_entry_mset_t = backref_entry_t::multiset_t;
-using backref_entry_refs_t = std::vector<backref_entry_ref>;
-using backref_entryrefs_by_seq_t = std::map<journal_seq_t, backref_entry_refs_t>;
-using backref_entry_query_set_t = std::set<
- backref_entry_t, backref_entry_t::cmp_t>;
-
/**
* Cache
*
@@ -167,7 +87,7 @@ using backref_entry_query_set_t = std::set<
* - Remove all extents in the retired_set from Cache::extents
* - Mark all extents in the write_set wait_io(), add promises to
* transaction
- * - Merge Transaction::write_set into Cache::extents
+ * - Merge Transaction::write_set into Cache::extents_index
*
* After phase 2, the user will submit the record to the journal.
* Once complete, we perform phase 3:
@@ -198,10 +118,13 @@ public:
Cache(ExtentPlacementManager &epm);
~Cache();
+ cache_stats_t get_stats(bool report_detail, double seconds) const;
+
/// Creates empty transaction by source
TransactionRef create_transaction(
Transaction::src_t src,
const char* name,
+ cache_hint_t cache_hint,
bool is_weak) {
LOG_PREFIX(Cache::create_transaction);
@@ -215,7 +138,8 @@ public:
[this](Transaction& t) {
return on_transaction_destruct(t);
},
- ++next_id
+ ++next_id,
+ cache_hint
);
SUBDEBUGT(seastore_t, "created name={}, source={}, is_weak={}",
*ret, name, src, is_weak);
@@ -270,6 +194,11 @@ public:
return t.root;
}
+ void account_absent_access(Transaction::src_t src) { // record one absent access attributed to transaction source src
+ ++(get_by_src(stats.cache_absent_by_src, src)); // per-source absent counter
+ ++stats.access.cache_absent; // aggregate absent counter
+ }
+
/**
* get_extent_if_cached
*
@@ -285,12 +214,29 @@ public:
CachedExtentRef ret;
LOG_PREFIX(Cache::get_extent_if_cached);
auto result = t.get_extent(offset, &ret);
+ const auto t_src = t.get_src();
+ extent_access_stats_t& access_stats = get_by_ext(
+ get_by_src(stats.access_by_src_ext, t_src),
+ type);
if (result == Transaction::get_extent_ret::RETIRED) {
SUBDEBUGT(seastore_cache, "{} {} is retired on t -- {}",
t, type, offset, *ret);
return get_extent_if_cached_iertr::make_ready_future<
CachedExtentRef>(ret);
} else if (result == Transaction::get_extent_ret::PRESENT) {
+ if (ret->is_stable()) {
+ if (ret->is_dirty()) {
+ ++access_stats.trans_dirty;
+ ++stats.access.s.trans_dirty;
+ } else {
+ ++access_stats.trans_lru;
+ ++stats.access.s.trans_lru;
+ }
+ } else {
+ ++access_stats.trans_pending;
+ ++stats.access.s.trans_pending;
+ }
+
if (ret->is_fully_loaded()) {
SUBTRACET(seastore_cache, "{} {} is present on t -- {}",
t, type, offset, *ret);
@@ -299,27 +245,40 @@ public:
CachedExtentRef>(ret);
});
} else {
- SUBDEBUGT(seastore_cache, "{} {} is present on t -- {}"
- " without being fully loaded", t, type, offset, *ret);
+ SUBDEBUGT(seastore_cache,
+ "{} {} is present on t -- {} without fully loaded",
+ t, type, offset, *ret);
return get_extent_if_cached_iertr::make_ready_future<
CachedExtentRef>();
}
}
// get_extent_ret::ABSENT from transaction
- auto metric_key = std::make_pair(t.get_src(), type);
- ret = query_cache(offset, &metric_key);
+ ret = query_cache(offset);
if (!ret) {
SUBDEBUGT(seastore_cache, "{} {} is absent", t, type, offset);
+ account_absent_access(t_src);
return get_extent_if_cached_iertr::make_ready_future<CachedExtentRef>();
- } else if (ret->get_type() == extent_types_t::RETIRED_PLACEHOLDER) {
+ } else if (is_retired_placeholder_type(ret->get_type())) {
// retired_placeholder is not really cached yet
SUBDEBUGT(seastore_cache, "{} {} is absent(placeholder)",
t, type, offset);
+ account_absent_access(t_src);
return get_extent_if_cached_iertr::make_ready_future<CachedExtentRef>();
- } else if (!ret->is_fully_loaded()) {
- SUBDEBUGT(seastore_cache, "{} {} is present without "
- "being fully loaded", t, type, offset);
+ }
+
+ if (ret->is_dirty()) {
+ ++access_stats.cache_dirty;
+ ++stats.access.s.cache_dirty;
+ } else {
+ ++access_stats.cache_lru;
+ ++stats.access.s.cache_lru;
+ }
+
+ if (!ret->is_fully_loaded()) {
+ // ignore non-full extent
+ SUBDEBUGT(seastore_cache,
+ "{} {} is present without fully loaded", t, type, offset);
return get_extent_if_cached_iertr::make_ready_future<CachedExtentRef>();
}
@@ -327,7 +286,7 @@ public:
SUBDEBUGT(seastore_cache, "{} {} is present in cache -- {}",
t, type, offset, *ret);
t.add_to_read_set(ret);
- touch_extent(*ret);
+ touch_extent(*ret, &t_src, t.get_cache_hint());
return ret->wait_io().then([ret] {
return get_extent_if_cached_iertr::make_ready_future<
CachedExtentRef>(ret);
@@ -346,6 +305,8 @@ public:
*
* Note, the current implementation leverages parent-child
* pointers in LBA instead, so it should only be called in tests.
+ *
+ * This path won't be accounted by the cache_access_stats_t.
*/
using get_extent_iertr = base_iertr;
template <typename T>
@@ -356,41 +317,37 @@ public:
extent_len_t length) {
CachedExtentRef ret;
LOG_PREFIX(Cache::get_caching_extent);
+ const auto t_src = t.get_src();
auto result = t.get_extent(offset, &ret);
if (result == Transaction::get_extent_ret::RETIRED) {
- SUBERRORT(seastore_cache, "{} {}~{} is retired on t -- {}",
+ SUBERRORT(seastore_cache, "{} {}~0x{:x} is retired on t -- {}",
t, T::TYPE, offset, length, *ret);
ceph_abort("impossible");
} else if (result == Transaction::get_extent_ret::PRESENT) {
+ assert(ret->get_length() == length);
if (ret->is_fully_loaded()) {
- SUBTRACET(seastore_cache, "{} {}~{} is present on t -- {}",
+ SUBTRACET(seastore_cache, "{} {}~0x{:x} is present on t -- {}",
t, T::TYPE, offset, length, *ret);
return ret->wait_io().then([ret] {
return seastar::make_ready_future<TCachedExtentRef<T>>(
ret->cast<T>());
});
} else {
- assert(!ret->is_mutable());
- touch_extent(*ret);
- SUBDEBUGT(seastore_cache, "{} {}~{} is present on t without been \
- fully loaded, reading ... {}", t, T::TYPE, offset, length, *ret);
- auto bp = alloc_cache_buf(ret->get_length());
- ret->set_bptr(std::move(bp));
- return read_extent<T>(
- ret->cast<T>());
+ SUBDEBUGT(seastore_cache,
+ "{} {}~0x{:x} is present on t without fully loaded, reading ... -- {}",
+ t, T::TYPE, offset, length, *ret);
+ return do_read_extent_maybe_partial<T>(ret->cast<T>(), 0, length, &t_src);
}
} else {
- SUBTRACET(seastore_cache, "{} {}~{} is absent on t, query cache ...",
+ SUBTRACET(seastore_cache, "{} {}~0x{:x} is absent on t, query cache ...",
t, T::TYPE, offset, length);
- auto f = [&t, this](CachedExtent &ext) {
+ auto f = [&t, this, t_src](CachedExtent &ext) {
t.add_to_read_set(CachedExtentRef(&ext));
- touch_extent(ext);
+ touch_extent(ext, &t_src, t.get_cache_hint());
};
- auto metric_key = std::make_pair(t.get_src(), T::TYPE);
return trans_intr::make_interruptible(
do_get_caching_extent<T>(
- offset, length, &metric_key,
- [](T &){}, std::move(f))
+ offset, length, [](T &){}, std::move(f), &t_src)
);
}
}
@@ -399,12 +356,15 @@ public:
* get_absent_extent
*
* The extent in query is supposed to be absent in Cache.
+ * Partially loads the buffer, range partial_off~partial_len, if the extent is not present.
*/
template <typename T, typename Func>
get_extent_iertr::future<TCachedExtentRef<T>> get_absent_extent(
Transaction &t,
paddr_t offset,
extent_len_t length,
+ extent_len_t partial_off,
+ extent_len_t partial_len,
Func &&extent_init_func) {
CachedExtentRef ret;
LOG_PREFIX(Cache::get_absent_extent);
@@ -417,17 +377,26 @@ public:
}
#endif
- SUBTRACET(seastore_cache, "{} {}~{} is absent on t, query cache ...",
+ SUBTRACET(seastore_cache, "{} {}~0x{:x} is absent on t, query cache ...",
t, T::TYPE, offset, length);
- auto f = [&t, this](CachedExtent &ext) {
+ const auto t_src = t.get_src();
+ auto f = [&t, this, t_src](CachedExtent &ext) {
+ // FIXME: assert(ext.is_stable_clean());
+ assert(ext.is_stable());
+ assert(T::TYPE == ext.get_type());
+ extent_access_stats_t& access_stats = get_by_ext(
+ get_by_src(stats.access_by_src_ext, t_src),
+ T::TYPE);
+ ++access_stats.load_absent;
+ ++stats.access.s.load_absent;
+
t.add_to_read_set(CachedExtentRef(&ext));
- touch_extent(ext);
+ touch_extent(ext, &t_src, t.get_cache_hint());
};
- auto metric_key = std::make_pair(t.get_src(), T::TYPE);
return trans_intr::make_interruptible(
do_get_caching_extent<T>(
- offset, length, &metric_key,
- std::forward<Func>(extent_init_func), std::move(f))
+ offset, length, partial_off, partial_len,
+ std::forward<Func>(extent_init_func), std::move(f), &t_src)
);
}
@@ -451,6 +420,16 @@ public:
return get_absent_extent<T>(t, offset, length, [](T &){});
}
+ template <typename T, typename Func> // convenience overload: caller gives no partial range
+ get_extent_iertr::future<TCachedExtentRef<T>> get_absent_extent(
+ Transaction &t,
+ paddr_t offset,
+ extent_len_t length,
+ Func &&extent_init_func) {
+ return get_absent_extent<T>(t, offset, length, 0, length, // partial_off=0, partial_len=length: the whole extent
+ std::forward<Func>(extent_init_func));
+ }
+
bool is_viewable_extent_stable(
Transaction &t,
CachedExtentRef extent)
@@ -469,13 +448,19 @@ public:
return view->is_data_stable();
}
- using get_extent_ertr = base_ertr;
- get_extent_ertr::future<CachedExtentRef>
+ get_extent_iertr::future<CachedExtentRef>
get_extent_viewable_by_trans(
Transaction &t,
CachedExtentRef extent)
{
assert(extent->is_valid());
+
+ const auto t_src = t.get_src();
+ auto ext_type = extent->get_type();
+ extent_access_stats_t& access_stats = get_by_ext(
+ get_by_src(stats.access_by_src_ext, t_src),
+ ext_type);
+
CachedExtent* p_extent;
if (extent->is_stable()) {
p_extent = extent->get_transactional_view(t);
@@ -483,10 +468,12 @@ public:
assert(!extent->is_stable_writting());
assert(p_extent->is_pending_in_trans(t.get_trans_id()));
assert(!p_extent->is_stable_writting());
+ ++access_stats.trans_pending;
+ ++stats.access.s.trans_pending;
if (p_extent->is_mutable()) {
assert(p_extent->is_fully_loaded());
assert(!p_extent->is_pending_io());
- return get_extent_ertr::make_ready_future<CachedExtentRef>(
+ return get_extent_iertr::make_ready_future<CachedExtentRef>(
CachedExtentRef(p_extent));
} else {
assert(p_extent->is_exist_clean());
@@ -495,57 +482,101 @@ public:
// stable from trans-view
assert(!p_extent->is_pending_in_trans(t.get_trans_id()));
if (t.maybe_add_to_read_set(p_extent)) {
- touch_extent(*p_extent);
+ if (p_extent->is_dirty()) {
+ ++access_stats.cache_dirty;
+ ++stats.access.s.cache_dirty;
+ } else {
+ ++access_stats.cache_lru;
+ ++stats.access.s.cache_lru;
+ }
+ touch_extent(*p_extent, &t_src, t.get_cache_hint());
+ } else {
+ if (p_extent->is_dirty()) {
+ ++access_stats.trans_dirty;
+ ++stats.access.s.trans_dirty;
+ } else {
+ ++access_stats.trans_lru;
+ ++stats.access.s.trans_lru;
+ }
}
}
} else {
assert(!extent->is_stable_writting());
assert(extent->is_pending_in_trans(t.get_trans_id()));
+ ++access_stats.trans_pending;
+ ++stats.access.s.trans_pending;
if (extent->is_mutable()) {
assert(extent->is_fully_loaded());
assert(!extent->is_pending_io());
- return get_extent_ertr::make_ready_future<CachedExtentRef>(extent);
+ return get_extent_iertr::make_ready_future<CachedExtentRef>(extent);
} else {
assert(extent->is_exist_clean());
p_extent = extent.get();
}
}
- assert(p_extent->is_stable() || p_extent->is_exist_clean());
// user should not see RETIRED_PLACEHOLDER extents
- ceph_assert(p_extent->get_type() != extent_types_t::RETIRED_PLACEHOLDER);
- if (!p_extent->is_fully_loaded()) {
- assert(!p_extent->is_mutable());
- LOG_PREFIX(Cache::get_extent_viewable_by_trans);
- SUBDEBUG(seastore_cache,
- "{} {}~{} is present without been fully loaded, reading ... -- {}",
- p_extent->get_type(), p_extent->get_paddr(),p_extent->get_length(),
- *p_extent);
- auto bp = alloc_cache_buf(p_extent->get_length());
- p_extent->set_bptr(std::move(bp));
- return read_extent<CachedExtent>(CachedExtentRef(p_extent));
- }
- return p_extent->wait_io(
- ).then([p_extent] {
- return get_extent_ertr::make_ready_future<CachedExtentRef>(
+ ceph_assert(!is_retired_placeholder_type(p_extent->get_type()));
+ // for logical extents, handle partial load in TM::read_pin(),
+ // also see read_extent_maybe_partial() and get_absent_extent()
+ assert(is_logical_type(p_extent->get_type()) ||
+ p_extent->is_fully_loaded());
+
+ return trans_intr::make_interruptible(
+ p_extent->wait_io()
+ ).then_interruptible([p_extent] {
+ return get_extent_iertr::make_ready_future<CachedExtentRef>(
CachedExtentRef(p_extent));
});
}
template <typename T>
- using read_extent_ret = get_extent_ertr::future<TCachedExtentRef<T>>;
-
- template <typename T>
- read_extent_ret<T> get_extent_viewable_by_trans(
+ get_extent_iertr::future<TCachedExtentRef<T>>
+ get_extent_viewable_by_trans(
Transaction &t,
TCachedExtentRef<T> extent)
{
return get_extent_viewable_by_trans(t, CachedExtentRef(extent.get())
- ).safe_then([](auto p_extent) {
+ ).si_then([](auto p_extent) {
return p_extent->template cast<T>();
});
}
+ // Returns the extent once partial_off~partial_len can be used: reads the range if it is not loaded, otherwise only waits for pending io
+ template <typename T>
+ get_extent_iertr::future<TCachedExtentRef<T>>
+ read_extent_maybe_partial(
+ Transaction &t,
+ TCachedExtentRef<T> extent,
+ extent_len_t partial_off,
+ extent_len_t partial_len) {
+ assert(is_logical_type(extent->get_type())); // only logical extent types are expected on this path
+ if (!extent->is_range_loaded(partial_off, partial_len)) {
+ LOG_PREFIX(Cache::read_extent_maybe_partial);
+ SUBDEBUGT(seastore_cache,
+ "{} {}~0x{:x} is present on t without range 0x{:x}~0x{:x}, reading ... -- {}",
+ t, extent->get_type(), extent->get_paddr(), extent->get_length(),
+ partial_off, partial_len, *extent);
+ const auto t_src = t.get_src(); // local copy; its address is handed to the read below
+ extent_access_stats_t& access_stats = get_by_ext(
+ get_by_src(stats.access_by_src_ext, t_src),
+ extent->get_type());
+ ++access_stats.load_present; // per source/type counter: extent present, range has to be read
+ ++stats.access.s.load_present; // aggregate counter for the same event
+ return trans_intr::make_interruptible(
+ do_read_extent_maybe_partial(
+ std::move(extent), partial_off, partial_len, &t_src));
+ } else {
+ // TODO(implement fine-grained-wait):
+ // the range might be already loaded, but we don't know
+ return trans_intr::make_interruptible(
+ extent->wait_io()
+ ).then_interruptible([extent] {
+ return get_extent_iertr::make_ready_future<TCachedExtentRef<T>>(extent);
+ });
+ }
+ }
+
extent_len_t get_block_size() const {
return epm.get_block_size();
}
@@ -553,60 +584,127 @@ public:
// Interfaces only for tests.
public:
CachedExtentRef test_query_cache(paddr_t offset) {
- return query_cache(offset, nullptr);
+ return query_cache(offset);
}
private:
+ using get_extent_ertr = base_ertr;
+ template <typename T>
+ using read_extent_ret = get_extent_ertr::future<TCachedExtentRef<T>>;
+ /// Implements exclusive call to read_extent() for the extent: waits out pending io first, re-checks the range, and reads only if it is still missing
+ template <typename T>
+ read_extent_ret<T> do_read_extent_maybe_partial(
+ TCachedExtentRef<T>&& extent,
+ extent_len_t partial_off,
+ extent_len_t partial_len,
+ const Transaction::src_t* p_src)
+ {
+ LOG_PREFIX(Cache::do_read_extent_maybe_partial);
+ // Each of the following pairs must be atomic:
+ // 1. checking missing range and wait io
+ // 2. checking missing range and read
+ // because the extents in Caches can be accessed concurrently
+ //
+ // TODO(implement fine-grained-wait)
+ assert(!extent->is_range_loaded(partial_off, partial_len)); // callers only get here with the range missing
+ assert(!extent->is_mutable());
+ if (extent->is_pending_io()) {
+ std::optional<Transaction::src_t> src; // value copy of *p_src for use inside the continuation below
+ if (p_src) {
+ src = *p_src;
+ }
+ auto* p_extent = extent.get(); // take the raw pointer before extent is moved into the lambda capture
+ return p_extent->wait_io(
+ ).then([extent=std::move(extent), partial_off, partial_len, this, FNAME, src]() mutable
+ -> read_extent_ret<T> {
+ if (extent->is_range_loaded(partial_off, partial_len)) {
+ SUBDEBUG(seastore_cache,
+ "{} {}~0x{:x} got range 0x{:x}~0x{:x} ... -- {}",
+ extent->get_type(), extent->get_paddr(), extent->get_length(),
+ partial_off, partial_len, *extent);
+ // we don't know whether the target range is loading or not
+ if (extent->is_pending_io()) {
+ auto* p_extent = extent.get(); // same pattern: pointer first, then move extent into the capture
+ return p_extent->wait_io(
+ ).then([extent=std::move(extent)]() mutable {
+ return seastar::make_ready_future<TCachedExtentRef<T>>(std::move(extent));
+ });
+ } else {
+ return seastar::make_ready_future<TCachedExtentRef<T>>(std::move(extent));
+ }
+ } else { // range not loaded
+ SUBDEBUG(seastore_cache,
+ "{} {}~0x{:x} without range 0x{:x}~0x{:x} ... -- {}",
+ extent->get_type(), extent->get_paddr(), extent->get_length(),
+ partial_off, partial_len, *extent);
+ Transaction::src_t* p_src = (src.has_value() ? &src.value() : nullptr); // rebuild the optional pointer from the captured copy
+ return do_read_extent_maybe_partial( // retry from the top: the extent may be pending io again
+ std::move(extent), partial_off, partial_len, p_src);
+ }
+ });
+ } else {
+ SUBDEBUG(seastore_cache,
+ "{} {}~0x{:x} is not pending without range 0x{:x}~0x{:x}, reading ... -- {}",
+ extent->get_type(), extent->get_paddr(), extent->get_length(),
+ partial_off, partial_len, *extent);
+ return read_extent<T>(
+ std::move(extent), partial_off, partial_len, p_src);
+ }
+ }
+
/**
* do_get_caching_extent
*
* returns ref to extent at offset~length of type T either from
* - extent_set if already in cache
* - disk
+ * Only the range partial_off~partial_len is loaded.
*/
using src_ext_t = std::pair<Transaction::src_t, extent_types_t>;
template <typename T, typename Func, typename OnCache>
read_extent_ret<T> do_get_caching_extent(
paddr_t offset, ///< [in] starting addr
extent_len_t length, ///< [in] length
- const src_ext_t* p_src_ext, ///< [in] cache query metric key
+ extent_len_t partial_off, ///< [in] offset of piece in extent
+ extent_len_t partial_len, ///< [in] length of piece in extent
Func &&extent_init_func, ///< [in] init func for extent
- OnCache &&on_cache
+ OnCache &&on_cache,
+ const Transaction::src_t* p_src
) {
LOG_PREFIX(Cache::do_get_caching_extent);
- auto cached = query_cache(offset, p_src_ext);
+ auto cached = query_cache(offset);
if (!cached) {
- auto ret = CachedExtent::make_cached_extent_ref<T>(
- alloc_cache_buf(length));
+ // partial read
+ TCachedExtentRef<T> ret = CachedExtent::make_cached_extent_ref<T>(length);
ret->init(CachedExtent::extent_state_t::CLEAN_PENDING,
offset,
PLACEMENT_HINT_NULL,
NULL_GENERATION,
TRANS_ID_NULL);
SUBDEBUG(seastore_cache,
- "{} {}~{} is absent, add extent and reading ... -- {}",
- T::TYPE, offset, length, *ret);
- const auto p_src = p_src_ext ? &p_src_ext->first : nullptr;
- add_extent(ret, p_src);
+ "{} {}~0x{:x} is absent, add extent and reading range 0x{:x}~0x{:x} ... -- {}",
+ T::TYPE, offset, length, partial_off, partial_len, *ret);
+ add_extent(ret);
+ // touch_extent() should be included in on_cache
on_cache(*ret);
extent_init_func(*ret);
return read_extent<T>(
- std::move(ret));
+ std::move(ret), partial_off, partial_len, p_src);
}
// extent PRESENT in cache
- if (cached->get_type() == extent_types_t::RETIRED_PLACEHOLDER) {
- auto ret = CachedExtent::make_cached_extent_ref<T>(
- alloc_cache_buf(length));
+ if (is_retired_placeholder_type(cached->get_type())) {
+ // partial read
+ TCachedExtentRef<T> ret = CachedExtent::make_cached_extent_ref<T>(length);
ret->init(CachedExtent::extent_state_t::CLEAN_PENDING,
offset,
PLACEMENT_HINT_NULL,
NULL_GENERATION,
TRANS_ID_NULL);
SUBDEBUG(seastore_cache,
- "{} {}~{} is absent(placeholder), reading ... -- {}",
- T::TYPE, offset, length, *ret);
- extents.replace(*ret, *cached);
+ "{} {}~0x{:x} is absent(placeholder), add extent and reading range 0x{:x}~0x{:x} ... -- {}",
+ T::TYPE, offset, length, partial_off, partial_len, *ret);
+ extents_index.replace(*ret, *cached);
on_cache(*ret);
// replace placeholder in transactions
@@ -618,34 +716,41 @@ private:
cached->state = CachedExtent::extent_state_t::INVALID;
extent_init_func(*ret);
return read_extent<T>(
- std::move(ret));
- } else if (!cached->is_fully_loaded()) {
- auto ret = TCachedExtentRef<T>(static_cast<T*>(cached.get()));
- on_cache(*ret);
- SUBDEBUG(seastore_cache,
- "{} {}~{} is present without been fully loaded, reading ... -- {}",
- T::TYPE, offset, length, *ret);
- auto bp = alloc_cache_buf(length);
- ret->set_bptr(std::move(bp));
- return read_extent<T>(
- std::move(ret));
- } else {
+ std::move(ret), partial_off, partial_len, p_src);
+ }
+
+ auto ret = TCachedExtentRef<T>(static_cast<T*>(cached.get()));
+ on_cache(*ret);
+ if (ret->is_range_loaded(partial_off, partial_len)) {
SUBTRACE(seastore_cache,
- "{} {}~{} is present in cache -- {}",
- T::TYPE, offset, length, *cached);
- auto ret = TCachedExtentRef<T>(static_cast<T*>(cached.get()));
- on_cache(*ret);
- return ret->wait_io(
- ).then([ret=std::move(ret)]() mutable
- -> read_extent_ret<T> {
+ "{} {}~0x{:x} is present with range 0x{:x}~0x{:x} ... -- {}",
+ T::TYPE, offset, length, partial_off, partial_len, *ret);
+ return ret->wait_io().then([ret] {
// ret may be invalid, caller must check
- return read_extent_ret<T>(
- get_extent_ertr::ready_future_marker{},
- std::move(ret));
+ return seastar::make_ready_future<TCachedExtentRef<T>>(ret);
});
+ } else {
+ SUBDEBUG(seastore_cache,
+ "{} {}~0x{:x} is present without range 0x{:x}~0x{:x}, reading ... -- {}",
+ T::TYPE, offset, length, partial_off, partial_len, *ret);
+ return do_read_extent_maybe_partial(
+ std::move(ret), partial_off, partial_len, p_src);
}
}
+ template <typename T, typename Func, typename OnCache> // convenience overload: caller gives no partial range
+ read_extent_ret<T> do_get_caching_extent(
+ paddr_t offset, ///< [in] starting addr
+ extent_len_t length, ///< [in] length
+ Func &&extent_init_func, ///< [in] init func for extent
+ OnCache &&on_cache, ///< [in] callback forwarded unchanged to the partial-range overload
+ const Transaction::src_t* p_src ///< [in] optional transaction source, forwarded unchanged
+ ) {
+ return do_get_caching_extent<T>(offset, length, 0, length, // partial range 0~length covers the whole extent
+ std::forward<Func>(extent_init_func),
+ std::forward<OnCache>(on_cache),
+ p_src);
+ }
// This is a workaround std::move_only_function not being available,
// not really worth generalizing at this time.
@@ -680,11 +785,18 @@ private:
paddr_t offset,
laddr_t laddr,
extent_len_t length,
- const Transaction::src_t* p_src,
extent_init_func_t &&extent_init_func,
- extent_init_func_t &&on_cache
- );
+ extent_init_func_t &&on_cache,
+ const Transaction::src_t* p_src);
+ /**
+ * get_caching_extent_by_type
+ *
+ * Note, the current implementation leverages parent-child
+ * pointers in LBA instead, so it should only be called in tests.
+ *
+ * This path won't be accounted by the cache_access_stats_t.
+ */
using get_extent_by_type_iertr = get_extent_iertr;
using get_extent_by_type_ret = get_extent_by_type_iertr::future<
CachedExtentRef>;
@@ -697,41 +809,39 @@ private:
extent_init_func_t &&extent_init_func
) {
LOG_PREFIX(Cache::get_caching_extent_by_type);
+ const auto t_src = t.get_src();
CachedExtentRef ret;
auto status = t.get_extent(offset, &ret);
if (status == Transaction::get_extent_ret::RETIRED) {
- SUBERRORT(seastore_cache, "{} {}~{} {} is retired on t -- {}",
+ SUBERRORT(seastore_cache, "{} {}~0x{:x} {} is retired on t -- {}",
t, type, offset, length, laddr, *ret);
ceph_abort("impossible");
} else if (status == Transaction::get_extent_ret::PRESENT) {
+ assert(ret->get_length() == length);
if (ret->is_fully_loaded()) {
- SUBTRACET(seastore_cache, "{} {}~{} {} is present on t -- {}",
+ SUBTRACET(seastore_cache, "{} {}~0x{:x} {} is present on t -- {}",
t, type, offset, length, laddr, *ret);
return ret->wait_io().then([ret] {
return seastar::make_ready_future<CachedExtentRef>(ret);
});
} else {
- assert(!ret->is_mutable());
- touch_extent(*ret);
- SUBDEBUGT(seastore_cache, "{} {}~{} {} is present on t without been \
- fully loaded, reading ...", t, type, offset, length, laddr);
- auto bp = alloc_cache_buf(ret->get_length());
- ret->set_bptr(std::move(bp));
- return read_extent<CachedExtent>(
- std::move(ret));
+ SUBDEBUGT(seastore_cache,
+ "{} {}~0x{:x} {} is present on t without fully loaded, reading ... -- {}",
+ t, type, offset, length, laddr, *ret);
+ return do_read_extent_maybe_partial<CachedExtent>(
+ std::move(ret), 0, length, &t_src);
}
} else {
- SUBTRACET(seastore_cache, "{} {}~{} {} is absent on t, query cache ...",
+ SUBTRACET(seastore_cache, "{} {}~0x{:x} {} is absent on t, query cache ...",
t, type, offset, length, laddr);
- auto f = [&t, this](CachedExtent &ext) {
+ auto f = [&t, this, t_src](CachedExtent &ext) {
t.add_to_read_set(CachedExtentRef(&ext));
- touch_extent(ext);
+ touch_extent(ext, &t_src, t.get_cache_hint());
};
- auto src = t.get_src();
return trans_intr::make_interruptible(
do_get_caching_extent_by_type(
- type, offset, laddr, length, &src,
- std::move(extent_init_func), std::move(f))
+ type, offset, laddr, length,
+ std::move(extent_init_func), std::move(f), &t_src)
);
}
}
@@ -755,17 +865,25 @@ private:
}
#endif
- SUBTRACET(seastore_cache, "{} {}~{} {} is absent on t, query cache ...",
+ SUBTRACET(seastore_cache, "{} {}~0x{:x} {} is absent on t, query cache ...",
t, type, offset, length, laddr);
- auto f = [&t, this](CachedExtent &ext) {
+ const auto t_src = t.get_src();
+ auto f = [&t, this, t_src](CachedExtent &ext) {
+ // FIXME: assert(ext.is_stable_clean());
+ assert(ext.is_stable());
+ extent_access_stats_t& access_stats = get_by_ext(
+ get_by_src(stats.access_by_src_ext, t_src),
+ ext.get_type());
+ ++access_stats.load_absent;
+ ++stats.access.s.load_absent;
+
t.add_to_read_set(CachedExtentRef(&ext));
- touch_extent(ext);
+ touch_extent(ext, &t_src, t.get_cache_hint());
};
- auto src = t.get_src();
return trans_intr::make_interruptible(
do_get_caching_extent_by_type(
- type, offset, laddr, length, &src,
- std::move(extent_init_func), std::move(f))
+ type, offset, laddr, length,
+ std::move(extent_init_func), std::move(f), &t_src)
);
}
@@ -787,7 +905,7 @@ private:
for (auto it = start_iter;
it != end_iter;
it++) {
- res.emplace(it->paddr, it->laddr, it->len, it->type, it->seq);
+ res.emplace(it->paddr, it->laddr, it->len, it->type);
}
return res;
}
@@ -886,7 +1004,7 @@ public:
#endif
) {
LOG_PREFIX(Cache::alloc_new_non_data_extent);
- SUBTRACET(seastore_cache, "allocate {} {}B, hint={}, gen={}",
+ SUBTRACET(seastore_cache, "allocate {} 0x{:x}B, hint={}, gen={}",
t, T::TYPE, length, hint, rewrite_gen_printer_t{gen});
#ifdef UNIT_TESTS_BUILT
auto result = epm.alloc_new_non_data_extent(t, T::TYPE, length, hint, gen, epaddr);
@@ -894,7 +1012,8 @@ public:
auto result = epm.alloc_new_non_data_extent(t, T::TYPE, length, hint, gen);
#endif
if (!result) {
- return nullptr;
+ SUBERRORT(seastore_cache, "insufficient space", t);
+ std::rethrow_exception(crimson::ct_error::enospc::exception_ptr());
}
auto ret = CachedExtent::make_cached_extent_ref<T>(std::move(result->bp));
ret->init(CachedExtent::extent_state_t::INITIAL_WRITE_PENDING,
@@ -904,7 +1023,7 @@ public:
t.get_trans_id());
t.add_fresh_extent(ret);
SUBDEBUGT(seastore_cache,
- "allocated {} {}B extent at {}, hint={}, gen={} -- {}",
+ "allocated {} 0x{:x}B extent at {}, hint={}, gen={} -- {}",
t, T::TYPE, length, result->paddr,
hint, rewrite_gen_printer_t{result->gen}, *ret);
return ret;
@@ -928,13 +1047,17 @@ public:
#endif
) {
LOG_PREFIX(Cache::alloc_new_data_extents);
- SUBTRACET(seastore_cache, "allocate {} {}B, hint={}, gen={}",
+ SUBTRACET(seastore_cache, "allocate {} 0x{:x}B, hint={}, gen={}",
t, T::TYPE, length, hint, rewrite_gen_printer_t{gen});
#ifdef UNIT_TESTS_BUILT
auto results = epm.alloc_new_data_extents(t, T::TYPE, length, hint, gen, epaddr);
#else
auto results = epm.alloc_new_data_extents(t, T::TYPE, length, hint, gen);
#endif
+ if (results.empty()) {
+ SUBERRORT(seastore_cache, "insufficient space", t);
+ std::rethrow_exception(crimson::ct_error::enospc::exception_ptr());
+ }
std::vector<TCachedExtentRef<T>> extents;
for (auto &result : results) {
auto ret = CachedExtent::make_cached_extent_ref<T>(std::move(result.bp));
@@ -945,7 +1068,7 @@ public:
t.get_trans_id());
t.add_fresh_extent(ret);
SUBDEBUGT(seastore_cache,
- "allocated {} {}B extent at {}, hint={}, gen={} -- {}",
+ "allocated {} 0x{:x}B extent at {}, hint={}, gen={} -- {}",
t, T::TYPE, length, result.paddr,
hint, rewrite_gen_printer_t{result.gen}, *ret);
extents.emplace_back(std::move(ret));
@@ -972,15 +1095,14 @@ public:
TCachedExtentRef<T> ext;
if (original_bptr.has_value()) {
// shallow copy the buffer from original extent
- auto nbp = ceph::bufferptr(
- *original_bptr,
- remap_laddr - original_laddr,
- remap_length);
+ auto remap_offset = remap_laddr.get_byte_distance<
+ extent_len_t>(original_laddr);
+ auto nbp = ceph::bufferptr(*original_bptr, remap_offset, remap_length);
// ExtentPlacementManager::alloc_new_extent will make a new
// (relative/temp) paddr, so make extent directly
ext = CachedExtent::make_cached_extent_ref<T>(std::move(nbp));
} else {
- ext = CachedExtent::make_placeholder_cached_extent_ref<T>(remap_length);
+ ext = CachedExtent::make_cached_extent_ref<T>(remap_length);
}
ext->init(CachedExtent::extent_state_t::EXIST_CLEAN,
@@ -992,7 +1114,7 @@ public:
auto extent = ext->template cast<T>();
extent->set_laddr(remap_laddr);
t.add_fresh_extent(ext);
- SUBTRACET(seastore_cache, "allocated {} {}B, hint={}, has ptr? {} -- {}",
+ SUBTRACET(seastore_cache, "allocated {} 0x{:x}B, hint={}, has ptr? {} -- {}",
t, T::TYPE, remap_length, remap_laddr, original_bptr.has_value(), *extent);
return extent;
}
@@ -1135,10 +1257,10 @@ public:
{
LOG_PREFIX(Cache::init_cached_extents);
SUBINFOT(seastore_cache,
- "start with {}({}B) extents, {} dirty, dirty_from={}, alloc_from={}",
+ "start with {}(0x{:x}B) extents, {} dirty, dirty_from={}, alloc_from={}",
t,
- extents.size(),
- extents.get_bytes(),
+ extents_index.size(),
+ extents_index.get_bytes(),
dirty.size(),
get_oldest_dirty_from().value_or(JOURNAL_SEQ_NULL),
get_oldest_backref_dirty_from().value_or(JOURNAL_SEQ_NULL));
@@ -1147,7 +1269,7 @@ public:
// Cache::root should have been inserted to the dirty list
assert(root->is_dirty());
std::vector<CachedExtentRef> _dirty;
- for (auto &e : extents) {
+ for (auto &e : extents_index) {
_dirty.push_back(CachedExtentRef(&e));
}
return seastar::do_with(
@@ -1164,7 +1286,7 @@ public:
).si_then([this, FNAME, &t, e](bool is_alive) {
if (!is_alive) {
SUBDEBUGT(seastore_cache, "extent is not alive, remove extent -- {}", t, *e);
- remove_extent(e);
+ remove_extent(e, nullptr);
e->set_invalid(t);
} else {
SUBDEBUGT(seastore_cache, "extent is alive -- {}", t, *e);
@@ -1178,10 +1300,10 @@ public:
}
).si_then([this, FNAME, &t] {
SUBINFOT(seastore_cache,
- "finish with {}({}B) extents, {} dirty, dirty_from={}, alloc_from={}",
+ "finish with {}(0x{:x}B) extents, {} dirty, dirty_from={}, alloc_from={}",
t,
- extents.size(),
- extents.get_bytes(),
+ extents_index.size(),
+ extents_index.get_bytes(),
dirty.size(),
get_oldest_dirty_from().value_or(JOURNAL_SEQ_NULL),
get_oldest_backref_dirty_from().value_or(JOURNAL_SEQ_NULL));
@@ -1352,21 +1474,20 @@ private:
/// Update lru for access to ref
void touch_extent(
CachedExtent &ext,
- const Transaction::src_t* p_src=nullptr)
+ const Transaction::src_t* p_src,
+ cache_hint_t hint)
{
- if (p_src &&
- is_background_transaction(*p_src) &&
- is_logical_type(ext.get_type())) {
+ if (hint == CACHE_HINT_NOCACHE && is_logical_type(ext.get_type())) {
return;
}
if (ext.is_stable_clean() && !ext.is_placeholder()) {
- lru.move_to_top(ext);
+ lru.move_to_top(ext, p_src);
}
}
ExtentPlacementManager& epm;
RootBlockRef root; ///< ref to current root
- ExtentIndex extents; ///< set of live extents
+ ExtentIndex extents_index; ///< set of live extents
journal_seq_t last_commit = JOURNAL_SEQ_MIN;
@@ -1380,7 +1501,7 @@ private:
*
* holds refs to dirty extents. Ordered by CachedExtent::get_dirty_from().
*/
- CachedExtent::list dirty;
+ CachedExtent::primary_ref_list dirty;
using backref_extent_entry_query_set_t =
std::set<
@@ -1416,6 +1537,7 @@ private:
friend class crimson::os::seastore::backref::BtreeBackrefManager;
friend class crimson::os::seastore::BackrefManager;
+
/**
* lru
*
@@ -1426,71 +1548,139 @@ private:
const size_t capacity = 0;
// current size (bytes)
- size_t contents = 0;
+ size_t current_size = 0;
+
+ counter_by_extent_t<cache_size_stats_t> sizes_by_ext;
+ cache_io_stats_t overall_io;
+ counter_by_src_t<counter_by_extent_t<cache_io_stats_t> >
+ trans_io_by_src_ext;
+
+ mutable cache_io_stats_t last_overall_io;
+ mutable cache_io_stats_t last_trans_io;
+ mutable counter_by_src_t<counter_by_extent_t<cache_io_stats_t> >
+ last_trans_io_by_src_ext;
- CachedExtent::list lru;
+ CachedExtent::primary_ref_list lru;
- void trim_to_capacity() {
- while (contents > capacity) {
- assert(lru.size() > 0);
- remove_from_lru(lru.front());
+ void do_remove_from_lru(
+ CachedExtent &extent,
+ const Transaction::src_t* p_src) {
+ assert(extent.is_stable_clean() && !extent.is_placeholder());
+ assert(extent.primary_ref_list_hook.is_linked());
+ assert(lru.size() > 0);
+ auto extent_loaded_length = extent.get_loaded_length();
+ assert(current_size >= extent_loaded_length);
+
+ lru.erase(lru.s_iterator_to(extent));
+ current_size -= extent_loaded_length;
+ get_by_ext(sizes_by_ext, extent.get_type()).account_out(extent_loaded_length);
+ overall_io.out_sizes.account_in(extent_loaded_length);
+ if (p_src) {
+ get_by_ext(
+ get_by_src(trans_io_by_src_ext, *p_src),
+ extent.get_type()
+ ).out_sizes.account_in(extent_loaded_length);
}
+ intrusive_ptr_release(&extent);
}
- void add_to_lru(CachedExtent &extent) {
- assert(extent.is_stable_clean() && !extent.is_placeholder());
-
- if (!extent.primary_ref_list_hook.is_linked()) {
- contents += extent.get_length();
- intrusive_ptr_add_ref(&extent);
- lru.push_back(extent);
+ void trim_to_capacity(
+ const Transaction::src_t* p_src) {
+ while (current_size > capacity) {
+ do_remove_from_lru(lru.front(), p_src);
}
- trim_to_capacity();
}
public:
LRU(size_t capacity) : capacity(capacity) {}
- size_t get_capacity() const {
+ size_t get_capacity_bytes() const {
return capacity;
}
- size_t get_current_contents_bytes() const {
- return contents;
+ size_t get_current_size_bytes() const {
+ return current_size;
}
- size_t get_current_contents_extents() const {
+ size_t get_current_num_extents() const {
return lru.size();
}
+ void get_stats(
+ cache_stats_t &stats,
+ bool report_detail,
+ double seconds) const;
+
void remove_from_lru(CachedExtent &extent) {
assert(extent.is_stable_clean() && !extent.is_placeholder());
if (extent.primary_ref_list_hook.is_linked()) {
- lru.erase(lru.s_iterator_to(extent));
- assert(contents >= extent.get_length());
- contents -= extent.get_length();
- intrusive_ptr_release(&extent);
+ do_remove_from_lru(extent, nullptr);
}
}
- void move_to_top(CachedExtent &extent) {
+ void move_to_top(
+ CachedExtent &extent,
+ const Transaction::src_t* p_src) {
assert(extent.is_stable_clean() && !extent.is_placeholder());
+ auto extent_loaded_length = extent.get_loaded_length();
+ if (extent.primary_ref_list_hook.is_linked()) {
+ // present, move to top (back)
+ assert(lru.size() > 0);
+ assert(current_size >= extent_loaded_length);
+ lru.erase(lru.s_iterator_to(extent));
+ lru.push_back(extent);
+ } else {
+ // absent, add to top (back)
+ if (extent_loaded_length > 0) {
+ current_size += extent_loaded_length;
+ get_by_ext(sizes_by_ext, extent.get_type()).account_in(extent_loaded_length);
+ overall_io.in_sizes.account_in(extent_loaded_length);
+ if (p_src) {
+ get_by_ext(
+ get_by_src(trans_io_by_src_ext, *p_src),
+ extent.get_type()
+ ).in_sizes.account_in(extent_loaded_length);
+ }
+ } // else: the extent isn't loaded upon touch_extent()/on_cache(),
+ // account the io later in increase_cached_size() upon read_extent()
+ intrusive_ptr_add_ref(&extent);
+ lru.push_back(extent);
+
+ trim_to_capacity(p_src);
+ }
+ }
+
+ void increase_cached_size(
+ CachedExtent &extent,
+ extent_len_t increased_length,
+ const Transaction::src_t* p_src) {
+ assert(!extent.is_mutable());
+
if (extent.primary_ref_list_hook.is_linked()) {
- lru.erase(lru.s_iterator_to(extent));
- intrusive_ptr_release(&extent);
- assert(contents >= extent.get_length());
- contents -= extent.get_length();
+ assert(extent.is_stable_clean() && !extent.is_placeholder());
+ // present, increase size
+ assert(lru.size() > 0);
+ current_size += increased_length;
+ get_by_ext(sizes_by_ext, extent.get_type()).account_in(increased_length);
+ overall_io.in_sizes.account_in(increased_length);
+ if (p_src) {
+ get_by_ext(
+ get_by_src(trans_io_by_src_ext, *p_src),
+ extent.get_type()
+ ).in_sizes.account_in(increased_length);
+ }
+
+ trim_to_capacity(nullptr);
}
- add_to_lru(extent);
}
void clear() {
LOG_PREFIX(Cache::LRU::clear);
for (auto iter = lru.begin(); iter != lru.end();) {
SUBDEBUG(seastore_cache, "clearing {}", *iter);
- remove_from_lru(*(iter++));
+ do_remove_from_lru(*(iter++), nullptr);
}
}
@@ -1504,9 +1694,6 @@ private:
uint64_t hit = 0;
};
- template <typename CounterT>
- using counter_by_extent_t = std::array<CounterT, EXTENT_TYPES_MAX>;
-
struct invalid_trans_efforts_t {
io_stat_t read;
io_stat_t mutate;
@@ -1559,9 +1746,18 @@ private:
counter_by_src_t<uint64_t> trans_created_by_src;
counter_by_src_t<commit_trans_efforts_t> committed_efforts_by_src;
counter_by_src_t<invalid_trans_efforts_t> invalidated_efforts_by_src;
- counter_by_src_t<query_counters_t> cache_query_by_src;
success_read_trans_efforts_t success_read_efforts;
+
uint64_t dirty_bytes = 0;
+ counter_by_extent_t<cache_size_stats_t> dirty_sizes_by_ext;
+ dirty_io_stats_t dirty_io;
+ counter_by_src_t<counter_by_extent_t<dirty_io_stats_t> >
+ dirty_io_by_src_ext;
+
+ cache_access_stats_t access;
+ counter_by_src_t<uint64_t> cache_absent_by_src;
+ counter_by_src_t<counter_by_extent_t<extent_access_stats_t> >
+ access_by_src_ext;
uint64_t onode_tree_depth = 0;
int64_t onode_tree_extents_num = 0;
@@ -1586,18 +1782,19 @@ private:
std::array<uint64_t, NUM_SRC_COMB> trans_conflicts_by_srcs;
counter_by_src_t<uint64_t> trans_conflicts_by_unknown;
- version_stat_t committed_dirty_version;
- version_stat_t committed_reclaim_version;
+ rewrite_stats_t trim_rewrites;
+ rewrite_stats_t reclaim_rewrites;
} stats;
- template <typename CounterT>
- CounterT& get_by_ext(
- counter_by_extent_t<CounterT>& counters_by_ext,
- extent_types_t ext) {
- auto index = static_cast<uint8_t>(ext);
- assert(index < EXTENT_TYPES_MAX);
- return counters_by_ext[index];
- }
+ mutable dirty_io_stats_t last_dirty_io;
+ mutable counter_by_src_t<counter_by_extent_t<dirty_io_stats_t> >
+ last_dirty_io_by_src_ext;
+ mutable rewrite_stats_t last_trim_rewrites;
+ mutable rewrite_stats_t last_reclaim_rewrites;
+ mutable cache_access_stats_t last_access;
+ mutable counter_by_src_t<uint64_t> last_cache_absent_by_src;
+ mutable counter_by_src_t<counter_by_extent_t<extent_access_stats_t> >
+ last_access_by_src_ext;
void account_conflict(Transaction::src_t src1, Transaction::src_t src2) {
assert(src1 < Transaction::src_t::MAX);
@@ -1630,33 +1827,55 @@ private:
seastar::metrics::metric_group metrics;
void register_metrics();
- /// alloc buffer for cached extent
- bufferptr alloc_cache_buf(size_t size) {
- // TODO: memory pooling etc
- auto bp = ceph::bufferptr(
- buffer::create_page_aligned(size));
- bp.zero();
- return bp;
+ void apply_backref_mset(
+ backref_entry_refs_t& backref_entries) {
+ for (auto& entry : backref_entries) {
+ backref_entry_mset.insert(*entry);
+ }
}
- void backref_batch_update(
- std::vector<backref_entry_ref> &&,
- const journal_seq_t &);
+ void apply_backref_byseq(
+ backref_entry_refs_t&& backref_entries,
+ const journal_seq_t& seq);
+
+ void commit_backref_entries(
+ backref_entry_refs_t&& backref_entries,
+ const journal_seq_t& seq) {
+ apply_backref_mset(backref_entries);
+ apply_backref_byseq(std::move(backref_entries), seq);
+ }
/// Add extent to extents handling dirty and refcounting
- void add_extent(CachedExtentRef ref, const Transaction::src_t* t_src);
+ ///
+ /// Note, it must be followed by add_to_dirty() or touch_extent().
+ /// The only exception is RetiredExtentPlaceholder.
+ void add_extent(CachedExtentRef ref);
/// Mark existing extent ref dirty -- mainly for replay
void mark_dirty(CachedExtentRef ref);
/// Add dirty extent to dirty list
- void add_to_dirty(CachedExtentRef ref);
+ void add_to_dirty(
+ CachedExtentRef ref,
+ const Transaction::src_t* p_src);
+
+ /// Replace the prev dirty extent by next
+ void replace_dirty(
+ CachedExtentRef next,
+ CachedExtentRef prev,
+ const Transaction::src_t& src);
/// Remove from dirty list
- void remove_from_dirty(CachedExtentRef ref);
+ void remove_from_dirty(
+ CachedExtentRef ref,
+ const Transaction::src_t* p_src);
+
+ void clear_dirty();
/// Remove extent from extents handling dirty and refcounting
- void remove_extent(CachedExtentRef ref);
+ void remove_extent(
+ CachedExtentRef ref,
+ const Transaction::src_t* p_src);
/// Retire extent
void commit_retire_extent(Transaction& t, CachedExtentRef ref);
@@ -1674,39 +1893,74 @@ private:
/// Introspect transaction when it is being destructed
void on_transaction_destruct(Transaction& t);
+ /// Read the extent in range offset~length,
+ /// must be called exclusively for an extent,
+ /// also see do_read_extent_maybe_partial().
+ ///
+ /// May return an invalid extent due to transaction conflict.
template <typename T>
read_extent_ret<T> read_extent(
- TCachedExtentRef<T>&& extent
+ TCachedExtentRef<T>&& extent,
+ extent_len_t offset,
+ extent_len_t length,
+ const Transaction::src_t* p_src
) {
+ LOG_PREFIX(Cache::read_extent);
assert(extent->state == CachedExtent::extent_state_t::CLEAN_PENDING ||
- extent->state == CachedExtent::extent_state_t::EXIST_CLEAN ||
- extent->state == CachedExtent::extent_state_t::CLEAN);
+ extent->state == CachedExtent::extent_state_t::EXIST_CLEAN ||
+ extent->state == CachedExtent::extent_state_t::CLEAN);
+ assert(!extent->is_range_loaded(offset, length));
+ assert(is_aligned(offset, get_block_size()));
+ assert(is_aligned(length, get_block_size()));
extent->set_io_wait();
- return epm.read(
- extent->get_paddr(),
- extent->get_length(),
- extent->get_bptr()
- ).safe_then(
- [extent=std::move(extent), this]() mutable {
- LOG_PREFIX(Cache::read_extent);
- if (likely(extent->state == CachedExtent::extent_state_t::CLEAN_PENDING)) {
- extent->state = CachedExtent::extent_state_t::CLEAN;
- }
- ceph_assert(extent->state == CachedExtent::extent_state_t::EXIST_CLEAN
- || extent->state == CachedExtent::extent_state_t::CLEAN
- || !extent->is_valid());
- if (extent->is_valid()) {
- // crc will be checked against LBA leaf entry for logical extents,
- // or check against in-extent crc for physical extents.
- if (epm.get_checksum_needed(extent->get_paddr())) {
- extent->last_committed_crc = extent->calc_crc32c();
- } else {
- extent->last_committed_crc = CRC_NULL;
- }
- extent->on_clean_read();
- }
+ auto old_length = extent->get_loaded_length();
+ load_ranges_t to_read = extent->load_ranges(offset, length);
+ auto new_length = extent->get_loaded_length();
+ assert(new_length > old_length);
+ lru.increase_cached_size(*extent, new_length - old_length, p_src);
+ return seastar::do_with(to_read.ranges, [extent, this, FNAME](auto &read_ranges) {
+ return ExtentPlacementManager::read_ertr::parallel_for_each(
+ read_ranges, [extent, this, FNAME](auto &read_range) {
+ SUBDEBUG(seastore_cache, "reading extent {} 0x{:x}~0x{:x} ...",
+ extent->get_paddr(), read_range.offset, read_range.get_length());
+ assert(is_aligned(read_range.offset, get_block_size()));
+ assert(is_aligned(read_range.get_length(), get_block_size()));
+ return epm.read(
+ extent->get_paddr() + read_range.offset,
+ read_range.get_length(),
+ read_range.ptr);
+ });
+ }).safe_then(
+ [this, FNAME, extent=std::move(extent), offset, length]() mutable {
+ if (likely(extent->state == CachedExtent::extent_state_t::CLEAN_PENDING)) {
+ extent->state = CachedExtent::extent_state_t::CLEAN;
+ }
+ ceph_assert(extent->state == CachedExtent::extent_state_t::EXIST_CLEAN
+ || extent->state == CachedExtent::extent_state_t::CLEAN
+ || !extent->is_valid());
+ if (extent->is_valid()) {
+ if (extent->is_fully_loaded()) {
+ // crc will be checked against LBA leaf entry for logical extents,
+ // or checked against in-extent crc for physical extents.
+ if (epm.get_checksum_needed(extent->get_paddr())) {
+ extent->last_committed_crc = extent->calc_crc32c();
+ } else {
+ extent->last_committed_crc = CRC_NULL;
+ }
+ // on_clean_read() may change the content, call after calc_crc32c()
+ extent->on_clean_read();
+ SUBDEBUG(seastore_cache, "read extent 0x{:x}~0x{:x} done -- {}",
+ offset, length, *extent);
+ } else {
+ extent->last_committed_crc = CRC_NULL;
+ SUBDEBUG(seastore_cache, "read extent 0x{:x}~0x{:x} done (partial) -- {}",
+ offset, length, *extent);
+ }
+ } else {
+ SUBDEBUG(seastore_cache, "read extent 0x{:x}~0x{:x} done (invalidated) -- {}",
+ offset, length, *extent);
+ }
extent->complete_io();
- SUBDEBUG(seastore_cache, "read extent done -- {}", *extent);
return get_extent_ertr::make_ready_future<TCachedExtentRef<T>>(
std::move(extent));
},
@@ -1718,21 +1972,10 @@ private:
}
// Extents in cache may contain placeholders
- CachedExtentRef query_cache(
- paddr_t offset,
- const src_ext_t* p_metric_key) {
- query_counters_t* p_counters = nullptr;
- if (p_metric_key) {
- p_counters = &get_by_src(stats.cache_query_by_src, p_metric_key->first);
- ++p_counters->access;
- }
- if (auto iter = extents.find_offset(offset);
- iter != extents.end()) {
- if (p_metric_key &&
- // retired_placeholder is not really cached yet
- iter->get_type() != extent_types_t::RETIRED_PLACEHOLDER) {
- ++p_counters->hit;
- }
+ CachedExtentRef query_cache(paddr_t offset) {
+ if (auto iter = extents_index.find_offset(offset);
+ iter != extents_index.end()) {
+ assert(iter->is_stable());
return CachedExtentRef(&*iter);
} else {
return CachedExtentRef();