summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAdam Kupczyk <akupczyk@ibm.com>2025-01-10 09:26:54 +0100
committerAdam Kupczyk <akupczyk@ibm.com>2025-01-13 09:17:16 +0100
commit7f3601089d41bfc23f530c7bf3fb7efad2d055ec (patch)
tree83062d5ed21e8f781af173abf7340b2671a57229
parentos/bluestore: bluefs unittest for truncate bug (diff)
downloadceph-7f3601089d41bfc23f530c7bf3fb7efad2d055ec.tar.xz
ceph-7f3601089d41bfc23f530c7bf3fb7efad2d055ec.zip
os/bluestore: Fix BlueFS::truncate()
In `struct bluefs_fnode_t` there is a vector `extents` and the vector `extents_index` that is a log2 seek cache. Until modifications to truncate() we never removed extents from files. Modified truncate() did not update extents_index. For example 10 extents long files when truncated to 0 will have: 0 extents, 10 extents_index. After writing some data to file: 1 extents, 11 extents_index. Now, `bluefs_fnode_t::seek` will binary search extents_index, lets say it located seek at item #3. It will then jump up from #0 extent (that exists) to #3 extent which does not exist at. The worst part is that code is now broken, as #3 != extent.end(). There are 3 parts of the fix: 1) assert in `bluefs_fnode_t::seek` to protect against jumping outside extents 2) code in BlueFS::truncate to sync up `extents_index` with `extents` 3) dampening down assert in _replay to give a way out of cases where incorrect "offset 12345" (12345 is file size) instead of "offset 20000" (allocations occupied) was written to log. Fixes: https://tracker.ceph.com/issues/69481 Signed-off-by: Adam Kupczyk <akupczyk@ibm.com>
-rw-r--r--src/os/bluestore/BlueFS.cc4
-rw-r--r--src/os/bluestore/bluefs_types.cc4
-rw-r--r--src/os/bluestore/bluefs_types.h1
3 files changed, 7 insertions, 2 deletions
diff --git a/src/os/bluestore/BlueFS.cc b/src/os/bluestore/BlueFS.cc
index e123a0a200a..2f88acdc93b 100644
--- a/src/os/bluestore/BlueFS.cc
+++ b/src/os/bluestore/BlueFS.cc
@@ -1706,7 +1706,8 @@ int BlueFS::_replay(bool noop, bool to_stdout)
<< " fnode=" << fnode
<< " delta=" << delta
<< dendl;
- ceph_assert(delta.offset == fnode.allocated);
+ // be leanient, if there is no extents just produce error message
+ ceph_assert(delta.offset == fnode.allocated || delta.extents.empty());
}
if (cct->_conf->bluefs_log_replay_check_allocations) {
int r = _check_allocations(fnode,
@@ -3830,6 +3831,7 @@ int BlueFS::truncate(FileWriter *h, uint64_t offset)/*_WF_L*/
fnode.size = offset;
fnode.allocated = new_allocated;
fnode.reset_delta();
+ fnode.recalc_allocated();
log.t.op_file_update(fnode);
// sad, but is_dirty must be set to signal flushing of the log
h->file->is_dirty = true;
diff --git a/src/os/bluestore/bluefs_types.cc b/src/os/bluestore/bluefs_types.cc
index e18dd490140..fe77f7f74d8 100644
--- a/src/os/bluestore/bluefs_types.cc
+++ b/src/os/bluestore/bluefs_types.cc
@@ -154,7 +154,9 @@ mempool::bluefs::vector<bluefs_extent_t>::iterator bluefs_fnode_t::seek(
assert(it != extents_index.begin());
--it;
assert(offset >= *it);
- p += it - extents_index.begin();
+ uint32_t skip = it - extents_index.begin();
+ ceph_assert(skip <= extents.size());
+ p += skip;
offset -= *it;
}
diff --git a/src/os/bluestore/bluefs_types.h b/src/os/bluestore/bluefs_types.h
index 627118c12f8..08b3ca0cf41 100644
--- a/src/os/bluestore/bluefs_types.h
+++ b/src/os/bluestore/bluefs_types.h
@@ -89,6 +89,7 @@ struct bluefs_fnode_t {
void recalc_allocated() {
allocated = 0;
extents_index.reserve(extents.size());
+ extents_index.clear();
for (auto& p : extents) {
extents_index.emplace_back(allocated);
allocated += p.length;