diff options
author | Sage Weil <sage@newdream.net> | 2021-09-07 20:53:06 +0200 |
---|---|---|
committer | Sage Weil <sage@newdream.net> | 2021-10-29 15:55:57 +0200 |
commit | 7f74551b7bdf13afb0f0d31b18bdafff91df3d82 (patch) | |
tree | de4ee683eaf04b2faa736d0fd6af1d91fe4bdfad | |
parent | blk/zoned: add get_zones() to fetch write pointers (diff) | |
download | ceph-7f74551b7bdf13afb0f0d31b18bdafff91df3d82.tar.xz ceph-7f74551b7bdf13afb0f0d31b18bdafff91df3d82.zip |
os/bluestore: fix startup vs device write pointers
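Compare the freelist's per-zone write pointers against the device's write pointers on startup: abort if BlueStore's pointer is ahead of the device's, and if it is behind (which can happen after a crash with a write in flight), advance it to the device's position and count the gap as dead bytes.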
Signed-off-by: Sage Weil <sage@newdream.net>
-rw-r--r-- | src/os/bluestore/BlueStore.cc | 31 | ||||
-rw-r--r-- | src/os/bluestore/ZonedAllocator.cc | 2 | ||||
-rw-r--r-- | src/os/bluestore/ZonedAllocator.h | 2 | ||||
-rw-r--r-- | src/os/bluestore/zoned_types.h | 4 |
4 files changed, 34 insertions, 5 deletions
diff --git a/src/os/bluestore/BlueStore.cc b/src/os/bluestore/BlueStore.cc index 2004f728645..459f7070b1b 100644 --- a/src/os/bluestore/BlueStore.cc +++ b/src/os/bluestore/BlueStore.cc @@ -5592,7 +5592,36 @@ int BlueStore::_init_alloc() ceph_assert(a); auto f = dynamic_cast<ZonedFreelistManager*>(fm); ceph_assert(f); - a->init_from_zone_pointers(f->get_zone_states(db), + vector<uint64_t> wp = bdev->get_zones(); + vector<zone_state_t> zones = f->get_zone_states(db); + ceph_assert(wp.size() == zones.size()); + + // reconcile zone state + auto num_zones = bdev->get_size() / zone_size; + for (unsigned i = first_sequential_zone; i < num_zones; ++i) { + ceph_assert(wp[i] >= i * zone_size); + ceph_assert(wp[i] <= (i + 1) * zone_size); // pos might be at start of next zone + uint64_t p = wp[i] - i * zone_size; + if (zones[i].write_pointer > p) { + derr << __func__ << " zone 0x" << std::hex << i + << " bluestore write pointer 0x" << zones[i].write_pointer + << " > device write pointer 0x" << p + << std::dec << dendl; + ceph_abort("bad write pointer"); + } else if (zones[i].write_pointer < p) { + // this is "normal" in that it can happen after any crash (if we have a + // write in flight but did not manage to commit the transaction) + auto delta = p - zones[i].write_pointer; + dout(1) << __func__ << " zone 0x" << std::hex << i + << " device write pointer 0x" << p + << " > bluestore pointer 0x" << zones[i].write_pointer + << ", advancing 0x" << delta << std::dec << dendl; + zones[i].num_dead_bytes += delta; + zones[i].write_pointer = p; + } + } + + a->init_from_zone_pointers(zones, &zoned_cleaner_lock, &zoned_cleaner_cond); dout(1) << __func__ diff --git a/src/os/bluestore/ZonedAllocator.cc b/src/os/bluestore/ZonedAllocator.cc index 0ec8000e17b..03326d2e5fc 100644 --- a/src/os/bluestore/ZonedAllocator.cc +++ b/src/os/bluestore/ZonedAllocator.cc @@ -151,7 +151,7 @@ void ZonedAllocator::dump(std::function<void(uint64_t offset, } void ZonedAllocator::init_from_zone_pointers( - 
std::vector<zone_state_t> &&_zone_states, + std::vector<zone_state_t> _zone_states, ceph::mutex *_cleaner_lock, ceph::condition_variable *_cleaner_cond) { diff --git a/src/os/bluestore/ZonedAllocator.h b/src/os/bluestore/ZonedAllocator.h index 73697e4cfd5..903b62fd1a0 100644 --- a/src/os/bluestore/ZonedAllocator.h +++ b/src/os/bluestore/ZonedAllocator.h @@ -96,7 +96,7 @@ public: void mark_zones_to_clean_free(void); void init_from_zone_pointers( - std::vector<zone_state_t> &&_zone_states, + std::vector<zone_state_t> _zone_states, ceph::mutex *_cleaner_lock, ceph::condition_variable *_cleaner_cond); void init_add_free(uint64_t offset, uint64_t length) override {} diff --git a/src/os/bluestore/zoned_types.h b/src/os/bluestore/zoned_types.h index c92f2ada099..93d3138399a 100644 --- a/src/os/bluestore/zoned_types.h +++ b/src/os/bluestore/zoned_types.h @@ -14,8 +14,8 @@ // We use the same struct for an on-disk and in-memory representation of the // state. struct zone_state_t { - uint64_t num_dead_bytes = 0; - uint64_t write_pointer = 0; + uint64_t num_dead_bytes = 0; ///< dead bytes deallocated (behind the write pointer) + uint64_t write_pointer = 0; ///< relative offset within the zone void encode(ceph::buffer::list &bl) const { using ceph::encode; |